Example #1
    def update_keyring_caps(self, entity: Optional[str] = None) -> None:
        if not entity:
            entity = self.get_keyring_entity()

        osd_caps = 'allow rw pool=%s' % (self.spec.pool)
        if self.spec.namespace:
            osd_caps = '%s namespace=%s' % (osd_caps, self.spec.namespace)

        logger.info('Updating keyring caps: %s' % entity)
        ret, out, err = self.mgr.mon_command({
            'prefix': 'auth caps',
            'entity': entity,
            'caps': ['mon', 'allow r', 'osd', osd_caps],
        })

        if ret != 0:
            raise OrchestratorError(
                'Unable to update keyring caps %s: %s %s' % (entity, ret, err))
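
The caps string assembled above is what gets handed to the mon 'auth caps' command. The pool/namespace concatenation is easy to check in isolation; a minimal sketch (the helper name and the pool/namespace values are illustrative):

    def build_osd_caps(pool: str, namespace: str = '') -> str:
        # mirrors the osd_caps construction in update_keyring_caps above
        osd_caps = 'allow rw pool=%s' % pool
        if namespace:
            osd_caps = '%s namespace=%s' % (osd_caps, namespace)
        return osd_caps

    assert build_osd_caps('nfs-ganesha') == 'allow rw pool=nfs-ganesha'
    assert build_osd_caps('nfs-ganesha', 'ns1') == \
        'allow rw pool=nfs-ganesha namespace=ns1'
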
Example #2
    def generate_config(
            self, daemon_spec: CephadmDaemonSpec
    ) -> Tuple[Dict[str, Any], List[str]]:
        assert self.TYPE == daemon_spec.daemon_type
        assert daemon_spec.spec
        deps: List[str] = []

        cfg = CephadmExporterConfig(self.mgr)
        cfg.load_from_store()

        if cfg.ready:
            rc, reason = cfg.validate_config()
            if rc:
                raise OrchestratorError(reason)
        else:
            logger.info("Using default configuration for cephadm-exporter")
            self.mgr._set_exporter_defaults()
            cfg.load_from_store()

        config = {"crt": cfg.crt, "key": cfg.key, "token": cfg.token}
        return config, deps
Example #3
    def _get_container_image_info(self, image_name: str) -> ContainerInspectInfo:
        # pick an arbitrary host: the first one in the inventory will do
        host = None
        for host_name in self.mgr.inventory.keys():
            host = host_name
            break
        if not host:
            raise OrchestratorError('no hosts defined')
        if self.mgr.cache.host_needs_registry_login(host) and self.mgr.registry_url:
            self._registry_login(host, self.mgr.registry_url,
                                 self.mgr.registry_username, self.mgr.registry_password)

        j = self._run_cephadm_json(host, '', 'pull', [], image=image_name, no_fsid=True)

        r = ContainerInspectInfo(
            j['image_id'],
            j.get('ceph_version'),
            j.get('repo_digests')
        )
        self.log.debug(f'image {image_name} -> {r}')
        return r
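
ContainerInspectInfo is built from the JSON that the cephadm 'pull' command emits: 'image_id' is required, while 'ceph_version' and 'repo_digests' fall back to None via .get(). A standalone sketch, assuming ContainerInspectInfo is a NamedTuple with those three fields (the payload is illustrative):

    from typing import List, NamedTuple, Optional

    class ContainerInspectInfo(NamedTuple):
        image_id: str
        ceph_version: Optional[str]
        repo_digests: Optional[List[str]]

    j = {'image_id': 'sha256:0123abcd', 'ceph_version': 'ceph version 16.2.4'}
    r = ContainerInspectInfo(j['image_id'], j.get('ceph_version'),
                             j.get('repo_digests'))
    assert r.repo_digests is None
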
Example #4
 async def _execute_command(self,
                            host: str,
                            cmd: List[str],
                            stdin: Optional[str] = None,
                            addr: Optional[str] = None,
                            ) -> Tuple[str, str, int]:
     conn = await self._remote_connection(host, addr)
     cmd = "sudo " + " ".join(quote(x) for x in cmd)
     logger.debug(f'Running command: {cmd}')
     try:
         r = await conn.run(cmd, input=stdin)
     # handle these exceptions, otherwise you might get a weird error like
     # TypeError: __init__() missing 1 required positional argument: 'reason'
     # (due to the asyncssh error interacting with raise_if_exception)
     except (asyncssh.ChannelOpenError, Exception) as e:
         # SSH connection closed or broken, will create new connection next call
         logger.debug(f'Connection to {host} failed. {str(e)}')
         await self._reset_con(host)
         self.mgr.offline_hosts.add(host)
         raise OrchestratorError(f'Unable to reach remote host {host}. {str(e)}')
     out = r.stdout.rstrip('\n')
     err = r.stderr.rstrip('\n')
     return out, err, r.returncode
Example #5
    def prepare_create(
            self,
            daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
        assert self.TYPE == daemon_spec.daemon_type

        cfg = CephadmExporterConfig(self.mgr)
        cfg.load_from_store()

        if cfg.ready:
            rc, reason = cfg.validate_config()
            if rc:
                raise OrchestratorError(reason)
        else:
            logger.info("Incomplete/Missing configuration, applying defaults")
            self.mgr._set_exporter_defaults()
            cfg.load_from_store()

        if not daemon_spec.ports:
            daemon_spec.ports = [int(cfg.port)]

        return daemon_spec
Example #6
    async def _execute_command(
        self,
        host: str,
        cmd: List[str],
        stdin: Optional[str] = None,
        addr: Optional[str] = None,
    ) -> Tuple[str, str, int]:
        conn = await self._remote_connection(host, addr)
        sudo_prefix = "sudo " if self.mgr.ssh_user != 'root' else ""
        cmd = sudo_prefix + " ".join(quote(x) for x in cmd)
        logger.debug(f'Running command: {cmd}')
        try:
            r = await conn.run('sudo true', check=True, timeout=5)
            r = await conn.run(cmd, input=stdin)
        # handle these exceptions, otherwise you might get a weird error like
        # TypeError: __init__() missing 1 required positional argument: 'reason'
        # (due to the asyncssh error interacting with raise_if_exception)
        except (asyncssh.ChannelOpenError, asyncssh.ProcessError,
                Exception) as e:
            # SSH connection closed or broken, will create new connection next call
            logger.debug(f'Connection to {host} failed. {str(e)}')
            await self._reset_con(host)
            self.mgr.offline_hosts.add(host)
            raise OrchestratorError(
                f'Unable to reach remote host {host}. {str(e)}')

        def _rstrip(v: Union[bytes, str, None]) -> str:
            if not v:
                return ''
            if isinstance(v, str):
                return v.rstrip('\n')
            if isinstance(v, bytes):
                return v.decode().rstrip('\n')
            raise OrchestratorError(
                f'Unable to parse ssh output with type {type(v)} from remote host {host}'
            )

        out = _rstrip(r.stdout)
        err = _rstrip(r.stderr)
        rc = r.returncode if r.returncode else 0

        return out, err, rc
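
The nested _rstrip helper exists because asyncssh may hand back str, bytes, or None depending on how the channel terminated; anything else is surfaced as an OrchestratorError rather than a confusing TypeError later. Its contract as a runnable sketch (OrchestratorError stubbed for illustration):

    class OrchestratorError(Exception):
        pass

    def _rstrip(v):
        # same normalization as the nested helper in _execute_command above
        if not v:
            return ''
        if isinstance(v, str):
            return v.rstrip('\n')
        if isinstance(v, bytes):
            return v.decode().rstrip('\n')
        raise OrchestratorError(f'Unable to parse ssh output with type {type(v)}')

    assert _rstrip(None) == ''
    assert _rstrip('out\n') == 'out'
    assert _rstrip(b'out\n') == 'out'
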
Example #7
    def config(self, spec: RGWSpec, rgw_id: str) -> None:  # type: ignore
        assert self.TYPE == spec.service_type

        # set rgw_realm and rgw_zone, if present
        if spec.rgw_realm:
            ret, out, err = self.mgr.check_mon_command({
                'prefix': 'config set',
                'who':
                f"{utils.name_to_config_section('rgw')}.{spec.service_id}",
                'name': 'rgw_realm',
                'value': spec.rgw_realm,
            })
        if spec.rgw_zone:
            ret, out, err = self.mgr.check_mon_command({
                'prefix': 'config set',
                'who':
                f"{utils.name_to_config_section('rgw')}.{spec.service_id}",
                'name': 'rgw_zone',
                'value': spec.rgw_zone,
            })

        if spec.rgw_frontend_ssl_certificate:
            if isinstance(spec.rgw_frontend_ssl_certificate, list):
                cert_data = '\n'.join(spec.rgw_frontend_ssl_certificate)
            elif isinstance(spec.rgw_frontend_ssl_certificate, str):
                cert_data = spec.rgw_frontend_ssl_certificate
            else:
                raise OrchestratorError(
                    'Invalid rgw_frontend_ssl_certificate: %s' %
                    spec.rgw_frontend_ssl_certificate)
            ret, out, err = self.mgr.check_mon_command({
                'prefix': 'config-key set',
                'key': f'rgw/cert/{spec.service_name()}',
                'val': cert_data,
            })

        # TODO: fail if we don't have a spec
        logger.info('Saving service %s spec with placement %s' %
                    (spec.service_name(), spec.placement.pretty_str()))
        self.mgr.spec_store.save(spec)
Example #8
    def generate_config(
        self, daemon_spec: CephadmDaemonDeploySpec
    ) -> Tuple[Dict[str, Any], List[str]]:
        try:
            assert self.mgr.cherrypy_thread
            assert self.mgr.cherrypy_thread.ssl_certs.get_root_cert()
            assert self.mgr.cherrypy_thread.server_port
        except Exception:
            raise OrchestratorError(
                'Cannot deploy agent daemons until cephadm endpoint has finished generating certs'
            )

        cfg = {
            'target_ip': self.mgr.get_mgr_ip(),
            'target_port': self.mgr.cherrypy_thread.server_port,
            'refresh_period': self.mgr.agent_refresh_rate,
            'listener_port': self.mgr.agent_starting_port,
            'host': daemon_spec.host,
            'device_enhanced_scan': str(self.mgr.device_enhanced_scan)
        }

        listener_cert, listener_key = self.mgr.cherrypy_thread.ssl_certs.generate_cert(
            self.mgr.inventory.get_addr(daemon_spec.host))
        config = {
            'agent.json': json.dumps(cfg),
            'keyring': daemon_spec.keyring,
            'root_cert.pem':
            self.mgr.cherrypy_thread.ssl_certs.get_root_cert(),
            'listener.crt': listener_cert,
            'listener.key': listener_key,
        }

        return config, sorted([
            str(self.mgr.get_mgr_ip()),
            str(self.mgr.cherrypy_thread.server_port),
            self.mgr.cherrypy_thread.ssl_certs.get_root_cert(),
            str(self.mgr.get_module_option('device_enhanced_scan'))
        ])
Example #9
 async def _write_remote_file(
     self,
     host: str,
     path: str,
     content: bytes,
     mode: Optional[int] = None,
     uid: Optional[int] = None,
     gid: Optional[int] = None,
     addr: Optional[str] = None,
 ) -> None:
     try:
         dirname = os.path.dirname(path)
         await self._check_execute_command(host, ['mkdir', '-p', dirname],
                                           addr=addr)
         tmp_path = path + '.new'
         await self._check_execute_command(host, ['touch', tmp_path],
                                           addr=addr)
         if uid is not None and gid is not None and mode is not None:
             # shlex quote takes str or byte object, not int
             await self._check_execute_command(
                 host, ['chown', '-R',
                        str(uid) + ':' + str(gid), tmp_path],
                 addr=addr)
             await self._check_execute_command(
                 host, ['chmod', oct(mode)[2:], tmp_path], addr=addr)
         with NamedTemporaryFile(prefix='cephadm-write-remote-file-') as f:
             os.fchmod(f.fileno(), 0o600)
             f.write(content)
             f.flush()
             conn = await self._remote_connection(host, addr)
             await asyncssh.scp(f.name, (conn, tmp_path))
         await self._check_execute_command(host, ['mv', tmp_path, path],
                                           addr=addr)
     except Exception as e:
         msg = f"Unable to write {host}:{path}: {e}"
         logger.exception(msg)
         raise OrchestratorError(msg)
Example #10
File: osd.py  Project: yzhan298/ceph
    def find_destroyed_osds(self) -> Dict[str, List[str]]:
        osd_host_map: Dict[str, List[str]] = dict()
        try:
            ret, out, err = self.mgr.check_mon_command({
                'prefix': 'osd tree',
                'states': ['destroyed'],
                'format': 'json'
            })
        except MonCommandFailed as e:
            logger.exception('osd tree failed')
            raise OrchestratorError(str(e))
        try:
            tree = json.loads(out)
        except json.decoder.JSONDecodeError:
            logger.exception(f"Could not decode json -> {out}")
            return osd_host_map

        nodes = tree.get('nodes', {})
        for node in nodes:
            if node.get('type') == 'host':
                osd_host_map.update(
                    {node.get('name'): [str(_id) for _id in node.get('children', list())]}
                )
        return osd_host_map
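
The shape of the 'osd tree' JSON is the key detail here: hosts and OSDs both appear as flat entries under 'nodes', and a host's OSD ids are listed in its 'children'. The mapping step, reduced to a self-contained sketch (the payload is trimmed and illustrative):

    import json

    out = json.dumps({'nodes': [
        {'type': 'host', 'name': 'node1', 'children': [0, 2]},
        {'type': 'osd', 'name': 'osd.0', 'id': 0},
    ]})
    tree = json.loads(out)
    osd_host_map = {}
    for node in tree.get('nodes', []):
        if node.get('type') == 'host':
            osd_host_map[node.get('name')] = [
                str(_id) for _id in node.get('children', [])]
    assert osd_host_map == {'node1': ['0', '2']}
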
Example #11
    def _run_cephadm(
        self,
        host: str,
        entity: Union[CephadmNoImage, str],
        command: str,
        args: List[str],
        addr: Optional[str] = "",
        stdin: Optional[str] = "",
        no_fsid: Optional[bool] = False,
        error_ok: Optional[bool] = False,
        image: Optional[str] = "",
        env_vars: Optional[List[str]] = None,
    ) -> Tuple[List[str], List[str], int]:
        """
        Run cephadm on the remote host with the given command + args

        Important: You probably don't want to run _run_cephadm from CLI handlers

        :param env_vars: in format -> [KEY=VALUE, ..]
        """
        self.log.debug(f"_run_cephadm : command = {command}")
        self.log.debug(f"_run_cephadm : args = {args}")

        bypass_image = ('cephadm-exporter', )

        with self._remote_connection(host, addr) as tpl:
            conn, connr = tpl
            assert image or entity
            # Skip the image check for daemons deployed that are not ceph containers
            if not str(entity).startswith(bypass_image):
                if not image and entity is not cephadmNoImage:
                    image = self.mgr._get_container_image(entity)

            final_args = []

            if env_vars:
                for env_var_pair in env_vars:
                    final_args.extend(['--env', env_var_pair])

            if image:
                final_args.extend(['--image', image])
            final_args.append(command)

            if not no_fsid:
                final_args += ['--fsid', self.mgr._cluster_fsid]

            if self.mgr.container_init:
                final_args += ['--container-init']

            final_args += args

            self.log.debug('args: %s' % (' '.join(final_args)))
            if self.mgr.mode == 'root':
                if stdin:
                    self.log.debug('stdin: %s' % stdin)
                script = 'injected_argv = ' + json.dumps(final_args) + '\n'
                if stdin:
                    script += 'injected_stdin = ' + json.dumps(stdin) + '\n'
                script += self.mgr._cephadm
                python = connr.choose_python()
                if not python:
                    raise RuntimeError(
                        'unable to find python on %s (tried %s in %s)' %
                        (host, remotes.PYTHONS, remotes.PATH))
                try:
                    out, err, code = remoto.process.check(
                        conn, [python, '-u'], stdin=script.encode('utf-8'))
                except RuntimeError as e:
                    self.mgr._reset_con(host)
                    if error_ok:
                        return [], [str(e)], 1
                    raise
            elif self.mgr.mode == 'cephadm-package':
                try:
                    out, err, code = remoto.process.check(
                        conn, ['sudo', '/usr/bin/cephadm'] + final_args,
                        stdin=stdin)
                except RuntimeError as e:
                    self.mgr._reset_con(host)
                    if error_ok:
                        return [], [str(e)], 1
                    raise
            else:
                assert False, 'unsupported mode'

            self.log.debug('code: %d' % code)
            if out:
                self.log.debug('out: %s' % '\n'.join(out))
            if err:
                self.log.debug('err: %s' % '\n'.join(err))
            if code and not error_ok:
                raise OrchestratorError(
                    'cephadm exited with an error code: %d, stderr:%s' %
                    (code, '\n'.join(err)))
            return out, err, code
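
In 'root' mode the command line is not passed as argv at all: the cephadm source is piped to the remote python interpreter with injected_argv (and optionally injected_stdin) prepended as literal assignments. The script-building step in isolation (argument values illustrative):

    import json

    final_args = ['--image', 'quay.io/ceph/ceph:v15', 'pull']
    stdin = None

    script = 'injected_argv = ' + json.dumps(final_args) + '\n'
    if stdin:
        script += 'injected_stdin = ' + json.dumps(stdin) + '\n'
    # the full cephadm source would be appended next (script += self.mgr._cephadm)

    assert script == 'injected_argv = ["--image", "quay.io/ceph/ceph:v15", "pull"]\n'
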
Example #12
    def _create_daemon(
        self,
        daemon_spec: CephadmDaemonSpec,
        reconfig: bool = False,
        osd_uuid_map: Optional[Dict[str, Any]] = None,
    ) -> str:

        with set_exception_subject('service',
                                   orchestrator.DaemonDescription(
                                       daemon_type=daemon_spec.daemon_type,
                                       daemon_id=daemon_spec.daemon_id,
                                       hostname=daemon_spec.host,
                                   ).service_id(),
                                   overwrite=True):

            image = ''
            start_time = datetime_now()
            ports: List[int] = daemon_spec.ports if daemon_spec.ports else []

            if daemon_spec.daemon_type == 'container':
                spec: Optional[CustomContainerSpec] = daemon_spec.spec
                if spec is None:
                    # Exit here immediately because the required service
                    # spec to create a daemon is not provided. This is only
                    # provided when a service is applied via 'orch apply'
                    # command.
                    msg = "Failed to {} daemon {} on {}: Required " \
                          "service specification not provided".format(
                              'reconfigure' if reconfig else 'deploy',
                              daemon_spec.name(), daemon_spec.host)
                    self.log.info(msg)
                    return msg
                image = spec.image
                if spec.ports:
                    ports.extend(spec.ports)

            if daemon_spec.daemon_type == 'cephadm-exporter':
                if not reconfig:
                    assert daemon_spec.host
                    deploy_ok = self._deploy_cephadm_binary(daemon_spec.host)
                    if not deploy_ok:
                        msg = f"Unable to deploy the cephadm binary to {daemon_spec.host}"
                        self.log.warning(msg)
                        return msg

            if daemon_spec.daemon_type == 'haproxy':
                haspec = cast(HA_RGWSpec, daemon_spec.spec)
                if haspec.haproxy_container_image:
                    image = haspec.haproxy_container_image

            if daemon_spec.daemon_type == 'keepalived':
                haspec = cast(HA_RGWSpec, daemon_spec.spec)
                if haspec.keepalived_container_image:
                    image = haspec.keepalived_container_image

            cephadm_config, deps = self.mgr.cephadm_services[
                daemon_type_to_service(
                    daemon_spec.daemon_type)].generate_config(daemon_spec)

            # TCP port to open in the host firewall
            if len(ports) > 0:
                daemon_spec.extra_args.extend(
                    ['--tcp-ports', ' '.join(map(str, ports))])

            # osd deployments need an --osd-fsid arg
            if daemon_spec.daemon_type == 'osd':
                if not osd_uuid_map:
                    osd_uuid_map = self.mgr.get_osd_uuid_map()
                osd_uuid = osd_uuid_map.get(daemon_spec.daemon_id)
                if not osd_uuid:
                    raise OrchestratorError('osd.%s not in osdmap' %
                                            daemon_spec.daemon_id)
                daemon_spec.extra_args.extend(['--osd-fsid', osd_uuid])

            if reconfig:
                daemon_spec.extra_args.append('--reconfig')
            if self.mgr.allow_ptrace:
                daemon_spec.extra_args.append('--allow-ptrace')

            if self.mgr.cache.host_needs_registry_login(
                    daemon_spec.host) and self.mgr.registry_url:
                self._registry_login(daemon_spec.host, self.mgr.registry_url,
                                     self.mgr.registry_username,
                                     self.mgr.registry_password)

            daemon_spec.extra_args.extend(['--config-json', '-'])

            self.log.info('%s daemon %s on %s' %
                          ('Reconfiguring' if reconfig else 'Deploying',
                           daemon_spec.name(), daemon_spec.host))

            out, err, code = self._run_cephadm(
                daemon_spec.host,
                daemon_spec.name(),
                'deploy', [
                    '--name',
                    daemon_spec.name(),
                ] + daemon_spec.extra_args,
                stdin=json.dumps(cephadm_config),
                image=image)
            if not code and daemon_spec.host in self.mgr.cache.daemons:
                # prime cached service state with what we (should have)
                # just created
                sd = orchestrator.DaemonDescription()
                sd.daemon_type = daemon_spec.daemon_type
                sd.daemon_id = daemon_spec.daemon_id
                sd.hostname = daemon_spec.host
                sd.status = 1
                sd.status_desc = 'starting'
                self.mgr.cache.add_daemon(daemon_spec.host, sd)
                if daemon_spec.daemon_type in [
                        'grafana', 'iscsi', 'prometheus', 'alertmanager'
                ]:
                    self.mgr.requires_post_actions.add(daemon_spec.daemon_type)
            self.mgr.cache.invalidate_host_daemons(daemon_spec.host)
            self.mgr.cache.update_daemon_config_deps(daemon_spec.host,
                                                     daemon_spec.name(), deps,
                                                     start_time)
            self.mgr.cache.save_host(daemon_spec.host)
            msg = "{} {} on host '{}'".format(
                'Reconfigured' if reconfig else 'Deployed', daemon_spec.name(),
                daemon_spec.host)
            if not code:
                self.mgr.events.for_daemon(daemon_spec.name(),
                                           OrchestratorEvent.INFO, msg)
            else:
                what = 'reconfigure' if reconfig else 'deploy'
                self.mgr.events.for_daemon(daemon_spec.name(),
                                           OrchestratorEvent.ERROR,
                                           f'Failed to {what}: {err}')
            return msg
Example #13
 def verify_no_migration(self) -> None:
     if self.is_migration_ongoing():
         # this is raised in module.serve()
         raise OrchestratorError(
             "cephadm migration still ongoing. Please wait until the migration is complete.")
Example #14
File: osd.py  Project: zli091/ceph
 def _find_inv_for_host(hostname: str, inventory_dict: dict) -> List[Device]:
     # This is stupid and needs to be loaded with the host
     for _host, _inventory in inventory_dict.items():
         if _host == hostname:
             return _inventory
     raise OrchestratorError("No inventory found for host: {}".format(hostname))
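
As the comment concedes, this is a linear scan over what is already a dict keyed by hostname. An equivalent direct lookup with the same OrchestratorError contract would be (a sketch, not the project's code):

    def _find_inv_for_host(hostname, inventory_dict):
        try:
            return inventory_dict[hostname]
        except KeyError:
            raise OrchestratorError(
                "No inventory found for host: {}".format(hostname))
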
Example #15
 def assert_host(self, host: str) -> None:
     if host not in self._inventory:
         raise OrchestratorError('host %s does not exist' % host)
Example #16
def resolve_ip(hostname: str) -> str:
    try:
        return socket.getaddrinfo(hostname, None, flags=socket.AI_CANONNAME, type=socket.SOCK_STREAM)[0][4][0]
    except socket.gaierror as e:
        raise OrchestratorError(f"Cannot resolve ip for host {hostname}: {e}")
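
resolve_ip has no orchestrator dependencies beyond the exception type, so it is easy to exercise directly; socket.gaierror is what getaddrinfo raises for unresolvable names. A runnable sketch with OrchestratorError stubbed:

    import socket

    class OrchestratorError(Exception):
        pass

    def resolve_ip(hostname: str) -> str:
        try:
            return socket.getaddrinfo(hostname, None,
                                      flags=socket.AI_CANONNAME,
                                      type=socket.SOCK_STREAM)[0][4][0]
        except socket.gaierror as e:
            raise OrchestratorError(f"Cannot resolve ip for host {hostname}: {e}")

    print(resolve_ip('localhost'))  # typically '127.0.0.1' or '::1'
    try:
        resolve_ip('no-such-host.invalid')
    except OrchestratorError as e:
        print(e)
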
Example #17
    def create_realm_zonegroup_zone(self, spec: RGWSpec, rgw_id: str):
        if utils.get_cluster_health(self.mgr) != 'HEALTH_OK':
            raise OrchestratorError(
                'Health not ok, will try again when health ok')

        # get keyring needed to run rados commands and strip out just the keyring
        keyring = self.get_keyring(rgw_id).split('key = ', 1)[1].rstrip()

        # We can call radosgw-admin within the container, because cephadm
        # gives the MGR the required keyring permissions
        # get realms
        cmd = [
            'radosgw-admin',
            '--key=%s' % keyring, '--user',
            'rgw.%s' % rgw_id, 'realm', 'list', '--format=json'
        ]
        result = subprocess.run(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        # create realm if needed
        cmd = [
            'radosgw-admin',
            '--key=%s' % keyring, '--user',
            'rgw.%s' % rgw_id, 'realm', 'create',
            '--rgw-realm=%s' % spec.rgw_realm, '--default'
        ]
        if not result.stdout:
            result = subprocess.run(cmd,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            self.mgr.log.info('created realm: %s' % spec.rgw_realm)
        else:
            try:
                j = json.loads(result.stdout)
                if 'realms' not in j or spec.rgw_realm not in j['realms']:
                    result = subprocess.run(cmd,
                                            stdout=subprocess.PIPE,
                                            stderr=subprocess.PIPE)
                    self.mgr.log.info('created realm: %s' % spec.rgw_realm)
            except Exception as e:
                raise OrchestratorError('failed to parse realm info')

        # get zonegroup
        cmd = [
            'radosgw-admin',
            '--key=%s' % keyring, '--user',
            'rgw.%s' % rgw_id, 'zonegroup', 'list', '--format=json'
        ]
        result = subprocess.run(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        # create zonegroup if needed
        cmd = [
            'radosgw-admin',
            '--key=%s' % keyring, '--user',
            'rgw.%s' % rgw_id, 'zonegroup', 'create',
            '--rgw-zonegroup=default', '--master', '--default'
        ]
        if not result.stdout:
            result = subprocess.run(cmd,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            self.mgr.log.info('created zonegroup: default')
        else:
            try:
                j = json.loads(result.stdout)
                if 'zonegroups' not in j or 'default' not in j['zonegroups']:
                    result = subprocess.run(cmd,
                                            stdout=subprocess.PIPE,
                                            stderr=subprocess.PIPE)
                    self.mgr.log.info('created zonegroup: default')
            except Exception as e:
                raise OrchestratorError('failed to parse zonegroup info')

        # get zones
        cmd = [
            'radosgw-admin',
            '--key=%s' % keyring, '--user',
            'rgw.%s' % rgw_id, 'zone', 'list', '--format=json'
        ]
        result = subprocess.run(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        # create zone if needed
        cmd = [
            'radosgw-admin',
            '--key=%s' % keyring, '--user',
            'rgw.%s' % rgw_id, 'zone', 'create', '--rgw-zonegroup=default',
            '--rgw-zone=%s' % spec.rgw_zone, '--master', '--default'
        ]
        if not result.stdout:
            result = subprocess.run(cmd,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            self.mgr.log.info('created zone: %s' % spec.rgw_zone)
        else:
            try:
                j = json.loads(result.stdout)
                if 'zones' not in j or spec.rgw_zone not in j['zones']:
                    result = subprocess.run(cmd,
                                            stdout=subprocess.PIPE,
                                            stderr=subprocess.PIPE)
                    self.mgr.log.info('created zone: %s' % spec.rgw_zone)
            except Exception as e:
                raise OrchestratorError('failed to parse zone info')
Example #18
 def _search(daemons):
     args = [('osd.%s' % d.service_instance, d.nodename) for d in daemons]
     if not args:
         raise OrchestratorError('Unable to find osd.%s' % name)
     return self._remove_daemon(args)
Example #19
File: ingress.py  Project: zhangsw/ceph
    def keepalived_generate_config(
        self,
        daemon_spec: CephadmDaemonDeploySpec,
    ) -> Tuple[Dict[str, Any], List[str]]:
        spec = cast(IngressSpec,
                    self.mgr.spec_store[daemon_spec.service_name].spec)
        assert spec.backend_service

        # generate password?
        pw_key = f'{spec.service_name()}/keepalived_password'
        password = self.mgr.get_store(pw_key)
        if password is None:
            if not spec.keepalived_password:
                password = ''.join(
                    random.choice(string.ascii_lowercase) for _ in range(20))
                self.mgr.set_store(pw_key, password)
        else:
            if spec.keepalived_password:
                self.mgr.set_store(pw_key, None)
        if spec.keepalived_password:
            password = spec.keepalived_password

        daemons = self.mgr.cache.get_daemons_by_service(spec.service_name())

        if not daemons:
            raise OrchestratorError(
                f'Failed to generate keepalived.conf: No daemons deployed for {spec.service_name()}'
            )

        deps = sorted(
            [d.name() for d in daemons if d.daemon_type == 'haproxy'])

        host = daemon_spec.host
        hosts = sorted(list(set([host] + [str(d.hostname) for d in daemons])))

        # interface
        bare_ip = str(spec.virtual_ip).split('/')[0]
        interface = None
        for subnet, ifaces in self.mgr.cache.networks.get(host, {}).items():
            if ifaces and ipaddress.ip_address(
                    bare_ip) in ipaddress.ip_network(subnet):
                interface = list(ifaces.keys())[0]
                logger.info(
                    f'{bare_ip} is in {subnet} on {host} interface {interface}'
                )
                break
        # try to find interface by matching spec.virtual_interface_networks
        if not interface and spec.virtual_interface_networks:
            for subnet, ifaces in self.mgr.cache.networks.get(host,
                                                              {}).items():
                if subnet in spec.virtual_interface_networks:
                    interface = list(ifaces.keys())[0]
                    logger.info(
                        f'{spec.virtual_ip} will be configured on {host} interface '
                        f'{interface} (which has guiding subnet {subnet})')
                    break
        if not interface:
            raise OrchestratorError(
                f"Unable to identify interface for {spec.virtual_ip} on {host}"
            )

        # script to monitor health
        script = '/usr/bin/false'
        for d in daemons:
            if d.hostname == host:
                if d.daemon_type == 'haproxy':
                    assert d.ports
                    port = d.ports[1]  # monitoring port
                    script = f'/usr/bin/curl {build_url(scheme="http", host=d.ip or "localhost", port=port)}/health'
        assert script

        # set state: the first host in placement is MASTER, all others BACKUP
        state = 'BACKUP'
        if hosts[0] == host:
            state = 'MASTER'

        # remove the host the daemon is being deployed on from the hosts
        # list; the remaining hosts become other_ips in the conf file
        # (converted to IPs)
        if host in hosts:
            hosts.remove(host)
        other_ips = [resolve_ip(self.mgr.inventory.get_addr(h)) for h in hosts]

        keepalived_conf = self.mgr.template.render(
            'services/ingress/keepalived.conf.j2', {
                'spec': spec,
                'script': script,
                'password': password,
                'interface': interface,
                'state': state,
                'other_ips': other_ips,
                'host_ip': resolve_ip(self.mgr.inventory.get_addr(host)),
            })

        config_file = {
            'files': {
                "keepalived.conf": keepalived_conf,
            }
        }

        return config_file, deps
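
Interface selection above boils down to a subnet-membership test against the per-host network cache, with the stdlib ipaddress module doing the heavy lifting. The core of that loop as a standalone sketch (the network data is illustrative):

    import ipaddress

    bare_ip = '10.0.0.5'
    networks = {
        '10.0.0.0/24': {'eth0': ['10.0.0.2']},
        '192.168.1.0/24': {'eth1': ['192.168.1.2']},
    }
    interface = None
    for subnet, ifaces in networks.items():
        if ifaces and ipaddress.ip_address(bare_ip) in ipaddress.ip_network(subnet):
            interface = list(ifaces.keys())[0]
            break
    assert interface == 'eth0'
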
Example #20
 def remove_osds(self, name):
     daemons = self._get_services('osd', service_id=name)
     args = [('osd.%s' % d.service_instance, d.nodename) for d in daemons]
     if not args:
         raise OrchestratorError('Unable to find osd.%s' % name)
     return self._remove_daemon(args)
Example #21
    def _create_daemon(self,
                       daemon_spec: CephadmDaemonDeploySpec,
                       reconfig: bool = False,
                       osd_uuid_map: Optional[Dict[str, Any]] = None,
                       ) -> str:

        with set_exception_subject('service', orchestrator.DaemonDescription(
                daemon_type=daemon_spec.daemon_type,
                daemon_id=daemon_spec.daemon_id,
                hostname=daemon_spec.host,
        ).service_id(), overwrite=True):

            try:
                image = ''
                start_time = datetime_now()
                ports: List[int] = daemon_spec.ports if daemon_spec.ports else []

                if daemon_spec.daemon_type == 'container':
                    spec = cast(CustomContainerSpec,
                                self.mgr.spec_store[daemon_spec.service_name].spec)
                    image = spec.image
                    if spec.ports:
                        ports.extend(spec.ports)

                if daemon_spec.daemon_type == 'cephadm-exporter':
                    if not reconfig:
                        assert daemon_spec.host
                        self._deploy_cephadm_binary(daemon_spec.host)

                if daemon_spec.daemon_type == 'haproxy':
                    haspec = cast(HA_RGWSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
                    if haspec.haproxy_container_image:
                        image = haspec.haproxy_container_image

                if daemon_spec.daemon_type == 'keepalived':
                    haspec = cast(HA_RGWSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
                    if haspec.keepalived_container_image:
                        image = haspec.keepalived_container_image

                # TCP port to open in the host firewall
                if len(ports) > 0:
                    daemon_spec.extra_args.extend([
                        '--tcp-ports', ' '.join(map(str, ports))
                    ])

                # osd deployments need an --osd-fsid arg
                if daemon_spec.daemon_type == 'osd':
                    if not osd_uuid_map:
                        osd_uuid_map = self.mgr.get_osd_uuid_map()
                    osd_uuid = osd_uuid_map.get(daemon_spec.daemon_id)
                    if not osd_uuid:
                        raise OrchestratorError('osd.%s not in osdmap' % daemon_spec.daemon_id)
                    daemon_spec.extra_args.extend(['--osd-fsid', osd_uuid])

                if reconfig:
                    daemon_spec.extra_args.append('--reconfig')
                if self.mgr.allow_ptrace:
                    daemon_spec.extra_args.append('--allow-ptrace')

                if self.mgr.cache.host_needs_registry_login(daemon_spec.host) and self.mgr.registry_url:
                    self._registry_login(daemon_spec.host, self.mgr.registry_url,
                                         self.mgr.registry_username, self.mgr.registry_password)

                self.log.info('%s daemon %s on %s' % (
                    'Reconfiguring' if reconfig else 'Deploying',
                    daemon_spec.name(), daemon_spec.host))

                out, err, code = self._run_cephadm(
                    daemon_spec.host, daemon_spec.name(), 'deploy',
                    [
                        '--name', daemon_spec.name(),
                        '--meta-json', json.dumps({
                            'service_name': daemon_spec.service_name,
                            'ports': daemon_spec.ports,
                            'ip': daemon_spec.ip,
                        }),
                        '--config-json', '-',
                    ] + daemon_spec.extra_args,
                    stdin=json.dumps(daemon_spec.final_config),
                    image=image)

                # refresh daemon state?  (ceph daemon reconfig does not need it)
                if not reconfig or daemon_spec.daemon_type not in CEPH_TYPES:
                    if not code and daemon_spec.host in self.mgr.cache.daemons:
                        # prime cached service state with what we (should have)
                        # just created
                        sd = daemon_spec.to_daemon_description(
                            DaemonDescriptionStatus.running, 'starting')
                        self.mgr.cache.add_daemon(daemon_spec.host, sd)
                        if daemon_spec.daemon_type in [
                            'grafana', 'iscsi', 'prometheus', 'alertmanager'
                        ]:
                            self.mgr.requires_post_actions.add(daemon_spec.daemon_type)
                    self.mgr.cache.invalidate_host_daemons(daemon_spec.host)

                self.mgr.cache.update_daemon_config_deps(
                    daemon_spec.host, daemon_spec.name(), daemon_spec.deps, start_time)
                self.mgr.cache.save_host(daemon_spec.host)
                msg = "{} {} on host '{}'".format(
                    'Reconfigured' if reconfig else 'Deployed', daemon_spec.name(), daemon_spec.host)
                if not code:
                    self.mgr.events.for_daemon(daemon_spec.name(), OrchestratorEvent.INFO, msg)
                else:
                    what = 'reconfigure' if reconfig else 'deploy'
                    self.mgr.events.for_daemon(
                        daemon_spec.name(), OrchestratorEvent.ERROR, f'Failed to {what}: {err}')
                return msg
            except OrchestratorError:
                if not reconfig:
                    # we have to clean up the daemon. E.g. keyrings.
                    service_type = daemon_type_to_service(daemon_spec.daemon_type)
                    dd = daemon_spec.to_daemon_description(DaemonDescriptionStatus.error, 'failed')
                    self.mgr.cephadm_services[service_type].post_remove(dd)
                raise
Example #22
    def run(self) -> None:
        try:
            try:
                old_creds = self.mgr.get_store('cephadm_endpoint_credentials')
                if not old_creds:
                    raise OrchestratorError(
                        'No old credentials for cephadm endpoint found')
                old_creds_dict = json.loads(old_creds)
                old_key = old_creds_dict['key']
                old_cert = old_creds_dict['cert']
                self.ssl_certs.load_root_credentials(old_cert, old_key)
            except (OrchestratorError, json.decoder.JSONDecodeError, KeyError,
                    ValueError):
                self.ssl_certs.generate_root_cert()

            cert, key = self.ssl_certs.generate_cert()

            self.key_tmp = tempfile.NamedTemporaryFile()
            self.key_tmp.write(key.encode('utf-8'))
            self.key_tmp.flush()  # key_tmp must not be gc'ed
            key_fname = self.key_tmp.name

            self.cert_tmp = tempfile.NamedTemporaryFile()
            self.cert_tmp.write(cert.encode('utf-8'))
            self.cert_tmp.flush()  # cert_tmp must not be gc'ed
            cert_fname = self.cert_tmp.name

            verify_tls_files(cert_fname, key_fname)

            cherrypy.config.update({
                'server.socket_host': self.server_addr,
                'server.socket_port': self.server_port,
                'engine.autoreload.on': False,
                'server.ssl_module': 'builtin',
                'server.ssl_certificate': cert_fname,
                'server.ssl_private_key': key_fname,
            })
            root_conf = {
                '/': {
                    'request.dispatch': cherrypy.dispatch.MethodDispatcher(),
                    'tools.response_headers.on': True
                }
            }
            cherrypy.tree.mount(Root(self.mgr), '/', root_conf)
            self.mgr.log.debug('Starting cherrypy engine...')
            self.start_engine()
            self.mgr.log.debug('Cherrypy engine started.')
            cephadm_endpoint_creds = {
                'cert': self.ssl_certs.get_root_cert(),
                'key': self.ssl_certs.get_root_key()
            }
            self.mgr.set_store('cephadm_endpoint_credentials',
                               json.dumps(cephadm_endpoint_creds))
            self.mgr._kick_serve_loop()
            # wait for the shutdown event
            self.cherrypy_shutdown_event.wait()
            self.cherrypy_shutdown_event.clear()
            cherrypy.engine.stop()
            self.mgr.log.debug('Cherrypy engine stopped.')
        except Exception as e:
            self.mgr.log.error(f'Failed to run cephadm cherrypy endpoint: {e}')
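
The flush() comments matter here: the NamedTemporaryFile objects are deliberately kept as attributes on self so the underlying files are not garbage-collected (and thus deleted) while cherrypy is still reading the cert and key paths. The pattern in isolation (a sketch):

    import tempfile

    key_tmp = tempfile.NamedTemporaryFile()  # keep this reference alive
    key_tmp.write(b'-----BEGIN PRIVATE KEY-----\n')
    key_tmp.flush()  # contents now visible at key_tmp.name
    key_fname = key_tmp.name
    # if key_tmp were dropped here, GC would close and delete the file
    # before anything could read key_fname
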
Example #23
    def config(self, spec: RGWSpec, rgw_id: str) -> None:  # type: ignore
        assert self.TYPE == spec.service_type

        # create realm, zonegroup, and zone if needed
        self.create_realm_zonegroup_zone(spec, rgw_id)

        # ensure rgw_realm and rgw_zone are set for these daemons
        ret, out, err = self.mgr.check_mon_command({
            'prefix': 'config set',
            'who': f"{utils.name_to_config_section('rgw')}.{spec.service_id}",
            'name': 'rgw_zone',
            'value': spec.rgw_zone,
        })
        ret, out, err = self.mgr.check_mon_command({
            'prefix': 'config set',
            'who': f"{utils.name_to_config_section('rgw')}.{spec.service_id}",
            'name': 'rgw_realm',
            'value': spec.rgw_realm,
        })
        ret, out, err = self.mgr.check_mon_command({
            'prefix': 'config set',
            'who': f"{utils.name_to_config_section('rgw')}.{spec.service_id}",
            'name': 'rgw_frontends',
            'value': spec.rgw_frontends_config_value(),
        })

        if spec.rgw_frontend_ssl_certificate:
            if isinstance(spec.rgw_frontend_ssl_certificate, list):
                cert_data = '\n'.join(spec.rgw_frontend_ssl_certificate)
            elif isinstance(spec.rgw_frontend_ssl_certificate, str):
                cert_data = spec.rgw_frontend_ssl_certificate
            else:
                raise OrchestratorError(
                    'Invalid rgw_frontend_ssl_certificate: %s' %
                    spec.rgw_frontend_ssl_certificate)
            ret, out, err = self.mgr.check_mon_command({
                'prefix': 'config-key set',
                'key': f'rgw/cert/{spec.rgw_realm}/{spec.rgw_zone}.crt',
                'val': cert_data,
            })

        if spec.rgw_frontend_ssl_key:
            if isinstance(spec.rgw_frontend_ssl_key, list):
                key_data = '\n'.join(spec.rgw_frontend_ssl_key)
            elif isinstance(spec.rgw_frontend_ssl_key, str):
                key_data = spec.rgw_frontend_ssl_key
            else:
                raise OrchestratorError('Invalid rgw_frontend_ssl_key: %s' %
                                        spec.rgw_frontend_ssl_key)
            ret, out, err = self.mgr.check_mon_command({
                'prefix': 'config-key set',
                'key': f'rgw/cert/{spec.rgw_realm}/{spec.rgw_zone}.key',
                'val': key_data,
            })

        # TODO: fail if we don't have a spec
        logger.info('Saving service %s spec with placement %s' %
                    (spec.service_name(), spec.placement.pretty_str()))
        self.mgr.spec_store.save(spec)
Example #24
 def __getitem__(self, name: str) -> SpecDescription:
     if name not in self._specs:
         raise OrchestratorError(f'Service {name} not found.')
     return SpecDescription(self._specs[name], self.spec_created[name],
                            self.spec_deleted.get(name, None))
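
The point of overriding __getitem__ is that callers get a domain-level OrchestratorError ('Service ... not found.') instead of a bare KeyError. A minimal stub showing the call-site behavior (SpecDescription and the store internals simplified away):

    class OrchestratorError(Exception):
        pass

    class SpecStore:
        def __init__(self):
            self._specs = {}

        def __getitem__(self, name):
            if name not in self._specs:
                raise OrchestratorError(f'Service {name} not found.')
            return self._specs[name]

    store = SpecStore()
    try:
        store['rgw.myrealm']
    except OrchestratorError as e:
        print(e)  # Service rgw.myrealm not found.
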
Example #25
    def create_realm_zonegroup_zone(self, spec: RGWSpec, rgw_id: str) -> None:
        if utils.get_cluster_health(self.mgr) != 'HEALTH_OK':
            raise OrchestratorError(
                'Health not ok, will try again when health ok')

        # get keyring needed to run rados commands and strip out just the keyring
        keyring = self.get_keyring(rgw_id).split('key = ', 1)[1].rstrip()

        # We can call radosgw-admin within the container, because cephadm
        # gives the MGR the required keyring permissions

        def get_realms() -> List[str]:
            cmd = [
                'radosgw-admin',
                '--key=%s' % keyring, '--user',
                'rgw.%s' % rgw_id, 'realm', 'list', '--format=json'
            ]
            result = subprocess.run(cmd,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            out = result.stdout
            if not out:
                return []
            try:
                j = json.loads(out)
                return j.get('realms', [])
            except Exception:
                raise OrchestratorError('failed to parse realm info')

        def create_realm() -> None:
            cmd = [
                'radosgw-admin',
                '--key=%s' % keyring, '--user',
                'rgw.%s' % rgw_id, 'realm', 'create',
                '--rgw-realm=%s' % spec.rgw_realm, '--default'
            ]
            result = subprocess.run(cmd,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            if result.returncode:
                err = 'failed to create RGW realm "%s": %r' % (spec.rgw_realm,
                                                               result.stderr)
                raise OrchestratorError(err)
            self.mgr.log.info('created realm: %s' % spec.rgw_realm)

        def get_zonegroups() -> List[str]:
            cmd = [
                'radosgw-admin',
                '--key=%s' % keyring, '--user',
                'rgw.%s' % rgw_id, 'zonegroup', 'list', '--format=json'
            ]
            result = subprocess.run(cmd,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            out = result.stdout
            if not out:
                return []
            try:
                j = json.loads(out)
                return j.get('zonegroups', [])
            except Exception:
                raise OrchestratorError('failed to parse zonegroup info')

        def create_zonegroup() -> None:
            cmd = [
                'radosgw-admin',
                '--key=%s' % keyring, '--user',
                'rgw.%s' % rgw_id, 'zonegroup', 'create',
                '--rgw-zonegroup=default', '--master', '--default'
            ]
            result = subprocess.run(cmd,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            if result.returncode:
                err = 'failed to create RGW zonegroup "%s": %r' % (
                    'default', result.stderr)
                raise OrchestratorError(err)
            self.mgr.log.info('created zonegroup: default')

        def create_zonegroup_if_required() -> None:
            zonegroups = get_zonegroups()
            if 'default' not in zonegroups:
                create_zonegroup()

        def get_zones() -> List[str]:
            cmd = [
                'radosgw-admin',
                '--key=%s' % keyring, '--user',
                'rgw.%s' % rgw_id, 'zone', 'list', '--format=json'
            ]
            result = subprocess.run(cmd,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            out = result.stdout
            if not out:
                return []
            try:
                j = json.loads(out)
                return j.get('zones', [])
            except Exception:
                raise OrchestratorError('failed to parse zone info')

        def create_zone() -> None:
            cmd = [
                'radosgw-admin',
                '--key=%s' % keyring, '--user',
                'rgw.%s' % rgw_id, 'zone', 'create', '--rgw-zonegroup=default',
                '--rgw-zone=%s' % spec.rgw_zone, '--master', '--default'
            ]
            result = subprocess.run(cmd,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            if result.returncode:
                err = 'failed to create RGW zone "%s": %r' % (spec.rgw_zone,
                                                              result.stderr)
                raise OrchestratorError(err)
            self.mgr.log.info('created zone: %s' % spec.rgw_zone)

        changes = False
        realms = get_realms()
        if spec.rgw_realm not in realms:
            create_realm()
            changes = True

        zones = get_zones()
        if spec.rgw_zone not in zones:
            create_zonegroup_if_required()
            create_zone()
            changes = True

        # update period if changes were made
        if changes:
            cmd = [
                'radosgw-admin',
                '--key=%s' % keyring, '--user',
                'rgw.%s' % rgw_id, 'period', 'update',
                '--rgw-realm=%s' % spec.rgw_realm, '--commit'
            ]
            result = subprocess.run(cmd,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            if result.returncode:
                err = 'failed to update RGW period: %r' % (result.stderr)
                raise OrchestratorError(err)
            self.mgr.log.info('updated period')
Example #26
File: osd.py  Project: whaddock/ceph
    def prepare_drivegroup(
            self,
            drive_group: DriveGroupSpec) -> List[Tuple[str, DriveSelection]]:
        # 1) use fn_filter to determine matching_hosts
        matching_hosts = drive_group.placement.filter_matching_hostspecs(
            self.mgr.cache.get_schedulable_hosts())
        # 2) Map the inventory to the InventoryHost object
        host_ds_map = []

        # set osd_id_claims

        def _find_inv_for_host(hostname: str,
                               inventory_dict: dict) -> List[Device]:
            # This is stupid and needs to be loaded with the host
            for _host, _inventory in inventory_dict.items():
                if _host == hostname:
                    return _inventory
            raise OrchestratorError(
                "No inventory found for host: {}".format(hostname))

        # 3) iterate over matching_host and call DriveSelection
        logger.debug(f"Checking matching hosts -> {matching_hosts}")
        for host in matching_hosts:
            inventory_for_host = _find_inv_for_host(host,
                                                    self.mgr.cache.devices)
            logger.debug(f"Found inventory for host {inventory_for_host}")

            # List of Daemons on that host
            dd_for_spec = self.mgr.cache.get_daemons_by_service(
                drive_group.service_name())
            dd_for_spec_and_host = [
                dd for dd in dd_for_spec if dd.hostname == host
            ]

            drive_selection = DriveSelection(
                drive_group,
                inventory_for_host,
                existing_daemons=len(dd_for_spec_and_host))
            logger.debug(f"Found drive selection {drive_selection}")
            if drive_group.method and drive_group.method == 'raw':
                # ceph-volume can currently only handle a 1:1 mapping
                # of data/db/wal devices for raw mode osds. If db/wal devices
                # are defined and the number does not match the number of data
                # devices, we need to bail out
                if drive_selection.data_devices(
                ) and drive_selection.db_devices():
                    if len(drive_selection.data_devices()) != len(
                            drive_selection.db_devices()):
                        raise OrchestratorError(
                            'Raw mode only supports a 1:1 ratio of data to db devices. Found '
                            f'{len(drive_selection.data_devices())} potential data device(s) and '
                            f'{len(drive_selection.db_devices())} potential db device(s) on host {host}'
                        )
                if drive_selection.data_devices(
                ) and drive_selection.wal_devices():
                    if len(drive_selection.data_devices()) != len(
                            drive_selection.wal_devices()):
                        raise OrchestratorError(
                            'Raw mode only supports a 1:1 ratio of data to wal devices. Found '
                            f'{len(drive_selection.data_devices())} potential data device(s) and '
                            f'{len(drive_selection.wal_devices())} potential wal device(s) on host {host}'
                        )
            host_ds_map.append((host, drive_selection))
        return host_ds_map