def purge(self, service_name: str) -> None:
    self.mgr.check_mon_command({
        'prefix': 'config rm',
        'who': utils.name_to_config_section(service_name),
        'name': 'rgw_realm',
    })
    self.mgr.check_mon_command({
        'prefix': 'config rm',
        'who': utils.name_to_config_section(service_name),
        'name': 'rgw_zone',
    })
    self.mgr.check_mon_command({
        'prefix': 'config-key rm',
        'key': f'rgw/cert/{service_name}',
    })

def post_remove(self, daemon: DaemonDescription, is_failed_deploy: bool) -> None:
    super().post_remove(daemon, is_failed_deploy=is_failed_deploy)
    self.mgr.check_mon_command({
        'prefix': 'config rm',
        'who': utils.name_to_config_section(daemon.name()),
        'name': 'rgw_frontends',
    })

def create(self, igw_id, host, spec) -> str:
    ret, keyring, err = self.mgr.check_mon_command({
        'prefix': 'auth get-or-create',
        'entity': utils.name_to_config_section('iscsi') + '.' + igw_id,
        'caps': ['mon', 'profile rbd, '
                        'allow command "osd blacklist", '
                        'allow command "config-key get" with "key" prefix "iscsi/"',
                 'osd', f'allow rwx pool={spec.pool}'],
    })

    if spec.ssl_cert:
        if isinstance(spec.ssl_cert, list):
            cert_data = '\n'.join(spec.ssl_cert)
        else:
            cert_data = spec.ssl_cert
        ret, out, err = self.mgr.mon_command({
            'prefix': 'config-key set',
            'key': f'iscsi/{utils.name_to_config_section("iscsi")}.{igw_id}/iscsi-gateway.crt',
            'val': cert_data,
        })

    if spec.ssl_key:
        if isinstance(spec.ssl_key, list):
            key_data = '\n'.join(spec.ssl_key)
        else:
            key_data = spec.ssl_key
        ret, out, err = self.mgr.mon_command({
            'prefix': 'config-key set',
            'key': f'iscsi/{utils.name_to_config_section("iscsi")}.{igw_id}/iscsi-gateway.key',
            'val': key_data,
        })

    api_secure = 'false' if spec.api_secure is None else spec.api_secure
    igw_conf = f"""
# generated by cephadm
[config]
cluster_client_name = {utils.name_to_config_section('iscsi')}.{igw_id}
pool = {spec.pool}
trusted_ip_list = {spec.trusted_ip_list or ''}
minimum_gateways = 1
api_port = {spec.api_port or ''}
api_user = {spec.api_user or ''}
api_password = {spec.api_password or ''}
api_secure = {api_secure}
"""
    extra_config = {'iscsi-gateway.cfg': igw_conf}
    return self.mgr._create_daemon('iscsi', igw_id, host, keyring=keyring,
                                   extra_config=extra_config)

def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
    assert self.TYPE == daemon_spec.daemon_type
    rgw_id, _ = daemon_spec.daemon_id, daemon_spec.host
    spec = cast(RGWSpec, self.mgr.spec_store[daemon_spec.service_name].spec)

    keyring = self.get_keyring(rgw_id)

    if daemon_spec.ports:
        port = daemon_spec.ports[0]
    else:
        # this is a redeploy of older instance that doesn't have an explicitly
        # assigned port, in which case we can assume there is only 1 per host
        # and it matches the spec.
        port = spec.get_port()

    # configure frontend
    args = []
    ftype = spec.rgw_frontend_type or "beast"
    if ftype == 'beast':
        if spec.ssl:
            if daemon_spec.ip:
                args.append(
                    f"ssl_endpoint={build_url(host=daemon_spec.ip, port=port).lstrip('/')}")
            else:
                args.append(f"ssl_port={port}")
            args.append(f"ssl_certificate=config://rgw/cert/{spec.service_name()}")
        else:
            if daemon_spec.ip:
                args.append(f"endpoint={build_url(host=daemon_spec.ip, port=port).lstrip('/')}")
            else:
                args.append(f"port={port}")
    elif ftype == 'civetweb':
        if spec.ssl:
            if daemon_spec.ip:
                # note the 's' suffix on port
                args.append(f"port={build_url(host=daemon_spec.ip, port=port).lstrip('/')}s")
            else:
                args.append(f"port={port}s")  # note the 's' suffix on port
            args.append(f"ssl_certificate=config://rgw/cert/{spec.service_name()}")
        else:
            if daemon_spec.ip:
                args.append(f"port={build_url(host=daemon_spec.ip, port=port).lstrip('/')}")
            else:
                args.append(f"port={port}")
    frontend = f'{ftype} {" ".join(args)}'

    ret, out, err = self.mgr.check_mon_command({
        'prefix': 'config set',
        'who': utils.name_to_config_section(daemon_spec.name()),
        'name': 'rgw_frontends',
        'value': frontend
    })

    daemon_spec.keyring = keyring
    daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)

    return daemon_spec

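# Illustrative sketch (not part of the module): the value written to the
# per-daemon 'rgw_frontends' option above is just the frontend type followed by
# space-separated arguments. The host, port and service name below are made-up
# examples, and build_url is a simplified stand-in for the real helper (which
# also handles IPv6 bracketing).
def _example_rgw_frontends() -> str:
    def build_url(host: str, port: int) -> str:  # simplified stand-in
        return f'//{host}:{port}'

    args = [
        f"ssl_endpoint={build_url(host='10.0.0.1', port=443).lstrip('/')}",
        "ssl_certificate=config://rgw/cert/rgw.myrealm",
    ]
    return f'beast {" ".join(args)}'
    # -> 'beast ssl_endpoint=10.0.0.1:443 ssl_certificate=config://rgw/cert/rgw.myrealm'
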
def generate_config(
    self,
    daemon_spec: CephadmDaemonSpec[NFSServiceSpec],
) -> Tuple[Dict[str, Any], List[str]]:
    assert self.TYPE == daemon_spec.daemon_type
    assert daemon_spec.spec

    daemon_type = daemon_spec.daemon_type
    daemon_id = daemon_spec.daemon_id
    host = daemon_spec.host
    spec = daemon_spec.spec

    deps: List[str] = []

    # create the keyring
    user = f'{daemon_type}.{daemon_id}'
    entity = utils.name_to_config_section(user)
    keyring = self.get_or_create_keyring(entity)
    # update the caps after get-or-create, the keyring might already exist!
    self.update_keyring_caps(entity, spec)

    # create the rados config object
    self.create_rados_config_obj(spec)

    # generate the ganesha config
    def get_ganesha_conf() -> str:
        context = dict(user=user,
                       nodeid=daemon_spec.name(),
                       pool=spec.pool,
                       namespace=spec.namespace if spec.namespace else '',
                       url=spec.rados_config_location())
        return self.mgr.template.render('services/nfs/ganesha.conf.j2', context)

    # generate the cephadm config json
    def get_cephadm_config() -> Dict[str, Any]:
        config: Dict[str, Any] = {}
        config['pool'] = spec.pool
        if spec.namespace:
            config['namespace'] = spec.namespace
        config['userid'] = user
        config['extra_args'] = ['-N', 'NIV_EVENT']
        config['files'] = {
            'ganesha.conf': get_ganesha_conf(),
        }
        config.update(
            self.mgr._get_config_and_keyring(
                daemon_type, daemon_id,
                keyring=keyring,
                host=host
            ))
        logger.debug('Generated cephadm config-json: %s' % config)
        return config

    return get_cephadm_config(), deps

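# Illustrative sketch (not part of the module): the config-json returned by
# get_cephadm_config() above has roughly this shape. The pool/namespace values
# and the ganesha.conf contents are made-up placeholders.
_EXAMPLE_NFS_CONFIG_JSON = {
    'pool': 'nfs-ganesha',             # spec.pool
    'namespace': 'nfs-ns',             # spec.namespace, only included if set
    'userid': 'nfs.foo.hostA',         # f'{daemon_type}.{daemon_id}'
    'extra_args': ['-N', 'NIV_EVENT'],
    'files': {
        'ganesha.conf': '# rendered from services/nfs/ganesha.conf.j2',
    },
    # plus the keys merged in by self.mgr._get_config_and_keyring(...)
}
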
def _do_upgrade(self):
    # type: () -> None
    if not self.upgrade_state:
        logger.debug('_do_upgrade no state, exiting')
        return

    target_image = self.target_image
    target_id = self.upgrade_state.target_id
    target_digests = self.upgrade_state.target_digests
    target_version = self.upgrade_state.target_version

    first = False
    if not target_id or not target_version or not target_digests:
        # need to learn the container hash
        logger.info('Upgrade: First pull of %s' % target_image)
        self.upgrade_info_str = 'Doing first pull of %s image' % (target_image)
        try:
            target_id, target_version, target_digests = CephadmServe(
                self.mgr)._get_container_image_info(target_image)
        except OrchestratorError as e:
            self._fail_upgrade('UPGRADE_FAILED_PULL', {
                'severity': 'warning',
                'summary': 'Upgrade: failed to pull target image',
                'count': 1,
                'detail': [str(e)],
            })
            return
        if not target_version:
            self._fail_upgrade('UPGRADE_FAILED_PULL', {
                'severity': 'warning',
                'summary': 'Upgrade: failed to pull target image',
                'count': 1,
                'detail': ['unable to extract ceph version from container'],
            })
            return
        self.upgrade_state.target_id = target_id
        # extract the version portion of 'ceph version {version} ({sha1})'
        self.upgrade_state.target_version = target_version.split(' ')[2]
        self.upgrade_state.target_digests = target_digests
        self._save_upgrade_state()
        target_image = self.target_image
        first = True

    if target_digests is None:
        target_digests = []
    if target_version.startswith('ceph version '):
        # tolerate/fix upgrade state from older version
        self.upgrade_state.target_version = target_version.split(' ')[2]
        target_version = self.upgrade_state.target_version
    target_major, target_minor, target_patch = target_version.split('.')
    target_major_name = self.mgr.lookup_release_name(int(target_major))

    if first:
        logger.info('Upgrade: Target is version %s (%s)' % (
            target_version, target_major_name))
        logger.info('Upgrade: Target container is %s, digests %s' % (
            target_image, target_digests))

    version_error = self._check_target_version(target_version)
    if version_error:
        self._fail_upgrade('UPGRADE_BAD_TARGET_VERSION', {
            'severity': 'error',
            'summary': f'Upgrade: cannot upgrade/downgrade to {target_version}',
            'count': 1,
            'detail': [version_error],
        })
        return

    image_settings = self.get_distinct_container_image_settings()

    daemons = [d for d in self.mgr.cache.get_daemons()
               if d.daemon_type in CEPH_UPGRADE_ORDER]
    done = 0
    for daemon_type in CEPH_UPGRADE_ORDER:
        logger.debug('Upgrade: Checking %s daemons' % daemon_type)

        need_upgrade_self = False
        need_upgrade: List[Tuple[DaemonDescription, bool]] = []
        need_upgrade_deployer: List[Tuple[DaemonDescription, bool]] = []
        for d in daemons:
            if d.daemon_type != daemon_type:
                continue
            assert d.daemon_type is not None
            assert d.daemon_id is not None
            correct_digest = False
            if (any(d in target_digests for d in (d.container_image_digests or []))
                    or d.daemon_type in MONITORING_STACK_TYPES):
                logger.debug('daemon %s.%s container digest correct' % (
                    daemon_type, d.daemon_id))
                correct_digest = True
                if any(d in target_digests for d in (d.deployed_by or [])):
                    logger.debug('daemon %s.%s deployed by correct version' % (
                        d.daemon_type, d.daemon_id))
                    done += 1
                    continue

            if self.mgr.daemon_is_self(d.daemon_type, d.daemon_id):
                logger.info('Upgrade: Need to upgrade myself (mgr.%s)' %
                            self.mgr.get_mgr_id())
                need_upgrade_self = True
                continue

            if correct_digest:
                logger.debug('daemon %s.%s not deployed by correct version' % (
                    d.daemon_type, d.daemon_id))
                need_upgrade_deployer.append((d, True))
            else:
                logger.debug('daemon %s.%s not correct (%s, %s, %s)' % (
                    daemon_type, d.daemon_id,
                    d.container_image_name, d.container_image_digests, d.version))
                need_upgrade.append((d, False))

        if not need_upgrade_self:
            # only after the mgr itself is upgraded can we expect daemons to have
            # deployed_by == target_digests
            need_upgrade += need_upgrade_deployer

        # prepare filesystems for daemon upgrades?
        if (daemon_type == 'mds'
                and need_upgrade
                and not self._prepare_for_mds_upgrade(
                    target_major, [d_entry[0] for d_entry in need_upgrade])):
            return

        if need_upgrade:
            self.upgrade_info_str = 'Currently upgrading %s daemons' % (daemon_type)

        to_upgrade: List[Tuple[DaemonDescription, bool]] = []
        known_ok_to_stop: List[str] = []
        for d_entry in need_upgrade:
            d = d_entry[0]
            assert d.daemon_type is not None
            assert d.daemon_id is not None
            assert d.hostname is not None

            if not d.container_image_id:
                if d.container_image_name == target_image:
                    logger.debug('daemon %s has unknown container_image_id '
                                 'but has correct image name' % (d.name()))
                    continue

            if known_ok_to_stop:
                if d.name() in known_ok_to_stop:
                    logger.info(f'Upgrade: {d.name()} is also safe to restart')
                    to_upgrade.append(d_entry)
                continue

            if d.daemon_type in ['mon', 'osd', 'mds']:
                # NOTE: known_ok_to_stop is an output argument for
                # _wait_for_ok_to_stop
                if not self._wait_for_ok_to_stop(d, known_ok_to_stop):
                    return

            to_upgrade.append(d_entry)

            # if we don't have a list of others to consider, stop now
            if not known_ok_to_stop:
                break

        num = 1
        for d_entry in to_upgrade:
            d = d_entry[0]
            assert d.daemon_type is not None
            assert d.daemon_id is not None
            assert d.hostname is not None

            self._update_upgrade_progress(done / len(daemons))

            # make sure host has latest container image
            out, errs, code = CephadmServe(self.mgr)._run_cephadm(
                d.hostname, '', 'inspect-image', [],
                image=target_image, no_fsid=True, error_ok=True)
            if code or not any(d in target_digests
                               for d in json.loads(''.join(out)).get('repo_digests', [])):
                logger.info('Upgrade: Pulling %s on %s' % (target_image, d.hostname))
                self.upgrade_info_str = 'Pulling %s image on host %s' % (
                    target_image, d.hostname)
                out, errs, code = CephadmServe(self.mgr)._run_cephadm(
                    d.hostname, '', 'pull', [],
                    image=target_image, no_fsid=True, error_ok=True)
                if code:
                    self._fail_upgrade('UPGRADE_FAILED_PULL', {
                        'severity': 'warning',
                        'summary': 'Upgrade: failed to pull target image',
                        'count': 1,
                        'detail': [
                            'failed to pull %s on host %s' % (target_image, d.hostname)
                        ],
                    })
                    return
                r = json.loads(''.join(out))
                if not any(d in target_digests for d in r.get('repo_digests', [])):
                    logger.info('Upgrade: image %s pull on %s got new digests %s '
                                '(not %s), restarting' % (
                                    target_image, d.hostname,
                                    r['repo_digests'], target_digests))
                    self.upgrade_info_str = ('Image %s pull on %s got new digests %s '
                                             '(not %s), restarting' % (
                                                 target_image, d.hostname,
                                                 r['repo_digests'], target_digests))
                    self.upgrade_state.target_digests = r['repo_digests']
                    self._save_upgrade_state()
                    return

                self.upgrade_info_str = 'Currently upgrading %s daemons' % (daemon_type)

            if len(to_upgrade) > 1:
                logger.info('Upgrade: Updating %s.%s (%d/%d)' % (
                    d.daemon_type, d.daemon_id, num, len(to_upgrade)))
            else:
                logger.info('Upgrade: Updating %s.%s' % (d.daemon_type, d.daemon_id))
            action = 'Upgrading' if not d_entry[1] else 'Redeploying'
            try:
                daemon_spec = CephadmDaemonDeploySpec.from_daemon_description(d)
                self.mgr._daemon_action(
                    daemon_spec,
                    'redeploy',
                    image=target_image if not d_entry[1] else None
                )
            except Exception as e:
                self._fail_upgrade('UPGRADE_REDEPLOY_DAEMON', {
                    'severity': 'warning',
                    'summary': f'{action} daemon {d.name()} on host {d.hostname} failed.',
                    'count': 1,
                    'detail': [f'Upgrade daemon: {d.name()}: {e}'],
                })
                return
            num += 1
        if to_upgrade:
            return

        # complete mon upgrade?
        if daemon_type == 'mon':
            if not self.mgr.get("have_local_config_map"):
                logger.info('Upgrade: Restarting mgr now that mons are running pacific')
                need_upgrade_self = True

        if need_upgrade_self:
            try:
                self.mgr.mgr_service.fail_over()
            except OrchestratorError as e:
                self._fail_upgrade('UPGRADE_NO_STANDBY_MGR', {
                    'severity': 'warning',
                    'summary': f'Upgrade: {e}',
                    'count': 1,
                    'detail': [
                        'The upgrade process needs to upgrade the mgr, '
                        'but it needs at least one standby to proceed.',
                    ],
                })
                return

            return  # unreachable code, as fail_over never returns
        elif daemon_type == 'mgr':
            if 'UPGRADE_NO_STANDBY_MGR' in self.mgr.health_checks:
                del self.mgr.health_checks['UPGRADE_NO_STANDBY_MGR']
                self.mgr.set_health_checks(self.mgr.health_checks)

        # make sure 'ceph versions' agrees
        ret, out_ver, err = self.mgr.check_mon_command({
            'prefix': 'versions',
        })
        j = json.loads(out_ver)
        for version, count in j.get(daemon_type, {}).items():
            short_version = version.split(' ')[2]
            if short_version != target_version:
                logger.warning(
                    'Upgrade: %d %s daemon(s) are %s != target %s' % (
                        count, daemon_type, short_version, target_version))

        # push down configs
        daemon_type_section = name_to_config_section(daemon_type)
        if image_settings.get(daemon_type_section) != target_image:
            logger.info('Upgrade: Setting container_image for all %s' % daemon_type)
            self.mgr.set_container_image(daemon_type_section, target_image)
        to_clean = []
        for section in image_settings.keys():
            if section.startswith(name_to_config_section(daemon_type) + '.'):
                to_clean.append(section)
        if to_clean:
            logger.debug('Upgrade: Cleaning up container_image for %s' % to_clean)
            for section in to_clean:
                ret, image, err = self.mgr.check_mon_command({
                    'prefix': 'config rm',
                    'name': 'container_image',
                    'who': section,
                })

        logger.debug('Upgrade: All %s daemons are up to date.' % daemon_type)

        # complete osd upgrade?
        if daemon_type == 'osd':
            osdmap = self.mgr.get("osd_map")
            osd_min_name = osdmap.get("require_osd_release", "argonaut")
            osd_min = ceph_release_to_major(osd_min_name)
            if osd_min < int(target_major):
                logger.info(
                    f'Upgrade: Setting require_osd_release to {target_major} {target_major_name}')
                ret, _, err = self.mgr.check_mon_command({
                    'prefix': 'osd require-osd-release',
                    'release': target_major_name,
                })

        # complete mds upgrade?
        if daemon_type == 'mds' and self.upgrade_state.fs_original_max_mds:
            for i in self.mgr.get("fs_map")['filesystems']:
                fs_id = i["id"]
                fs_name = i['mdsmap']['fs_name']
                new_max = self.upgrade_state.fs_original_max_mds.get(fs_id)
                if new_max:
                    self.mgr.log.info('Upgrade: Scaling up filesystem %s max_mds to %d' % (
                        fs_name, new_max))
                    ret, _, err = self.mgr.check_mon_command({
                        'prefix': 'fs set',
                        'fs_name': fs_name,
                        'var': 'max_mds',
                        'val': str(new_max),
                    })

            self.upgrade_state.fs_original_max_mds = {}
            self._save_upgrade_state()

    # clean up
    logger.info('Upgrade: Finalizing container_image settings')
    self.mgr.set_container_image('global', target_image)

    for daemon_type in CEPH_UPGRADE_ORDER:
        ret, image, err = self.mgr.check_mon_command({
            'prefix': 'config rm',
            'name': 'container_image',
            'who': name_to_config_section(daemon_type),
        })

    logger.info('Upgrade: Complete!')
    if self.upgrade_state.progress_id:
        self.mgr.remote('progress', 'complete',
                        self.upgrade_state.progress_id)
    self.upgrade_state = None
    self._save_upgrade_state()
    return

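# Illustrative sketch (not part of the module): the version parsing used above
# ("extract the version portion of 'ceph version {version} ({sha1})'") relies on
# the version being the third space-separated token. The sample string below is
# made up for the example.
_sample = 'ceph version 16.2.0 (0123456789abcdef0123456789abcdef01234567) pacific (stable)'
assert _sample.split(' ')[2] == '16.2.0'
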
def _do_upgrade(self):
    # type: () -> None
    if not self.upgrade_state:
        logger.debug('_do_upgrade no state, exiting')
        return

    target_name = self.upgrade_state.target_name
    target_id = self.upgrade_state.target_id
    if not target_id:
        # need to learn the container hash
        logger.info('Upgrade: First pull of %s' % target_name)
        try:
            target_id, target_version = self.mgr._get_container_image_id(target_name)
        except OrchestratorError as e:
            self._fail_upgrade('UPGRADE_FAILED_PULL', {
                'severity': 'warning',
                'summary': 'Upgrade: failed to pull target image',
                'count': 1,
                'detail': [str(e)],
            })
            return
        self.upgrade_state.target_id = target_id
        self.upgrade_state.target_version = target_version
        self._save_upgrade_state()
    target_version = self.upgrade_state.target_version
    logger.info('Upgrade: Target is %s with id %s' % (target_name, target_id))

    # get all distinct container_image settings
    image_settings = {}
    ret, out, err = self.mgr.check_mon_command({
        'prefix': 'config dump',
        'format': 'json',
    })
    config = json.loads(out)
    for opt in config:
        if opt['name'] == 'container_image':
            image_settings[opt['section']] = opt['value']

    daemons = self.mgr.cache.get_daemons()
    done = 0
    for daemon_type in CEPH_UPGRADE_ORDER:
        logger.info('Upgrade: Checking %s daemons...' % daemon_type)
        need_upgrade_self = False
        for d in daemons:
            if d.daemon_type != daemon_type:
                continue
            if d.container_image_id == target_id:
                logger.debug('daemon %s.%s version correct' % (
                    daemon_type, d.daemon_id))
                done += 1
                continue
            logger.debug('daemon %s.%s not correct (%s, %s, %s)' % (
                daemon_type, d.daemon_id,
                d.container_image_name, d.container_image_id, d.version))

            if daemon_type == 'mgr' and \
                    d.daemon_id == self.mgr.get_mgr_id():
                logger.info('Upgrade: Need to upgrade myself (mgr.%s)' %
                            self.mgr.get_mgr_id())
                need_upgrade_self = True
                continue

            # make sure host has latest container image
            out, err, code = self.mgr._run_cephadm(
                d.hostname, '', 'inspect-image', [],
                image=target_name, no_fsid=True, error_ok=True)
            if code or json.loads(''.join(out)).get('image_id') != target_id:
                logger.info('Upgrade: Pulling %s on %s' % (target_name, d.hostname))
                out, err, code = self.mgr._run_cephadm(
                    d.hostname, '', 'pull', [],
                    image=target_name, no_fsid=True, error_ok=True)
                if code:
                    self._fail_upgrade('UPGRADE_FAILED_PULL', {
                        'severity': 'warning',
                        'summary': 'Upgrade: failed to pull target image',
                        'count': 1,
                        'detail': [
                            'failed to pull %s on host %s' % (target_name, d.hostname)
                        ],
                    })
                    return
                r = json.loads(''.join(out))
                if r.get('image_id') != target_id:
                    logger.info(
                        'Upgrade: image %s pull on %s got new image %s (not %s), '
                        'restarting' % (target_name, d.hostname,
                                        r['image_id'], target_id))
                    self.upgrade_state.target_id = r['image_id']
                    self._save_upgrade_state()
                    return

            self._update_upgrade_progress(done / len(daemons))

            if not d.container_image_id:
                if d.container_image_name == target_name:
                    logger.debug('daemon %s has unknown container_image_id '
                                 'but has correct image name' % (d.name()))
                    continue

            if not self._wait_for_ok_to_stop(d):
                return

            logger.info('Upgrade: Redeploying %s.%s' % (d.daemon_type, d.daemon_id))
            ret, out, err = self.mgr.check_mon_command({
                'prefix': 'config set',
                'name': 'container_image',
                'value': target_name,
                'who': name_to_config_section(daemon_type + '.' + d.daemon_id),
            })
            self.mgr._daemon_action(d.daemon_type, d.daemon_id, d.hostname, 'redeploy')
            return

        if need_upgrade_self:
            mgr_map = self.mgr.get('mgr_map')
            num = len(mgr_map.get('standbys'))
            if not num:
                self._fail_upgrade('UPGRADE_NO_STANDBY_MGR', {
                    'severity': 'warning',
                    'summary': 'Upgrade: Need standby mgr daemon',
                    'count': 1,
                    'detail': [
                        'The upgrade process needs to upgrade the mgr, '
                        'but it needs at least one standby to proceed.',
                    ],
                })
                return

            logger.info('Upgrade: there are %d other already-upgraded '
                        'standby mgrs, failing over' % num)

            self._update_upgrade_progress(done / len(daemons))

            # fail over
            ret, out, err = self.mgr.check_mon_command({
                'prefix': 'mgr fail',
                'who': self.mgr.get_mgr_id(),
            })
            return
        elif daemon_type == 'mgr':
            if 'UPGRADE_NO_STANDBY_MGR' in self.mgr.health_checks:
                del self.mgr.health_checks['UPGRADE_NO_STANDBY_MGR']
                self.mgr.set_health_checks(self.mgr.health_checks)

        # make sure 'ceph versions' agrees
        ret, out_ver, err = self.mgr.check_mon_command({
            'prefix': 'versions',
        })
        j = json.loads(out_ver)
        for version, count in j.get(daemon_type, {}).items():
            if version != target_version:
                logger.warning(
                    'Upgrade: %d %s daemon(s) are %s != target %s' % (
                        count, daemon_type, version, target_version))

        # push down configs
        if image_settings.get(daemon_type) != target_name:
            logger.info('Upgrade: Setting container_image for all %s...' % daemon_type)
            ret, out, err = self.mgr.check_mon_command({
                'prefix': 'config set',
                'name': 'container_image',
                'value': target_name,
                'who': name_to_config_section(daemon_type),
            })
        to_clean = []
        for section in image_settings.keys():
            if section.startswith(name_to_config_section(daemon_type) + '.'):
                to_clean.append(section)
        if to_clean:
            logger.debug('Upgrade: Cleaning up container_image for %s...' % to_clean)
            for section in to_clean:
                ret, image, err = self.mgr.check_mon_command({
                    'prefix': 'config rm',
                    'name': 'container_image',
                    'who': section,
                })

        logger.info('Upgrade: All %s daemons are up to date.' % daemon_type)

    # clean up
    logger.info('Upgrade: Finalizing container_image settings')
    ret, out, err = self.mgr.check_mon_command({
        'prefix': 'config set',
        'name': 'container_image',
        'value': target_name,
        'who': 'global',
    })
    for daemon_type in CEPH_UPGRADE_ORDER:
        ret, image, err = self.mgr.check_mon_command({
            'prefix': 'config rm',
            'name': 'container_image',
            'who': name_to_config_section(daemon_type),
        })

    logger.info('Upgrade: Complete!')
    if self.upgrade_state.progress_id:
        self.mgr.remote('progress', 'complete',
                        self.upgrade_state.progress_id)
    self.upgrade_state = None
    self._save_upgrade_state()
    return

def get_keyring_entity(self) -> str:
    return utils.name_to_config_section(self.get_rados_user())

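# Illustrative sketch (not part of the module), assuming the usual behaviour of
# utils.name_to_config_section: client-side daemon types (rgw, nfs, iscsi,
# rbd-mirror, crash) get a 'client.' prefix, while native Ceph daemons keep
# their own name. This is written from memory of the helper, not copied from it.
def _example_name_to_config_section(name: str) -> str:
    daemon_type = name.split('.', 1)[0]
    if daemon_type in ('rgw', 'nfs', 'iscsi', 'rbd-mirror', 'crash'):
        return 'client.' + name
    return name

# e.g. _example_name_to_config_section('rgw.myrealm.hostA') == 'client.rgw.myrealm.hostA'
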
def _do_upgrade(self):
    # type: () -> None
    if not self.upgrade_state:
        logger.debug('_do_upgrade no state, exiting')
        return

    target_image = self.target_image
    target_id = self.upgrade_state.target_id
    if not target_id or (self.mgr.use_repo_digest and not self.upgrade_state.repo_digest):
        # need to learn the container hash
        logger.info('Upgrade: First pull of %s' % target_image)
        try:
            target_id, target_version, repo_digest = CephadmServe(
                self.mgr)._get_container_image_info(target_image)
        except OrchestratorError as e:
            self._fail_upgrade('UPGRADE_FAILED_PULL', {
                'severity': 'warning',
                'summary': 'Upgrade: failed to pull target image',
                'count': 1,
                'detail': [str(e)],
            })
            return
        self.upgrade_state.target_id = target_id
        self.upgrade_state.target_version = target_version
        self.upgrade_state.repo_digest = repo_digest
        self._save_upgrade_state()
        target_image = self.target_image
    target_version = self.upgrade_state.target_version
    logger.info('Upgrade: Target is %s with id %s' % (target_image, target_id))

    image_settings = self.get_distinct_container_image_settings()

    daemons = self.mgr.cache.get_daemons()
    done = 0
    for daemon_type in CEPH_UPGRADE_ORDER:
        logger.info('Upgrade: Checking %s daemons...' % daemon_type)
        need_upgrade_self = False
        for d in daemons:
            if d.daemon_type != daemon_type:
                continue
            if d.container_image_id == target_id:
                logger.debug('daemon %s.%s version correct' % (
                    daemon_type, d.daemon_id))
                done += 1
                continue
            logger.debug('daemon %s.%s not correct (%s, %s, %s)' % (
                daemon_type, d.daemon_id,
                d.container_image_name, d.container_image_id, d.version))

            assert d.daemon_type is not None
            assert d.daemon_id is not None
            assert d.hostname is not None

            if self.mgr.daemon_is_self(d.daemon_type, d.daemon_id):
                logger.info('Upgrade: Need to upgrade myself (mgr.%s)' %
                            self.mgr.get_mgr_id())
                need_upgrade_self = True
                continue

            # make sure host has latest container image
            out, err, code = CephadmServe(self.mgr)._run_cephadm(
                d.hostname, '', 'inspect-image', [],
                image=target_image, no_fsid=True, error_ok=True)
            if code or json.loads(''.join(out)).get('image_id') != target_id:
                logger.info('Upgrade: Pulling %s on %s' % (target_image, d.hostname))
                out, err, code = CephadmServe(self.mgr)._run_cephadm(
                    d.hostname, '', 'pull', [],
                    image=target_image, no_fsid=True, error_ok=True)
                if code:
                    self._fail_upgrade('UPGRADE_FAILED_PULL', {
                        'severity': 'warning',
                        'summary': 'Upgrade: failed to pull target image',
                        'count': 1,
                        'detail': [
                            'failed to pull %s on host %s' % (target_image, d.hostname)
                        ],
                    })
                    return
                r = json.loads(''.join(out))
                if r.get('image_id') != target_id:
                    logger.info(
                        'Upgrade: image %s pull on %s got new image %s (not %s), '
                        'restarting' % (target_image, d.hostname,
                                        r['image_id'], target_id))
                    self.upgrade_state.target_id = r['image_id']
                    self._save_upgrade_state()
                    return

            self._update_upgrade_progress(done / len(daemons))

            if not d.container_image_id:
                if d.container_image_name == target_image:
                    logger.debug('daemon %s has unknown container_image_id '
                                 'but has correct image name' % (d.name()))
                    continue

            if not self._wait_for_ok_to_stop(d):
                return

            logger.info('Upgrade: Redeploying %s.%s' % (d.daemon_type, d.daemon_id))
            try:
                self.mgr._daemon_action(
                    d.daemon_type,
                    d.daemon_id,
                    d.hostname,
                    'redeploy',
                    image=target_image
                )
            except Exception as e:
                self._fail_upgrade('UPGRADE_REDEPLOY_DAEMON', {
                    'severity': 'warning',
                    'summary': f'Upgrading daemon {d.name()} on host {d.hostname} failed.',
                    'count': 1,
                    'detail': [f'Upgrade daemon: {d.name()}: {e}'],
                })
            return

        if need_upgrade_self:
            try:
                self.mgr.mgr_service.fail_over()
            except OrchestratorError as e:
                self._fail_upgrade('UPGRADE_NO_STANDBY_MGR', {
                    'severity': 'warning',
                    'summary': f'Upgrade: {e}',
                    'count': 1,
                    'detail': [
                        'The upgrade process needs to upgrade the mgr, '
                        'but it needs at least one standby to proceed.',
                    ],
                })
                return

            return  # unreachable code, as fail_over never returns
        elif daemon_type == 'mgr':
            if 'UPGRADE_NO_STANDBY_MGR' in self.mgr.health_checks:
                del self.mgr.health_checks['UPGRADE_NO_STANDBY_MGR']
                self.mgr.set_health_checks(self.mgr.health_checks)

        # make sure 'ceph versions' agrees
        ret, out_ver, err = self.mgr.check_mon_command({
            'prefix': 'versions',
        })
        j = json.loads(out_ver)
        for version, count in j.get(daemon_type, {}).items():
            if version != target_version:
                logger.warning(
                    'Upgrade: %d %s daemon(s) are %s != target %s' % (
                        count, daemon_type, version, target_version))

        # push down configs
        if image_settings.get(daemon_type) != target_image:
            logger.info('Upgrade: Setting container_image for all %s...' % daemon_type)
            self.mgr.set_container_image(
                name_to_config_section(daemon_type), target_image)
        to_clean = []
        for section in image_settings.keys():
            if section.startswith(name_to_config_section(daemon_type) + '.'):
                to_clean.append(section)
        if to_clean:
            logger.debug('Upgrade: Cleaning up container_image for %s...' % to_clean)
            for section in to_clean:
                ret, image, err = self.mgr.check_mon_command({
                    'prefix': 'config rm',
                    'name': 'container_image',
                    'who': section,
                })

        logger.info('Upgrade: All %s daemons are up to date.' % daemon_type)

    # clean up
    logger.info('Upgrade: Finalizing container_image settings')
    self.mgr.set_container_image('global', target_image)
    for daemon_type in CEPH_UPGRADE_ORDER:
        ret, image, err = self.mgr.check_mon_command({
            'prefix': 'config rm',
            'name': 'container_image',
            'who': name_to_config_section(daemon_type),
        })

    logger.info('Upgrade: Complete!')
    if self.upgrade_state.progress_id:
        self.mgr.remote('progress', 'complete',
                        self.upgrade_state.progress_id)
    self.upgrade_state = None
    self._save_upgrade_state()
    return