# NOTE: used as a context manager (see test_mds_config_purge below), so the
# @contextmanager decorator is required for the generator to work with `with`.
@contextmanager
def with_service(cephadm_module: CephadmOrchestrator, spec: ServiceSpec, meth=None,
                 host: str = '', status_running=False) -> Iterator[List[str]]:
    if spec.placement.is_empty() and host:
        spec.placement = PlacementSpec(hosts=[host], count=1)
    if meth is not None:
        c = meth(cephadm_module, spec)
        assert wait(cephadm_module, c) == f'Scheduled {spec.service_name()} update...'
    else:
        c = cephadm_module.apply([spec])
        assert wait(cephadm_module, c) == [f'Scheduled {spec.service_name()} update...']

    specs = [d.spec for d in wait(cephadm_module, cephadm_module.describe_service())]
    assert spec in specs

    CephadmServe(cephadm_module)._apply_all_services()

    if status_running:
        make_daemons_running(cephadm_module, spec.service_name())

    dds = wait(cephadm_module, cephadm_module.list_daemons())
    own_dds = [dd for dd in dds if dd.service_name() == spec.service_name()]
    if host and spec.service_type != 'osd':
        assert own_dds

    yield [dd.name() for dd in own_dds]

    assert_rm_service(cephadm_module, spec.service_name())
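# A minimal usage sketch for the with_service helper above, mirroring the
# pattern in test_mds_config_purge below. 'test' is a hypothetical hostname,
# and the surrounding fixtures (cephadm_module, with_host) are assumed.
def example_with_service(cephadm_module: CephadmOrchestrator) -> None:
    spec = ServiceSpec('mds', service_id='fsname')
    with with_host(cephadm_module, 'test'):
        with with_service(cephadm_module, spec, host='test') as daemon_names:
            # inside the block, the yielded names look like 'mds.fsname.<id>'
            assert all(name.startswith('mds.') for name in daemon_names)
        # on exit, assert_rm_service has verified the service was removed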
def fence_old_ranks(self,
                    spec: ServiceSpec,
                    rank_map: Dict[int, Dict[int, Optional[str]]],
                    num_ranks: int) -> None:
    for rank, m in list(rank_map.items()):
        if rank >= num_ranks:
            for daemon_id in m.values():
                if daemon_id is not None:
                    self.fence(daemon_id)
            del rank_map[rank]
            nodeid = f'{spec.service_name()}.{rank}'
            self.mgr.log.info(f'Removing {nodeid} from the ganesha grace table')
            self.run_grace_tool(cast(NFSServiceSpec, spec), 'remove', nodeid)
            self.mgr.spec_store.save_rank_map(spec.service_name(), rank_map)
        else:
            max_gen = max(m.keys())
            for gen, daemon_id in list(m.items()):
                if gen < max_gen:
                    if daemon_id is not None:
                        self.fence(daemon_id)
                    del rank_map[rank][gen]
                    self.mgr.spec_store.save_rank_map(spec.service_name(), rank_map)
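# Standalone sketch of the rank_map pruning rules fence_old_ranks applies
# (illustration only; the real method also fences daemons, edits the ganesha
# grace table, and persists the map through spec_store).
from typing import Dict, Optional

def prune_rank_map(rank_map: Dict[int, Dict[int, Optional[str]]],
                   num_ranks: int) -> Dict[int, Dict[int, Optional[str]]]:
    pruned: Dict[int, Dict[int, Optional[str]]] = {}
    for rank, gens in rank_map.items():
        if rank >= num_ranks:
            continue  # rank no longer exists: drop every generation
        max_gen = max(gens.keys())
        # for surviving ranks, only the newest generation is kept
        pruned[rank] = {gen: d for gen, d in gens.items() if gen == max_gen}
    return pruned

# e.g. shrinking from 3 ranks to 2 with a stale generation on rank 0:
# prune_rank_map({0: {0: 'a', 1: 'b'}, 1: {0: 'c'}, 2: {0: 'd'}}, 2)
# returns {0: {1: 'b'}, 1: {0: 'c'}}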
def test_daemon_ok_to_stop(self, ok_to_stop, cephadm_module: CephadmOrchestrator):
    spec = ServiceSpec(
        'mds',
        service_id='fsname',
        placement=PlacementSpec(hosts=['host1', 'host2'])
    )
    with with_host(cephadm_module, 'host1'), with_host(cephadm_module, 'host2'):
        c = cephadm_module.apply_mds(spec)
        out = wait(cephadm_module, c)
        match_glob(out, "Scheduled mds.fsname update...")
        CephadmServe(cephadm_module)._apply_all_services()

        [daemon] = cephadm_module.cache.daemons['host1'].keys()

        spec.placement.set_hosts(['host2'])

        ok_to_stop.side_effect = False

        c = cephadm_module.apply_mds(spec)
        out = wait(cephadm_module, c)
        match_glob(out, "Scheduled mds.fsname update...")
        CephadmServe(cephadm_module)._apply_all_services()

        # daemon names look like 'mds.<id>'; [4:] strips the 'mds.' prefix
        ok_to_stop.assert_called_with([daemon[4:]])

        assert_rm_daemon(cephadm_module, spec.service_name(), 'host1')  # verifies ok-to-stop
        assert_rm_daemon(cephadm_module, spec.service_name(), 'host2')
def osdspec_needs_apply(self, host: str, spec: ServiceSpec) -> bool:
    if (
        host not in self.devices
        or host not in self.last_device_change
        or host not in self.last_device_update
        or host not in self.osdspec_last_applied
        or spec.service_name() not in self.osdspec_last_applied[host]
    ):
        return True
    created = self.mgr.spec_store.get_created(spec)
    if not created or created > self.last_device_change[host]:
        return True
    return self.osdspec_last_applied[host][spec.service_name()] < self.last_device_change[host]
# NOTE: also a generator used via `with`, so it needs @contextmanager.
@contextmanager
def with_daemon(cephadm_module: CephadmOrchestrator, spec: ServiceSpec, meth, host: str):
    spec.placement = PlacementSpec(hosts=[host], count=1)

    c = meth(cephadm_module, spec)
    [out] = wait(cephadm_module, c)
    match_glob(out, f"Deployed {spec.service_name()}.* on host '{host}'")

    dds = cephadm_module.cache.get_daemons_by_service(spec.service_name())
    for dd in dds:
        if dd.hostname == host:
            yield dd.daemon_id
            assert_rm_daemon(cephadm_module, spec.service_name(), host)
            return

    assert False, 'Daemon not found'
def _apply_service_config(self, spec: ServiceSpec) -> None:
    if spec.config:
        section = utils.name_to_config_section(spec.service_name())
        for k, v in spec.config.items():
            try:
                current = self.mgr.get_foreign_ceph_option(section, k)
            except KeyError:
                self.log.warning(
                    f'Ignoring invalid {spec.service_name()} config option {k}')
                self.mgr.events.for_service(
                    spec, OrchestratorEvent.ERROR, f'Invalid config option {k}')
                continue
            if current != v:
                self.log.debug(f'setting [{section}] {k} = {v}')
                try:
                    self.mgr.check_mon_command({
                        'prefix': 'config set',
                        'name': k,
                        'value': str(v),
                        'who': section,
                    })
                except MonCommandFailed as e:
                    self.log.warning(
                        f'Failed to set {spec.service_name()} option {k}: {e}')
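# Hypothetical spec exercising _apply_service_config above: each entry in
# spec.config becomes a `ceph config set` on the service's config section
# (assuming ServiceSpec accepts a config dict, as the method implies; the
# option name and value below are illustrative only).
example_spec = ServiceSpec(
    'mds',
    service_id='fsname',
    config={'mds_cache_memory_limit': '4294967296'},  # hypothetical option/value
)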
def convert_to_explicit(spec: ServiceSpec) -> None:
    placements = HostAssignment(
        spec=spec,
        get_hosts_func=self.mgr._get_hosts,
        get_daemons_func=self.mgr.cache.get_daemons_by_service
    ).place()

    existing_daemons = self.mgr.cache.get_daemons_by_service(spec.service_name())

    # we only have to migrate if the new scheduler would remove daemons
    if len(placements) >= len(existing_daemons):
        return

    old_hosts = {h.hostname: h for h in spec.placement.hosts}
    new_hosts = [
        old_hosts[d.hostname] if d.hostname in old_hosts else HostPlacementSpec(
            hostname=d.hostname, network='', name='')
        for d in existing_daemons
    ]

    new_placement = PlacementSpec(hosts=new_hosts, count=spec.placement.count)

    new_spec = ServiceSpec.from_json(spec.to_json())
    new_spec.placement = new_placement

    logger.info(f"Migrating {spec.one_line_str()} to explicit placement")

    self.mgr.spec_store.save(new_spec)
def test_daemon_description_service_name(spec: ServiceSpec,
                                         dd: DaemonDescription,
                                         valid: bool):
    if valid:
        assert spec.service_name() == dd.service_name()
    else:
        with pytest.raises(OrchestratorError):
            dd.service_name()
def test_mds_config_purge(self, cephadm_module: CephadmOrchestrator):
    spec = ServiceSpec('mds', service_id='fsname')
    with with_host(cephadm_module, 'test'):
        with with_service(cephadm_module, spec, host='test'):
            ret, out, err = cephadm_module.check_mon_command({
                'prefix': 'config get',
                'who': spec.service_name(),
                'key': 'mds_join_fs',
            })
            assert out == 'fsname'
        # after with_service exits and removes the service,
        # the option should have been purged
        ret, out, err = cephadm_module.check_mon_command({
            'prefix': 'config get',
            'who': spec.service_name(),
            'key': 'mds_join_fs',
        })
        assert not out
def test_apply_save(self, spec: ServiceSpec, meth, cephadm_module: CephadmOrchestrator):
    with with_host(cephadm_module, 'test'):
        if not spec.placement:
            spec.placement = PlacementSpec(hosts=['test'], count=1)
        c = meth(cephadm_module, spec)
        assert wait(cephadm_module, c) == f'Scheduled {spec.service_name()} update...'
        assert [d.spec for d in wait(cephadm_module, cephadm_module.describe_service())] == [spec]

        cephadm_module._apply_all_services()

        dds = wait(cephadm_module, cephadm_module.list_daemons())
        for dd in dds:
            assert dd.service_name() == spec.service_name()

        assert_rm_service(cephadm_module, spec.service_name())
def convert_to_explicit(spec: ServiceSpec) -> None:
    existing_daemons = self.mgr.cache.get_daemons_by_service(spec.service_name())
    placements, to_add, to_remove = HostAssignment(
        spec=spec,
        hosts=self.mgr.inventory.all_specs(),
        unreachable_hosts=self.mgr.cache.get_unreachable_hosts(),
        daemons=existing_daemons,
    ).place()

    # we only have to migrate if the new scheduler would remove daemons
    if len(placements) >= len(existing_daemons):
        return

    def to_hostname(d: DaemonDescription) -> HostPlacementSpec:
        if d.hostname in old_hosts:
            return old_hosts[d.hostname]
        else:
            assert d.hostname
            return HostPlacementSpec(d.hostname, '', '')

    old_hosts = {h.hostname: h for h in spec.placement.hosts}
    new_hosts = [to_hostname(d) for d in existing_daemons]

    new_placement = PlacementSpec(
        hosts=new_hosts,
        count=spec.placement.count
    )

    new_spec = ServiceSpec.from_json(spec.to_json())
    new_spec.placement = new_placement

    logger.info(f"Migrating {spec.one_line_str()} to explicit placement")
    self.mgr.spec_store.save(new_spec)
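# Sketch of the conversion convert_to_explicit performs: a placement the new
# scheduler would shrink gets pinned to the hosts where daemons already run.
# The hostnames below are hypothetical.
implicit = PlacementSpec(count=3)  # before migration: scheduler picks hosts
explicit = PlacementSpec(          # after migration: hosts are pinned
    count=3,
    hosts=[HostPlacementSpec('host1', '', ''),
           HostPlacementSpec('host2', '', ''),
           HostPlacementSpec('host3', '', '')],
)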
@contextmanager
def with_service(cephadm_module: CephadmOrchestrator, spec: ServiceSpec,
                 meth, host: str) -> Iterator[List[str]]:
    if spec.placement.is_empty():
        spec.placement = PlacementSpec(hosts=[host], count=1)
    c = meth(cephadm_module, spec)
    assert wait(cephadm_module, c) == f'Scheduled {spec.service_name()} update...'
    specs = [d.spec for d in wait(cephadm_module, cephadm_module.describe_service())]
    assert spec in specs

    cephadm_module._apply_all_services()

    dds = wait(cephadm_module, cephadm_module.list_daemons())
    own_dds = [dd for dd in dds if dd.service_name() == spec.service_name()]
    assert own_dds

    yield [dd.name() for dd in own_dds]

    assert_rm_service(cephadm_module, spec.service_name())
def test_apply_save(self, spec: ServiceSpec, meth, cephadm_module):
    with self._with_host(cephadm_module, 'test'):
        spec.placement = PlacementSpec(hosts=['test'], count=1)
        c = meth(cephadm_module, spec)
        assert wait(cephadm_module, c) == f'Scheduled {spec.service_name()} update...'
        assert [d.spec for d in wait(cephadm_module, cephadm_module.describe_service())] == [spec]
        assert_rm_service(cephadm_module, spec.service_name())
def test_daemon_add(self, spec: ServiceSpec, meth, cephadm_module):
    with with_host(cephadm_module, 'test'):
        spec.placement = PlacementSpec(hosts=['test'], count=1)

        c = meth(cephadm_module, spec)
        [out] = wait(cephadm_module, c)
        match_glob(out, f"Deployed {spec.service_name()}.* on host 'test'")

        assert_rm_daemon(cephadm_module, spec.service_name(), 'test')
def update_ha_rgw_definitive_hosts(self, spec: ServiceSpec, hosts: List[HostPlacementSpec],
                                   add_hosts: Set[HostPlacementSpec]) -> HA_RGWSpec:
    spec = cast(HA_RGWSpec, spec)
    if not (set(hosts) == set(spec.definitive_host_list)):
        spec.definitive_host_list = hosts
        ha_rgw_daemons = self.mgr.cache.get_daemons_by_service(spec.service_name())
        for daemon in ha_rgw_daemons:
            if daemon.hostname in [h.hostname for h in hosts] and daemon.hostname not in add_hosts:
                self.mgr.cache.schedule_daemon_action(
                    daemon.hostname, daemon.name(), 'reconfig')
    return spec
def save(
    self,
    spec: ServiceSpec,
    update_create: bool = True,
) -> None:
    name = spec.service_name()
    if spec.preview_only:
        self.spec_preview[name] = spec
        return None
    self._specs[name] = spec

    if update_create:
        self.spec_created[name] = datetime_now()

    self._save(name)
def update_ha_rgw_definitive_hosts(
    self, spec: ServiceSpec, hosts: List[DaemonPlacement],
    add_hosts: List[DaemonPlacement]
) -> HA_RGWSpec:
    spec = cast(HA_RGWSpec, spec)
    hostnames = [p.hostname for p in hosts]
    add_hostnames = [p.hostname for p in add_hosts]
    if not (set(hostnames) == set(spec.definitive_host_list)):
        spec.definitive_host_list = hostnames
        ha_rgw_daemons = self.mgr.cache.get_daemons_by_service(spec.service_name())
        for daemon in ha_rgw_daemons:
            if daemon.hostname in hostnames and daemon.hostname not in add_hostnames:
                assert daemon.hostname is not None
                self.mgr.cache.schedule_daemon_action(
                    daemon.hostname, daemon.name(), 'reconfig')
    return spec
def save(self, spec: ServiceSpec, update_create: bool = True) -> None:
    name = spec.service_name()
    if spec.preview_only:
        self.spec_preview[name] = spec
        return None
    self._specs[name] = spec

    if update_create:
        self.spec_created[name] = datetime_now()

    data = {
        'spec': spec.to_json(),
        'created': datetime_to_str(self.spec_created[name]),
    }
    if name in self.spec_deleted:
        data['deleted'] = datetime_to_str(self.spec_deleted[name])

    self.mgr.set_store(
        SPEC_STORE_PREFIX + name,
        json.dumps(data, sort_keys=True),
    )
    self.mgr.events.for_service(spec, OrchestratorEvent.INFO, 'service was created')
def _apply_service(self, spec: ServiceSpec) -> bool:
    """
    Schedule a service. Deploy new daemons or remove old ones, depending
    on the target label and count specified in the placement.
    """
    self.mgr.migration.verify_no_migration()

    daemon_type = spec.service_type
    service_name = spec.service_name()
    if spec.unmanaged:
        self.log.debug('Skipping unmanaged service %s' % service_name)
        return False
    if spec.preview_only:
        self.log.debug('Skipping preview_only service %s' % service_name)
        return False
    self.log.debug('Applying service %s spec' % service_name)

    config_func = self._config_fn(daemon_type)

    if daemon_type == 'osd':
        self.mgr.osd_service.create_from_spec(cast(DriveGroupSpec, spec))
        # TODO: returning True here would result in a busy loop; we
        # can't know if the daemon count changed, since create_from_spec
        # doesn't return a solid indication
        return False

    daemons = self.mgr.cache.get_daemons_by_service(service_name)

    public_network = None
    if daemon_type == 'mon':
        ret, out, err = self.mgr.check_mon_command({
            'prefix': 'config get',
            'who': 'mon',
            'key': 'public_network',
        })
        if '/' in out:
            public_network = out.strip()
            self.log.debug('mon public_network is %s' % public_network)

    def matches_network(host):
        # type: (str) -> bool
        if not public_network:
            return False
        # make sure we have 1 or more IPs for that network on that host
        return len(self.mgr.cache.networks[host].get(public_network, [])) > 0

    ha = HostAssignment(
        spec=spec,
        hosts=self.mgr._hosts_with_daemon_inventory(),
        get_daemons_func=self.mgr.cache.get_daemons_by_service,
        filter_new_host=matches_network if daemon_type == 'mon' else None,
    )

    hosts: List[HostPlacementSpec] = ha.place()
    self.log.debug('Usable hosts: %s' % hosts)

    r = None

    # sanity check
    if daemon_type in ['mon', 'mgr'] and len(hosts) < 1:
        self.log.debug('cannot scale mon|mgr below 1 (hosts=%s)' % hosts)
        return False

    # add any?
    did_config = False

    add_daemon_hosts: Set[HostPlacementSpec] = ha.add_daemon_hosts(hosts)
    self.log.debug('Hosts that will receive new daemons: %s' % add_daemon_hosts)

    remove_daemon_hosts: Set[orchestrator.DaemonDescription] = ha.remove_daemon_hosts(hosts)
    self.log.debug('Hosts that will lose daemons: %s' % remove_daemon_hosts)

    for host, network, name in add_daemon_hosts:
        daemon_id = self.mgr.get_unique_name(daemon_type, host, daemons,
                                             prefix=spec.service_id,
                                             forcename=name)

        if not did_config and config_func:
            if daemon_type == 'rgw':
                rgw_config_func = cast(Callable[[RGWSpec, str], None], config_func)
                rgw_config_func(cast(RGWSpec, spec), daemon_id)
            else:
                config_func(spec)
            did_config = True

        daemon_spec = self.mgr.cephadm_services[daemon_type].make_daemon_spec(
            host, daemon_id, network, spec)
        self.log.debug('Placing %s.%s on host %s' % (daemon_type, daemon_id, host))

        try:
            daemon_spec = self.mgr.cephadm_services[daemon_type].prepare_create(daemon_spec)
            self.mgr._create_daemon(daemon_spec)
            r = True
        except (RuntimeError, OrchestratorError) as e:
            self.mgr.events.for_service(
                spec, 'ERROR',
                f"Failed while placing {daemon_type}.{daemon_id} on {host}: {e}")
            # only return "no change" if no one else has already succeeded.
            # later successes will also change to True
            if r is None:
                r = False
            continue

        # add to daemon list so next name(s) will also be unique
        sd = orchestrator.DaemonDescription(
            hostname=host,
            daemon_type=daemon_type,
            daemon_id=daemon_id,
        )
        daemons.append(sd)

    # remove any?
    def _ok_to_stop(remove_daemon_hosts: Set[orchestrator.DaemonDescription]) -> bool:
        daemon_ids = [d.daemon_id for d in remove_daemon_hosts]
        r = self.mgr.cephadm_services[daemon_type].ok_to_stop(daemon_ids)
        return not r.retval

    while remove_daemon_hosts and not _ok_to_stop(remove_daemon_hosts):
        # let's find a subset that is ok-to-stop
        remove_daemon_hosts.pop()
    for d in remove_daemon_hosts:
        r = True
        # NOTE: we are passing the 'force' flag here, which means
        # we can delete a mon instance's data.
        self.mgr._remove_daemon(d.name(), d.hostname)

    if r is None:
        r = False
    return r
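# Standalone sketch of the shrink-until-ok loop above: pop arbitrary
# candidates until the remaining removal set passes an ok-to-stop check
# (modeled here as a caller-supplied predicate).
from typing import Callable, Set

def shrink_until_ok(candidates: Set[str],
                    ok_to_stop: Callable[[Set[str]], bool]) -> Set[str]:
    while candidates and not ok_to_stop(candidates):
        candidates.pop()  # drop one daemon and retry with the smaller set
    return candidates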
def _apply_service(self, spec: ServiceSpec) -> bool:
    """
    Schedule a service. Deploy new daemons or remove old ones, depending
    on the target label and count specified in the placement.
    """
    self.mgr.migration.verify_no_migration()

    service_type = spec.service_type
    service_name = spec.service_name()
    if spec.unmanaged:
        self.log.debug('Skipping unmanaged service %s' % service_name)
        return False
    if spec.preview_only:
        self.log.debug('Skipping preview_only service %s' % service_name)
        return False
    self.log.debug('Applying service %s spec' % service_name)

    if service_type == 'osd':
        self.mgr.osd_service.create_from_spec(cast(DriveGroupSpec, spec))
        # TODO: returning True here would result in a busy loop; we
        # can't know if the daemon count changed, since create_from_spec
        # doesn't return a solid indication
        return False

    daemons = self.mgr.cache.get_daemons_by_service(service_name)

    public_network = None
    if service_type == 'mon':
        out = str(self.mgr.get_foreign_ceph_option('mon', 'public_network'))
        if '/' in out:
            public_network = out.strip()
            self.log.debug('mon public_network is %s' % public_network)

    def matches_network(host):
        # type: (str) -> bool
        if not public_network:
            return False
        # make sure we have 1 or more IPs for that network on that host
        return len(self.mgr.cache.networks[host].get(public_network, [])) > 0

    def virtual_ip_allowed(host):
        # type: (str) -> bool
        # Verify that it is possible to use Virtual IPs in the host
        try:
            if self.mgr.cache.facts[host]['kernel_parameters'][
                    'net.ipv4.ip_nonlocal_bind'] == '0':
                return False
        except KeyError:
            return False
        return True

    ha = HostAssignment(
        spec=spec,
        hosts=self.mgr._hosts_with_daemon_inventory(),
        get_daemons_func=self.mgr.cache.get_daemons_by_service,
        filter_new_host=matches_network if service_type == 'mon'
        else virtual_ip_allowed if service_type == 'ha-rgw' else None,
    )

    try:
        hosts: List[HostPlacementSpec] = ha.place()
        self.log.debug('Usable hosts: %s' % hosts)
    except OrchestratorError as e:
        self.log.error('Failed to apply %s spec %s: %s' % (
            spec.service_name(), spec, e))
        self.mgr.events.for_service(spec, 'ERROR', 'Failed to apply: ' + str(e))
        return False

    r = None

    # sanity check
    if service_type in ['mon', 'mgr'] and len(hosts) < 1:
        self.log.debug('cannot scale mon|mgr below 1 (hosts=%s)' % hosts)
        return False

    # add any?
    did_config = False

    add_daemon_hosts: Set[HostPlacementSpec] = ha.add_daemon_hosts(hosts)
    self.log.debug('Hosts that will receive new daemons: %s' % add_daemon_hosts)

    remove_daemon_hosts: Set[orchestrator.DaemonDescription] = ha.remove_daemon_hosts(hosts)
    self.log.debug('Hosts that will lose daemons: %s' % remove_daemon_hosts)

    if service_type == 'ha-rgw':
        spec = self.update_ha_rgw_definitive_hosts(spec, hosts, add_daemon_hosts)

    for host, network, name in add_daemon_hosts:
        for daemon_type in service_to_daemon_types(service_type):
            daemon_id = self.mgr.get_unique_name(daemon_type, host, daemons,
                                                 prefix=spec.service_id,
                                                 forcename=name)

            if not did_config:
                self.mgr.cephadm_services[service_type].config(spec, daemon_id)
                did_config = True

            daemon_spec = self.mgr.cephadm_services[service_type].make_daemon_spec(
                host, daemon_id, network, spec, daemon_type=daemon_type)
            self.log.debug('Placing %s.%s on host %s' % (daemon_type, daemon_id, host))

            try:
                daemon_spec = self.mgr.cephadm_services[service_type].prepare_create(daemon_spec)
                self._create_daemon(daemon_spec)
                r = True
            except (RuntimeError, OrchestratorError) as e:
                self.mgr.events.for_service(
                    spec, 'ERROR',
                    f"Failed while placing {daemon_type}.{daemon_id} on {host}: {e}")
                # only return "no change" if no one else has already succeeded.
                # later successes will also change to True
                if r is None:
                    r = False
                continue

            # add to daemon list so next name(s) will also be unique
            sd = orchestrator.DaemonDescription(
                hostname=host,
                daemon_type=daemon_type,
                daemon_id=daemon_id,
            )
            daemons.append(sd)

    # remove any?
    def _ok_to_stop(remove_daemon_hosts: Set[orchestrator.DaemonDescription]) -> bool:
        daemon_ids = [d.daemon_id for d in remove_daemon_hosts]
        assert None not in daemon_ids
        # setting the force flag retains previous behavior; should revisit later
        r = self.mgr.cephadm_services[service_type].ok_to_stop(
            cast(List[str], daemon_ids), force=True)
        return not r.retval

    while remove_daemon_hosts and not _ok_to_stop(remove_daemon_hosts):
        # let's find a subset that is ok-to-stop
        remove_daemon_hosts.pop()
    for d in remove_daemon_hosts:
        r = True
        # NOTE: we are passing the 'force' flag here, which means
        # we can delete a mon instance's data.
        assert d.hostname is not None
        self._remove_daemon(d.name(), d.hostname)

    if r is None:
        r = False
    return r
def for_service(self, spec: ServiceSpec, level: str, message: str) -> None:
    e = OrchestratorEvent(datetime_now(), 'service', spec.service_name(), level, message)
    self.add(e)
def get_created(self, spec: ServiceSpec) -> Optional[datetime.datetime]:
    return self.spec_created.get(spec.service_name())