def convert_to_explicit(spec: ServiceSpec) -> None:
    placements = HostAssignment(
        spec=spec,
        get_hosts_func=self.mgr._get_hosts,
        get_daemons_func=self.mgr.cache.get_daemons_by_service
    ).place()
    existing_daemons = self.mgr.cache.get_daemons_by_service(spec.service_name())

    # We have to migrate only if the new scheduler would remove daemons.
    if len(placements) >= len(existing_daemons):
        return

    old_hosts = {h.hostname: h for h in spec.placement.hosts}
    new_hosts = [
        old_hosts[d.hostname] if d.hostname in old_hosts
        else HostPlacementSpec(hostname=d.hostname, network='', name='')
        for d in existing_daemons
    ]

    new_placement = PlacementSpec(
        hosts=new_hosts,
        count=spec.placement.count
    )

    new_spec = ServiceSpec.from_json(spec.to_json())
    new_spec.placement = new_placement

    logger.info(f"Migrating {spec.one_line_str()} to explicit placement")

    self.mgr.spec_store.save(new_spec)
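The interesting part of this first version is the old-to-new host mapping: daemons whose host already has an explicit HostPlacementSpec pinned in the spec keep that entry (including its network and name fields), while daemons on unlisted hosts get a bare entry. A minimal, self-contained sketch of just that step, using simplified stand-ins for the real HostPlacementSpec and DaemonDescription types:

# Simplified stand-ins for the orchestrator types (assumption: the real
# classes carry more fields, but only hostname/network/name matter here).
from dataclasses import dataclass
from typing import Dict, List

@dataclass(frozen=True)
class HostPlacementSpec:
    hostname: str
    network: str = ''
    name: str = ''

@dataclass
class DaemonDescription:
    hostname: str

def map_daemons_to_hosts(pinned: List[HostPlacementSpec],
                         daemons: List[DaemonDescription]) -> List[HostPlacementSpec]:
    # Keep the pinned entry (with its network/name) when one exists for the
    # daemon's host; otherwise synthesize a bare placement for that host.
    old_hosts: Dict[str, HostPlacementSpec] = {h.hostname: h for h in pinned}
    return [old_hosts.get(d.hostname, HostPlacementSpec(d.hostname))
            for d in daemons]

pinned = [HostPlacementSpec('host1', network='10.0.0.0/24')]
running = [DaemonDescription('host1'), DaemonDescription('host3')]
print(map_daemons_to_hosts(pinned, running))
# host1 keeps its network; host3 gets a bare HostPlacementSpec('host3', '', '')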
def convert_to_explicit(spec: ServiceSpec) -> None:
    existing_daemons = self.mgr.cache.get_daemons_by_service(spec.service_name())
    placements, to_add, to_remove = HostAssignment(
        spec=spec,
        hosts=self.mgr.inventory.all_specs(),
        unreachable_hosts=self.mgr.cache.get_unreachable_hosts(),
        daemons=existing_daemons,
    ).place()

    # We have to migrate only if the new scheduler would remove daemons.
    if len(placements) >= len(existing_daemons):
        return

    old_hosts = {h.hostname: h for h in spec.placement.hosts}

    def to_hostname(d: DaemonDescription) -> HostPlacementSpec:
        if d.hostname in old_hosts:
            return old_hosts[d.hostname]
        else:
            assert d.hostname
            return HostPlacementSpec(d.hostname, '', '')

    new_hosts = [to_hostname(d) for d in existing_daemons]

    new_placement = PlacementSpec(
        hosts=new_hosts,
        count=spec.placement.count
    )

    new_spec = ServiceSpec.from_json(spec.to_json())
    new_spec.placement = new_placement

    logger.info(f"Migrating {spec.one_line_str()} to explicit placement")

    self.mgr.spec_store.save(new_spec)
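For intuition about the len(placements) >= len(existing_daemons) guard: the rewrite only happens when the new scheduler would shrink the service, and the resulting spec pins every host that currently runs a daemon. A toy walk-through with invented host names:

# Invented example data: three daemons are running, but the new scheduler
# would only place two, so the migration pins all three hosts explicitly.
existing_daemons = ['host1', 'host2', 'host3']   # hosts with a running daemon
new_placements = ['host1', 'host2']              # what the new scheduler proposes

if len(new_placements) >= len(existing_daemons):
    print('no daemons would be removed; keep the spec as-is')
else:
    explicit_hosts = [(h, '', '') for h in existing_daemons]
    print('rewrite spec with explicit hosts:', explicit_hosts)
    # -> rewrite spec with explicit hosts:
    #    [('host1', '', ''), ('host2', '', ''), ('host3', '', '')]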
def _apply_service(self, spec: ServiceSpec) -> bool:
    """
    Schedule a service.  Deploy new daemons or remove old ones, depending
    on the target label and count specified in the placement.
    """
    self.mgr.migration.verify_no_migration()

    daemon_type = spec.service_type
    service_name = spec.service_name()
    if spec.unmanaged:
        self.log.debug('Skipping unmanaged service %s' % service_name)
        return False
    if spec.preview_only:
        self.log.debug('Skipping preview_only service %s' % service_name)
        return False
    self.log.debug('Applying service %s spec' % service_name)

    config_func = self._config_fn(daemon_type)

    if daemon_type == 'osd':
        self.mgr.osd_service.create_from_spec(cast(DriveGroupSpec, spec))
        # TODO: returning True here would result in a busy loop; we can't
        # know if the daemon count changed, and create_from_spec doesn't
        # return a solid indication.
        return False

    daemons = self.mgr.cache.get_daemons_by_service(service_name)

    public_network = None
    if daemon_type == 'mon':
        ret, out, err = self.mgr.check_mon_command({
            'prefix': 'config get',
            'who': 'mon',
            'key': 'public_network',
        })
        if '/' in out:
            public_network = out.strip()
            self.log.debug('mon public_network is %s' % public_network)

    def matches_network(host):
        # type: (str) -> bool
        if not public_network:
            return False
        # make sure we have 1 or more IPs for that network on that host
        return len(self.mgr.cache.networks[host].get(public_network, [])) > 0

    ha = HostAssignment(
        spec=spec,
        hosts=self.mgr._hosts_with_daemon_inventory(),
        get_daemons_func=self.mgr.cache.get_daemons_by_service,
        filter_new_host=matches_network if daemon_type == 'mon' else None,
    )

    hosts: List[HostPlacementSpec] = ha.place()
    self.log.debug('Usable hosts: %s' % hosts)

    r = None

    # sanity check
    if daemon_type in ['mon', 'mgr'] and len(hosts) < 1:
        self.log.debug('cannot scale mon|mgr below 1 (hosts=%s)' % hosts)
        return False

    # add any?
    did_config = False

    add_daemon_hosts: Set[HostPlacementSpec] = ha.add_daemon_hosts(hosts)
    self.log.debug('Hosts that will receive new daemons: %s' % add_daemon_hosts)

    remove_daemon_hosts: Set[orchestrator.DaemonDescription] = \
        ha.remove_daemon_hosts(hosts)
    self.log.debug('Hosts that will lose daemons: %s' % remove_daemon_hosts)

    for host, network, name in add_daemon_hosts:
        daemon_id = self.mgr.get_unique_name(daemon_type, host, daemons,
                                             prefix=spec.service_id,
                                             forcename=name)

        if not did_config and config_func:
            if daemon_type == 'rgw':
                rgw_config_func = cast(Callable[[RGWSpec, str], None], config_func)
                rgw_config_func(cast(RGWSpec, spec), daemon_id)
            else:
                config_func(spec)
            did_config = True

        daemon_spec = self.mgr.cephadm_services[daemon_type].make_daemon_spec(
            host, daemon_id, network, spec)
        self.log.debug('Placing %s.%s on host %s' % (daemon_type, daemon_id, host))

        try:
            daemon_spec = self.mgr.cephadm_services[daemon_type].prepare_create(
                daemon_spec)
            self.mgr._create_daemon(daemon_spec)
            r = True
        except (RuntimeError, OrchestratorError) as e:
            self.mgr.events.for_service(
                spec, 'ERROR',
                f"Failed while placing {daemon_type}.{daemon_id} "
                f"on {host}: {e}")

            # only return "no change" if no one else has already succeeded.
            # later successes will also change r to True
            if r is None:
                r = False
            continue

        # add to daemon list so next name(s) will also be unique
        sd = orchestrator.DaemonDescription(
            hostname=host,
            daemon_type=daemon_type,
            daemon_id=daemon_id,
        )
        daemons.append(sd)

    # remove any?
    def _ok_to_stop(
            remove_daemon_hosts: Set[orchestrator.DaemonDescription]
    ) -> bool:
        daemon_ids = [d.daemon_id for d in remove_daemon_hosts]
        r = self.mgr.cephadm_services[daemon_type].ok_to_stop(daemon_ids)
        return not r.retval

    while remove_daemon_hosts and not _ok_to_stop(remove_daemon_hosts):
        # let's find a subset that is ok-to-stop
        remove_daemon_hosts.pop()

    for d in remove_daemon_hosts:
        r = True
        # NOTE: we are passing the 'force' flag here, which means
        # we can delete a mon instance's data.
        self.mgr._remove_daemon(d.name(), d.hostname)

    if r is None:
        r = False

    return r
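The while loop above implements a simple backoff: pop arbitrary daemons from the removal set until the remaining subset passes ok_to_stop. A self-contained sketch of the same pattern with a stubbed safety check (the real ok_to_stop returns a result object whose retval is non-zero when stopping is unsafe):

from typing import Set

def ok_to_stop(daemon_ids: Set[str]) -> bool:
    # Stub safety check (an assumption for illustration): it is only safe
    # to stop one mon at a time.
    return len(daemon_ids) <= 1

to_remove: Set[str] = {'mon.a', 'mon.b', 'mon.c'}
while to_remove and not ok_to_stop(to_remove):
    # set.pop() discards an arbitrary element, so which daemons survive is
    # unspecified; the loop only guarantees that *some* safe subset remains.
    to_remove.pop()

print(to_remove)  # a single arbitrary mon, e.g. {'mon.b'}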
def _apply_service(self, spec: ServiceSpec) -> bool:
    """
    Schedule a service.  Deploy new daemons or remove old ones, depending
    on the target label and count specified in the placement.
    """
    self.mgr.migration.verify_no_migration()

    service_type = spec.service_type
    service_name = spec.service_name()
    if spec.unmanaged:
        self.log.debug('Skipping unmanaged service %s' % service_name)
        return False
    if spec.preview_only:
        self.log.debug('Skipping preview_only service %s' % service_name)
        return False
    self.log.debug('Applying service %s spec' % service_name)

    if service_type == 'osd':
        self.mgr.osd_service.create_from_spec(cast(DriveGroupSpec, spec))
        # TODO: returning True here would result in a busy loop; we can't
        # know if the daemon count changed, and create_from_spec doesn't
        # return a solid indication.
        return False

    daemons = self.mgr.cache.get_daemons_by_service(service_name)

    public_network = None
    if service_type == 'mon':
        out = str(self.mgr.get_foreign_ceph_option('mon', 'public_network'))
        if '/' in out:
            public_network = out.strip()
            self.log.debug('mon public_network is %s' % public_network)

    def matches_network(host):
        # type: (str) -> bool
        if not public_network:
            return False
        # make sure we have 1 or more IPs for that network on that host
        return len(self.mgr.cache.networks[host].get(public_network, [])) > 0

    def virtual_ip_allowed(host):
        # type: (str) -> bool
        # Verify that it is possible to use virtual IPs on the host
        try:
            if self.mgr.cache.facts[host]['kernel_parameters'][
                    'net.ipv4.ip_nonlocal_bind'] == '0':
                return False
        except KeyError:
            return False
        return True

    ha = HostAssignment(
        spec=spec,
        hosts=self.mgr._hosts_with_daemon_inventory(),
        get_daemons_func=self.mgr.cache.get_daemons_by_service,
        filter_new_host=matches_network if service_type == 'mon'
        else virtual_ip_allowed if service_type == 'ha-rgw'
        else None,
    )

    try:
        hosts: List[HostPlacementSpec] = ha.place()
        self.log.debug('Usable hosts: %s' % hosts)
    except OrchestratorError as e:
        self.log.error('Failed to apply %s spec %s: %s' % (
            spec.service_name(), spec, e))
        self.mgr.events.for_service(spec, 'ERROR', 'Failed to apply: ' + str(e))
        return False

    r = None

    # sanity check
    if service_type in ['mon', 'mgr'] and len(hosts) < 1:
        self.log.debug('cannot scale mon|mgr below 1 (hosts=%s)' % hosts)
        return False

    # add any?
    did_config = False

    add_daemon_hosts: Set[HostPlacementSpec] = ha.add_daemon_hosts(hosts)
    self.log.debug('Hosts that will receive new daemons: %s' % add_daemon_hosts)

    remove_daemon_hosts: Set[orchestrator.DaemonDescription] = \
        ha.remove_daemon_hosts(hosts)
    self.log.debug('Hosts that will lose daemons: %s' % remove_daemon_hosts)

    if service_type == 'ha-rgw':
        spec = self.update_ha_rgw_definitive_hosts(spec, hosts, add_daemon_hosts)

    for host, network, name in add_daemon_hosts:
        for daemon_type in service_to_daemon_types(service_type):
            daemon_id = self.mgr.get_unique_name(daemon_type, host, daemons,
                                                 prefix=spec.service_id,
                                                 forcename=name)

            if not did_config:
                self.mgr.cephadm_services[service_type].config(spec, daemon_id)
                did_config = True

            daemon_spec = self.mgr.cephadm_services[service_type].make_daemon_spec(
                host, daemon_id, network, spec, daemon_type=daemon_type)
            self.log.debug('Placing %s.%s on host %s' % (
                daemon_type, daemon_id, host))

            try:
                daemon_spec = self.mgr.cephadm_services[service_type].prepare_create(
                    daemon_spec)
                self._create_daemon(daemon_spec)
                r = True
            except (RuntimeError, OrchestratorError) as e:
                self.mgr.events.for_service(
                    spec, 'ERROR',
                    f"Failed while placing {daemon_type}.{daemon_id} "
                    f"on {host}: {e}")

                # only return "no change" if no one else has already succeeded.
                # later successes will also change r to True
                if r is None:
                    r = False
                continue

            # add to daemon list so next name(s) will also be unique
            sd = orchestrator.DaemonDescription(
                hostname=host,
                daemon_type=daemon_type,
                daemon_id=daemon_id,
            )
            daemons.append(sd)

    # remove any?
    def _ok_to_stop(
            remove_daemon_hosts: Set[orchestrator.DaemonDescription]
    ) -> bool:
        daemon_ids = [d.daemon_id for d in remove_daemon_hosts]
        assert None not in daemon_ids
        # setting the force flag retains previous behavior; should revisit later
        r = self.mgr.cephadm_services[service_type].ok_to_stop(
            cast(List[str], daemon_ids), force=True)
        return not r.retval

    while remove_daemon_hosts and not _ok_to_stop(remove_daemon_hosts):
        # let's find a subset that is ok-to-stop
        remove_daemon_hosts.pop()

    for d in remove_daemon_hosts:
        r = True
        # NOTE: we are passing the 'force' flag here, which means
        # we can delete a mon instance's data.
        assert d.hostname is not None
        self._remove_daemon(d.name(), d.hostname)

    if r is None:
        r = False

    return r
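The new inner loop over service_to_daemon_types reflects that one service may now expand to several daemon types; for 'ha-rgw' in this era of cephadm that meant an haproxy plus a keepalived daemon per placed host. A sketch of that fan-out with an illustrative mapping (the real table lives in the cephadm sources and may differ by release):

from typing import Dict, List

# Illustrative mapping only; not the authoritative cephadm table.
SERVICE_TO_DAEMON_TYPES: Dict[str, List[str]] = {
    'mon': ['mon'],
    'rgw': ['rgw'],
    'ha-rgw': ['haproxy', 'keepalived'],  # one service, two daemon types
}

def planned_daemons(service_type: str, hosts: List[str]) -> List[str]:
    # One daemon of each type per placed host, mirroring the nested loop above.
    return [f'{daemon_type}@{host}'
            for host in hosts
            for daemon_type in SERVICE_TO_DAEMON_TYPES.get(service_type,
                                                           [service_type])]

print(planned_daemons('ha-rgw', ['host1', 'host2']))
# ['haproxy@host1', 'keepalived@host1', 'haproxy@host2', 'keepalived@host2']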