class SlurmctldCharm(CharmBase):
    """Slurmctld lifecycle events."""

    _stored = StoredState()

    def __init__(self, *args):
        """Init _stored attributes and interfaces, observe events."""
        super().__init__(*args)

        self._stored.set_default(
            jwt_key=str(),
            # jwt_rsa is the attribute written in _on_install and read by
            # get_jwt_rsa(); give it a default so that reading it on a unit
            # that never generated the key does not raise AttributeError.
            jwt_rsa=str(),
            munge_key=str(),
            slurm_installed=False,
            slurmd_available=False,
            slurmrestd_available=False,
            slurmdbd_available=False,
            down_nodes=list(),
        )

        self._slurm_manager = SlurmManager(self, "slurmctld")

        self._slurmd = Slurmd(self, "slurmd")
        self._slurmdbd = Slurmdbd(self, "slurmdbd")
        self._slurmrestd = Slurmrestd(self, "slurmrestd")
        self._slurmctld_peer = SlurmctldPeer(self, "slurmctld-peer")
        self._prolog_epilog = PrologEpilog(self, "prolog-epilog")

        self._grafana = GrafanaSource(self, "grafana-source")
        self._influxdb = InfluxDB(self, "influxdb-api")
        self._elasticsearch = Elasticsearch(self, "elasticsearch")
        self._fluentbit = FluentbitClient(self, "fluentbit")

        self._user_group = UserGroupProvides(self, "user-group")

        self._etcd = EtcdOps()

        event_handler_bindings = {
            self.on.install: self._on_install,
            self.on.upgrade_charm: self._on_upgrade,
            self.on.update_status: self._on_update_status,
            self.on.config_changed: self._on_write_slurm_config,
            self.on.leader_elected: self._on_leader_elected,
            # slurm component lifecycle events
            self._slurmdbd.on.slurmdbd_available: self._on_slurmdbd_available,
            self._slurmdbd.on.slurmdbd_unavailable: self._on_slurmdbd_unavailable,
            self._slurmd.on.slurmd_available: self._on_write_slurm_config,
            self._slurmd.on.slurmd_unavailable: self._on_write_slurm_config,
            self._slurmd.on.slurmd_departed: self._on_write_slurm_config,
            self._slurmrestd.on.slurmrestd_available: self._on_slurmrestd_available,
            self._slurmrestd.on.slurmrestd_unavailable: self._on_write_slurm_config,
            # NOTE: a second slurmctld should get the jwt/munge keys and
            # configure them
            self._slurmctld_peer.on.slurmctld_peer_available: self._on_write_slurm_config,
            # fluentbit
            self.on["fluentbit"].relation_created: self._on_fluentbit_relation_created,
            # Addons lifecycle events
            self._prolog_epilog.on.prolog_epilog_available: self._on_write_slurm_config,
            self._prolog_epilog.on.prolog_epilog_unavailable: self._on_write_slurm_config,
            self._grafana.on.grafana_available: self._on_grafana_available,
            self._influxdb.on.influxdb_available: self._on_influxdb_available,
            self._influxdb.on.influxdb_unavailable: self._on_write_slurm_config,
            self._elasticsearch.on.elasticsearch_available: self._on_elasticsearch_available,
            self._elasticsearch.on.elasticsearch_unavailable: self._on_write_slurm_config,
            self._user_group.on.create_user_group: self._on_create_user_group,
            self._user_group.on.remove_user_group: self._on_remove_user_group,
            # actions
            self.on.show_current_config_action: self._on_show_current_config,
            self.on.drain_action: self._drain_nodes_action,
            self.on.resume_action: self._resume_nodes_action,
            self.on.influxdb_info_action: self._infludb_info_action,
        }
        for event, handler in event_handler_bindings.items():
            self.framework.observe(event, handler)

    @property
    def hostname(self):
        """Return the hostname."""
        return self._slurm_manager.hostname

    @property
    def port(self):
        """Return the port."""
        return self._slurm_manager.port

    @property
    def cluster_name(self) -> str:
        """Return the cluster name."""
        return self.config.get("cluster-name")

    @property
    def _slurmctld_info(self):
        """Return the slurmctld info provided by the peer relation."""
        return self._slurmctld_peer.get_slurmctld_info()

    @property
    def slurmdbd_info(self):
        """Return slurmdbd_info from relation."""
        return self._slurmdbd.get_slurmdbd_info()

    @property
    def _slurmd_info(self) -> list:
        """Return the slurmd info provided by the slurmd relation."""
        return self._slurmd.get_slurmd_info()

    @property
    def _cluster_info(self):
        """Assemble information about the cluster."""
        cluster_info = {}
        cluster_info['cluster_name'] = self.config.get('cluster-name')
        cluster_info['custom_config'] = self.config.get('custom-config')
        cluster_info['proctrack_type'] = self.config.get('proctrack-type')
        cluster_info['cgroup_config'] = self.config.get('cgroup-config')

        interval = self.config.get('health-check-interval')
        state = self.config.get('health-check-state')
        nhc = self._slurm_manager.slurm_config_nhc_values(interval, state)
        cluster_info.update(nhc)

        return cluster_info

    @property
    def _addons_info(self):
        """Assemble addons for slurm.conf."""
        return {
            **self._assemble_prolog_epilog(),
            **self._assemble_acct_gather_addon(),
            **self._assemble_elastic_search_addon(),
        }

    def _assemble_prolog_epilog(self) -> dict:
        """Generate the prolog_epilog section of the addons."""
        logger.debug("## Generating prolog epilog configuration")

        prolog_epilog = self._prolog_epilog.get_prolog_epilog()
        return {"prolog_epilog": prolog_epilog} if prolog_epilog else {}

    def _assemble_acct_gather_addon(self):
        """Generate the acct gather section of the addons."""
        logger.debug("## Generating acct gather configuration")

        addons = dict()

        influxdb_info = self._get_influxdb_info()
        if influxdb_info:
            addons["acct_gather"] = influxdb_info
            addons["acct_gather"]["default"] = "all"
            addons["acct_gather_profile"] = "acct_gather_profile/influxdb"

        # it is possible to setup influxdb or hdf5 profiles without the
        # relation, using the custom-config section of slurm.conf. We need to
        # support setting up the acct_gather configuration for this scenario
        acct_gather_custom = self.config.get("acct-gather-custom")
        if acct_gather_custom:
            if not addons.get("acct_gather"):
                addons["acct_gather"] = dict()

            addons["acct_gather"]["custom"] = acct_gather_custom

        addons["acct_gather_frequency"] = self.config.get(
            "acct-gather-frequency")

        return addons

    def _assemble_elastic_search_addon(self):
        """Generate the elasticsearch section of the addons."""
        logger.debug("## Generating elastic search addon configuration")
        addon = dict()

        elasticsearch_ingress = self._elasticsearch.elasticsearch_ingress
        if elasticsearch_ingress:
            suffix = f"/{self.cluster_name}/jobcomp"
            addon = {
                "elasticsearch_address": f"{elasticsearch_ingress}{suffix}"
            }

        return addon

    def set_slurmd_available(self, flag: bool):
        """Set stored value of slurmd available."""
        self._stored.slurmd_available = flag

    def _set_slurmdbd_available(self, flag: bool):
        """Set stored value of slurmdbd available."""
        self._stored.slurmdbd_available = flag

    def set_slurmrestd_available(self, flag: bool):
        """Set stored value of slurmrestd available."""
        self._stored.slurmrestd_available = flag

    def _is_leader(self):
        """Return whether this unit is the application leader."""
        return self.model.unit.is_leader()

    def is_slurm_installed(self):
        """Return true/false based on whether or not slurm is installed."""
        return self._stored.slurm_installed

    def _on_show_current_config(self, event):
        """Show current slurm.conf."""
        slurm_conf = self._slurm_manager.get_slurm_conf()
        event.set_results({"slurm.conf": slurm_conf})

    def _on_install(self, event):
        """Perform installation operations for slurmctld."""
        self.unit.set_workload_version(Path("version").read_text().strip())

        self.unit.status = WaitingStatus("Installing slurmctld")

        custom_repo = self.config.get("custom-slurm-repo")
        successful_installation = self._slurm_manager.install(custom_repo)

        if successful_installation:
            self._stored.slurm_installed = True

            # Store the munge_key and jwt_rsa key in the stored state.
            # NOTE: Use leadership settings instead of stored state when
            # leadership settings support becomes available in the framework.
            if self._is_leader():
                # NOTE the backup controller should also have the jwt and munge
                #      keys configured. We should move these information to the
                #      peer relation.
                self._stored.jwt_rsa = self._slurm_manager.generate_jwt_rsa()
                self._stored.munge_key = self._slurm_manager.get_munge_key()
                self._slurm_manager.configure_jwt_rsa(self.get_jwt_rsa())
            else:
                # NOTE: the secondary slurmctld should get the jwt and munge
                #       keys from the peer relation here
                logger.debug("secondary slurmctld")

            # all slurmctld should restart munged here, as it would assure
            # munge is working
            self._slurm_manager.restart_munged()
        else:
            # NOTE(review): the handler keeps going after deferring, so etcd
            # is still installed on a failed slurm installation. Confirm this
            # is intended before adding an early return.
            self.unit.status = BlockedStatus("Error installing slurmctld")
            event.defer()

        logger.debug("## Retrieving etcd resource to install it")
        try:
            etcd_path = self.model.resources.fetch("etcd")
            logger.debug(f"## Found etcd resource: {etcd_path}")
        except ModelError:
            logger.error("## Missing etcd resource")
            self.unit.status = BlockedStatus("Missing etcd resource")
            event.defer()
            return
        self._etcd.install(etcd_path)

        self._check_status()

    def _on_fluentbit_relation_created(self, event):
        """Set up Fluentbit log forwarding."""
        logger.debug("## Configuring fluentbit")
        cfg = list()
        cfg.extend(self._slurm_manager.fluentbit_config_nhc)
        cfg.extend(self._slurm_manager.fluentbit_config_slurm)
        self._fluentbit.configure(cfg)

    def _on_upgrade(self, event):
        """Perform upgrade operations."""
        self.unit.set_workload_version(Path("version").read_text().strip())

    def _on_update_status(self, event):
        """Handle update status."""
        self._check_status()

    def _on_leader_elected(self, event: LeaderElectedEvent) -> None:
        """Start etcd and publish the list of accounted nodes to it."""
        logger.debug("## slurmctld - leader elected")
        self._etcd.start()

        # populate etcd with the nodelist
        slurm_config = self._assemble_slurm_config()
        accounted_nodes = self._assemble_all_nodes(
            slurm_config.get("partitions", []))
        logger.debug(
            f"## Sending to etcd list of accounted nodes: {accounted_nodes}")
        self._etcd.set_list_of_accounted_nodes(accounted_nodes)

    def _check_status(self):
        """Check for all relations and set appropriate status.

        This charm needs these conditions to be satisfied in order to be ready:
        - Slurm components installed.
        - Munge running.
        - slurmdbd node running.
        - slurmd inventory.

        Returns True when the unit was set to ActiveStatus, False otherwise.
        """
        # NOTE: slurmd and slurmrestd are not needed for slurmctld to work,
        #       only for the cluster to operate. But we need slurmd inventory
        #       to assemble slurm.conf

        if self._slurm_manager.needs_reboot:
            self.unit.status = BlockedStatus("Machine needs reboot")
            return False

        if not self._stored.slurm_installed:
            self.unit.status = BlockedStatus("Error installing slurmctld")
            return False

        if self._is_leader() and not self._etcd.is_active():
            self.unit.status = WaitingStatus("Initializing charm")
            return False

        if not self._slurm_manager.check_munged():
            self.unit.status = BlockedStatus("Error configuring munge key")
            return False

        # statuses of mandatory components:
        # - joined: someone executed juju relate slurmctld foo
        # - available: the units exchanged data through the relation
        # NOTE: slurmrestd is not mandatory for the cluster to work, that's why
        #       it is not accounted for in here
        statuses = {
            "slurmd": {
                "available": self._stored.slurmd_available,
                "joined": self._slurmd.is_joined,
            },
            "slurmdbd": {
                "available": self._stored.slurmdbd_available,
                "joined": self._slurmdbd.is_joined,
            },
        }

        relations_needed = list()
        waiting_on = list()
        for component, status in statuses.items():
            if not status["joined"]:
                relations_needed.append(component)
            if not status["available"]:
                waiting_on.append(component)

        if relations_needed:
            msg = f"Need relations: {','.join(relations_needed)}"
            self.unit.status = BlockedStatus(msg)
            return False

        if waiting_on:
            msg = f"Waiting on: {','.join(waiting_on)}"
            self.unit.status = WaitingStatus(msg)
            return False

        self.unit.status = ActiveStatus("slurmctld available")
        return True

    def get_munge_key(self):
        """Get the stored munge key."""
        return self._stored.munge_key

    def get_jwt_rsa(self):
        """Get the stored jwt_rsa key."""
        return self._stored.jwt_rsa

    def _assemble_partitions(self, slurmd_info):
        """Return a deep copy of the partitions with the default one flagged.

        The partition whose "partition_name" equals the "default-partition"
        charm config value (when set) gets "partition_default" set to "YES".
        The input list and its dictionaries are not modified.
        """
        default_partition_from_config = self.config.get("default-partition")

        partitions = []
        for partition in slurmd_info:
            # Work on a copy so that the data handed in by the caller stays
            # untouched.
            partition_tmp = copy.deepcopy(partition)

            if (default_partition_from_config
                    and default_partition_from_config == partition["partition_name"]):
                partition_tmp["partition_default"] = "YES"

            partitions.append(partition_tmp)

        return partitions

    def _assemble_slurm_config(self):
        """Assemble and return the slurm config.

        Returns an empty dict when slurmctld, slurmd or slurmdbd information
        is not available yet.
        """
        logger.debug('## Assembling new slurm.conf')

        slurmctld_info = self._slurmctld_info
        slurmdbd_info = self.slurmdbd_info
        slurmd_info = self._slurmd_info
        cluster_info = self._cluster_info

        logger.debug("######## INFO")
        logger.debug(f'## slurmd: {slurmd_info}')
        logger.debug(f'## slurmctld_info: {slurmctld_info}')
        logger.debug(f'## slurmdbd_info: {slurmdbd_info}')
        logger.debug(f'## cluster_info: {cluster_info}')
        logger.debug("######## INFO - end")

        if not (slurmctld_info and slurmd_info and slurmdbd_info):
            return {}

        addons_info = self._addons_info
        partitions_info = self._assemble_partitions(slurmd_info)
        down_nodes = self._assemble_down_nodes(slurmd_info)

        logger.debug(f'#### addons: {addons_info}')
        logger.debug(f'#### partitions_info: {partitions_info}')
        logger.debug(f"#### Down nodes: {down_nodes}")

        return {
            "partitions": partitions_info,
            "down_nodes": down_nodes,
            **slurmctld_info,
            **slurmdbd_info,
            **addons_info,
            **cluster_info,
        }

    def _on_slurmrestd_available(self, event):
        """Set slurm_config on the relation when slurmrestd available."""
        if not self._check_status():
            event.defer()
            return

        slurm_config = self._assemble_slurm_config()

        if not slurm_config:
            self.unit.status = BlockedStatus(
                "Cannot generate slurm_config - defering event.")
            event.defer()
            return

        if self._stored.slurmrestd_available:
            self._slurmrestd.set_slurm_config_on_app_relation_data(
                slurm_config,
            )
            self._slurmrestd.restart_slurmrestd()

    def _on_slurmdbd_available(self, event):
        """Record that slurmdbd is available and rewrite the slurm config."""
        self._set_slurmdbd_available(True)
        self._on_write_slurm_config(event)

    def _on_slurmdbd_unavailable(self, event):
        """Record that slurmdbd is unavailable and refresh the unit status."""
        self._set_slurmdbd_available(False)
        self._check_status()

    def _on_write_slurm_config(self, event):
        """Check that we have what we need before we proceed."""
        logger.debug("### Slurmctld - _on_write_slurm_config()")

        # only the leader should write the config, restart, and scontrol reconf
        if not self._is_leader():
            return

        if not self._check_status():
            event.defer()
            return

        slurm_config = self._assemble_slurm_config()
        if not slurm_config:
            logger.debug("## Should rewrite slurm.conf, but we don't have it. "
                         "Deferring.")
            event.defer()
            return

        self._slurm_manager.render_slurm_configs(slurm_config)

        # restart is needed if nodes are added/removed from the cluster
        self._slurm_manager.slurm_systemctl('restart')
        self._slurm_manager.slurm_cmd('scontrol', 'reconfigure')

        # send the list of hostnames to slurmd via etcd
        accounted_nodes = self._assemble_all_nodes(slurm_config["partitions"])
        self._etcd.set_list_of_accounted_nodes(accounted_nodes)

        # send the custom NHC parameters to all slurmd
        self._slurmd.set_nhc_params(self.config.get('health-check-params'))

        # check for "not new anymore" nodes, i.e., nodes that ran the
        # node-configured action. Those nodes are not anymore in the
        # DownNodes section in the slurm.conf, but we need to resume them
        # manually and update the internal cache
        down_nodes = slurm_config['down_nodes']
        configured_nodes = self._assemble_configured_nodes(down_nodes)
        logger.debug(f"### configured nodes: {configured_nodes}")
        self._resume_nodes(configured_nodes)
        self._stored.down_nodes = down_nodes.copy()

        # slurmrestd needs the slurm.conf file, so send it every time it changes
        if self._stored.slurmrestd_available:
            self._slurmrestd.set_slurm_config_on_app_relation_data(
                slurm_config)
            # NOTE: scontrol reconfigure does not restart slurmrestd
            self._slurmrestd.restart_slurmrestd()

    @staticmethod
    def _assemble_all_nodes(slurmd_info: list) -> List[str]:
        """Parse slurmd_info and return a list with all hostnames."""
        return [
            node["node_name"]
            for partition in slurmd_info
            for node in partition["inventory"]
        ]

    @staticmethod
    def _assemble_down_nodes(slurmd_info):
        """Parse partitions' nodes and assemble a list of DownNodes."""
        return [
            node["node_name"]
            for partition in slurmd_info
            for node in partition["inventory"]
            if node["new_node"]
        ]

    def _assemble_configured_nodes(self, down_nodes):
        """Assemble list of nodes that are not new anymore.

        new_node status is removed with an action, this method returns a list
        of nodes that were previously new but are not anymore.
        """
        return [
            node for node in self._stored.down_nodes
            if node not in down_nodes
        ]

    def _resume_nodes(self, nodelist):
        """Run scontrol to resume the specified node list.

        Does nothing when nodelist is empty: an update command with an empty
        nodename is not a valid request.
        """
        if not nodelist:
            return

        nodes = ",".join(nodelist)
        update_cmd = f"update nodename={nodes} state=resume"
        self._slurm_manager.slurm_cmd('scontrol', update_cmd)

    def _on_grafana_available(self, event):
        """Create the grafana-source if we are the leader and have influxdb."""
        if not self._is_leader():
            return

        influxdb_info = self._get_influxdb_info()

        if influxdb_info:
            self._grafana.set_grafana_source_info(influxdb_info)
        else:
            logger.error(
                "## Can not set Grafana source: missing influxdb relation")

    def _on_influxdb_available(self, event):
        """Assemble addons to forward slurm data to influxdb."""
        self._on_write_slurm_config(event)

    def _on_elasticsearch_available(self, event):
        """Assemble addons to forward Slurm data to elasticsearch."""
        self._on_write_slurm_config(event)

    def _get_influxdb_info(self) -> dict:
        """Return influxdb info."""
        return self._influxdb.get_influxdb_info()

    def _drain_nodes_action(self, event):
        """Drain specified nodes."""
        nodes = event.params['nodename']
        reason = event.params['reason']

        logger.debug(f'#### Draining {nodes} because {reason}.')
        event.log(f'Draining {nodes} because {reason}.')

        # Build the argument vector directly instead of formatting a shell-like
        # string and splitting it: a reason containing quotes would otherwise
        # make the split fail or change how the arguments are grouped.
        cmd = [
            'scontrol', 'update', f'nodename={nodes}', 'state=drain',
            f'reason={reason}',
        ]
        try:
            subprocess.check_output(cmd)
            event.set_results({'status': 'draining', 'nodes': nodes})
        except subprocess.CalledProcessError as e:
            event.fail(message=f'Error draining {nodes}: {e.output}')

    def _resume_nodes_action(self, event):
        """Resume specified nodes."""
        nodes = event.params['nodename']

        logger.debug(f'#### Resuming {nodes}.')
        event.log(f'Resuming {nodes}.')

        # Argument vector instead of a split string, same as the drain action.
        cmd = ['scontrol', 'update', f'nodename={nodes}', 'state=resume']
        try:
            subprocess.check_output(cmd)
            event.set_results({'status': 'resuming', 'nodes': nodes})
        except subprocess.CalledProcessError as e:
            event.fail(message=f'Error resuming {nodes}: {e.output}')

    def _infludb_info_action(self, event):
        """Report the influxdb info, or "not related" when there is none."""
        influxdb_info = self._get_influxdb_info()

        if not influxdb_info:
            influxdb_info = "not related"
        logger.debug(f"## InfluxDB-info action: {influxdb_info}")
        event.set_results({"influxdb": influxdb_info})

    def _on_create_user_group(self, event):
        """Create the user and group provided.

        The outcome is written to the "status" key of the application data of
        the user-group relation.
        """
        user = self._user_group.user_name
        user_uid = self._user_group.user_uid
        group = self._user_group.group_name

        # Create the group.
        try:
            # use the UID as the GID
            subprocess.check_output(["groupadd", "--gid", user_uid, group])
        except subprocess.CalledProcessError as e:
            # The branches are mutually exclusive: an "already exists" result
            # must not also be reported as a generic error.
            if e.returncode == 9:
                logger.warning("## Group already exists.")
            elif e.returncode == 4:
                logger.warning("## GID already exists.")
                self._user_group._relation.data[self._user_group.model.app][
                    "status"] = "failure: GID already exists"
                return
            else:
                logger.error(f"## Error creating group: {e}")

        # Create the user.
        try:
            subprocess.check_output([
                "useradd",
                "--system",
                "--no-create-home",
                "--gid",
                group,
                "--shell",
                "/usr/sbin/nologin",
                "-u",
                user_uid,
                user,
            ])
        except subprocess.CalledProcessError as e:
            if e.returncode == 9:
                logger.warning("## User already exists.")
            elif e.returncode == 4:
                logger.warning("## UID already exists.")
                self._user_group._relation.data[self._user_group.model.app][
                    "status"] = "failure: UID already exists"
                return
            else:
                # NOTE(review): an unexpected useradd failure is only logged
                # and the relation still reports success below - confirm
                # whether a failure status should be published instead.
                logger.error(f"## Error creating user: {e}")

        self._user_group._relation.data[
            self._user_group.model.app]["status"] = "success: User created"

    def _on_remove_user_group(self, event):
        """Remove the user and group provided."""
        user = self._user_group.user_name
        group = self._user_group.group_name

        # Remove the user.
        try:
            subprocess.check_output(["userdel", user])
        except subprocess.CalledProcessError as e:
            logger.error(f"## Error deleting user: {e}")

        # Remove the group.
        try:
            subprocess.check_output(["groupdel", group])
        except subprocess.CalledProcessError as e:
            logger.error(f"## Error deleting group: {e}")
class CephBenchmarkingPeers(Object):
    """Helper around the peer relation of the benchmarking charm.

    Emits `has_peers` / `ready_peers` when the relation changes and gives
    access to the peers' addresses and to the swift values kept in the
    application data bag of the relation.
    """

    on = CephBenchmarkingPeerEvents()
    state = StoredState()

    # Keys used in the application data bag of the peer relation.
    SWIFT_KEY = "swift_key"
    SWIFT_USER_CREATED = "swift_user_created"

    def __init__(self, charm, relation_name):
        """Remember the relation name and observe its relation-changed event."""
        super().__init__(charm, relation_name)
        self.relation_name = relation_name
        self.this_unit = self.framework.model.unit
        changed_event = charm.on[relation_name].relation_changed
        self.framework.observe(changed_event, self.on_changed)

    def on_changed(self, event):
        """Emit has_peers, then ready_peers when peer details are available."""
        logging.info("CephBenchmarkingPeers on_changed")
        self.on.has_peers.emit()
        if self.ready_peer_details:
            self.on.ready_peers.emit()

    def set_swift_key(self, password):
        """Store the swift key in the application data of the relation."""
        logging.info("Setting swift key")
        app_data = self.peers_rel.data[self.peers_rel.app]
        app_data[self.SWIFT_KEY] = password

    def set_swift_user_created(self, user):
        """Store the created swift user in the application data."""
        logging.info("Setting swift user created")
        app_data = self.peers_rel.data[self.peers_rel.app]
        app_data[self.SWIFT_USER_CREATED] = user

    @property
    def ready_peer_details(self):
        """Map unit name to {'ip': address} for this unit and every peer."""
        details = {
            self.framework.model.unit.name: {'ip': self.peers_bind_address},
        }
        details.update({
            peer.name: {'ip': self.peers_rel.data[peer]['ingress-address']}
            for peer in self.peers_rel.units
        })
        return details

    @property
    def is_joined(self):
        """Whether the peer relation exists."""
        return self.peers_rel is not None

    @property
    def peers_rel(self):
        """The peer relation as returned by the model for relation_name."""
        return self.framework.model.get_relation(self.relation_name)

    @property
    def peers_binding(self):
        """The network binding of the peer relation."""
        return self.framework.model.get_binding(self.peers_rel)

    @property
    def peers_bind_address(self):
        """This unit's bind address on the peer binding, as a string."""
        bind_address = self.peers_binding.network.bind_address
        return str(bind_address)

    @property
    def swift_key(self):
        """The stored swift key, or None without a relation or a value."""
        relation = self.peers_rel
        if relation:
            return relation.data[relation.app].get(self.SWIFT_KEY)
        return None

    @property
    def swift_user_created(self):
        """The stored swift user, or None without a relation or a value."""
        relation = self.peers_rel
        if relation:
            return relation.data[relation.app].get(self.SWIFT_USER_CREATED)
        return None

    @property
    def peer_addresses(self):
        """Sorted list with this unit's bind address and the peers' addresses."""
        own_address = self.peers_bind_address
        remote_addresses = [
            self.peers_rel.data[peer]['ingress-address']
            for peer in self.peers_rel.units
        ]
        return sorted([own_address] + remote_addresses)

    @property
    def peers_count(self):
        """Number of remote units on the peer relation (0 without one)."""
        relation = self.peers_rel
        return len(relation.units) if relation else 0

    @property
    def unit_count(self):
        """Number of peers plus this unit."""
        return 1 + self.peers_count
class Operator(CharmBase):
    """Charm that renders a Dex configuration and applies Kubernetes manifests.

    The manifests are loaded from src/manifests/*.yaml, rendered with the
    charm configuration and applied through a lightkube Client.
    """

    state = StoredState()

    def __init__(self, *args):
        """Set up the metrics/dashboard providers and observe charm events."""
        super().__init__(*args)
        self.logger: logging.Logger = logging.getLogger(__name__)
        self.prometheus_provider = MetricsEndpointProvider(
            charm=self,
            relation_name="metrics-endpoint",
            jobs=[{
                "metrics_path": METRICS_PATH,
                "static_configs": [{
                    "targets": ["*:{}".format(METRICS_PORT)]
                }],
            }],
        )
        self.dashboard_provider = GrafanaDashboardProvider(self)

        for event in [
                self.on.install,
                self.on.upgrade_charm,
                self.on.config_changed,
                self.on.oidc_client_relation_changed,
                self.on.ingress_relation_changed,
        ]:
            self.framework.observe(event, self.main)

        # Upper bound, passed to stop_after_delay, for the resource checks
        # retried in main().
        self._max_time_checking_resources = 150

    @only_leader
    def main(self, event):
        """Render and apply the manifests, then wait for the resources.

        The unit ends up in BlockedStatus when the manifest cannot be
        computed, when applying it reported errors, or when the deployed
        resources did not pass the checks in time; ActiveStatus otherwise.
        """
        self.model.unit.status = MaintenanceStatus("Calculating manifests")
        self.ensure_state()

        try:
            manifest = self.get_manifest()
        except Exception as err:
            self.model.unit.status = BlockedStatus(str(err))
            return

        self.model.unit.status = MaintenanceStatus("Applying manifests")
        errors = self.set_manifest(manifest)

        if errors:
            self.model.unit.status = BlockedStatus(
                f"There were {len(errors)} errors while applying manifests.")
            for error in errors:
                self.logger.error(error)
            return

        # Ensure requested resources are up
        try:
            for attempt in Retrying(
                    retry=retry_if_exception_type(CheckFailedError),
                    stop=stop_after_delay(
                        max_delay=self._max_time_checking_resources),
                    wait=wait_exponential(multiplier=0.1, min=0.1, max=15),
                    reraise=True,
            ):
                with attempt:
                    self.logger.info(
                        f"Checking status of requested resources (attempt "
                        f"{attempt.retry_state.attempt_number})")
                    self._check_deployed_resources()
        except CheckFailedError:
            self.unit.status = BlockedStatus(
                "Some Kubernetes resources did not start correctly during install"
            )
            return

        # Otherwise, application is working as expected
        self.model.unit.status = ActiveStatus()

    @only_leader
    def remove(self, event):
        """Remove charm."""
        self.model.unit.status = MaintenanceStatus("Calculating manifests")
        self.ensure_state()

        manifest = self.get_manifest()

        self.model.unit.status = MaintenanceStatus("Removing manifests")
        self.remove_manifest(manifest)

    def ensure_state(self):
        """Give the stored state its default credentials, salt and user id.

        The password is the fallback used when no "static-password" is
        configured, so it is drawn from the `secrets` module rather than the
        general purpose pseudo-random generator.
        """
        # Function-scope import so that this method does not depend on the
        # module-level import list.
        import secrets

        self.state.set_default(
            username="******",
            password="".join(secrets.choice(ascii_letters) for _ in range(30)),
            salt=bcrypt.gensalt(),
            user_id=str(uuid4()),
        )

    def get_manifest(self):
        """Render the manifests in src/manifests with the current config.

        Also sends the ingress data to every application listed by the
        ingress interface. Returns the list of objects loaded from the
        rendered YAML files.
        """
        # Handle ingress
        ingress = self._get_interface("ingress")

        if ingress:
            for app_name in ingress.versions:
                data = {
                    "prefix": "/dex",
                    "rewrite": "/dex",
                    "service": self.model.app.name,
                    "port": self.model.config["port"],
                }
                ingress.send_data(data, app_name)

        # Get OIDC client info
        oidc = self._get_interface("oidc-client")

        if oidc:
            oidc_client_info = list(oidc.get_data().values())
        else:
            oidc_client_info = []

        # Load config values as convenient variables
        connectors = yaml.safe_load(self.model.config["connectors"])
        port = self.model.config["port"]
        public_url = self.model.config["public-url"].lower()
        if not public_url.startswith(("http://", "https://")):
            public_url = f"http://{public_url}"
        static_username = self.model.config[
            "static-username"] or self.state.username
        static_password = self.model.config[
            "static-password"] or self.state.password
        static_password = static_password.encode("utf-8")
        hashed = bcrypt.hashpw(static_password,
                               self.state.salt).decode("utf-8")

        static_config = {
            "enablePasswordDB": True,
            "staticPasswords": [{
                "email": static_username,
                "hash": hashed,
                "username": static_username,
                "userID": self.state.user_id,
            }],
        }

        config = json.dumps({
            "issuer": f"{public_url}/dex",
            "storage": {
                "type": "kubernetes",
                "config": {
                    "inCluster": True
                }
            },
            "web": {
                "http": f"0.0.0.0:{port}"
            },
            "logger": {
                "level": "debug",
                "format": "text"
            },
            "oauth2": {
                "skipApprovalScreen": True
            },
            "staticClients": oidc_client_info,
            "connectors": connectors,
            **static_config,
        })

        # Kubernetes won't automatically restart the pod when the configmap changes
        # unless we manually add the hash somewhere into the Deployment spec, so that
        # it changes whenever the configmap changes.
        config_hash = sha256()
        config_hash.update(config.encode("utf-8"))

        context = {
            "name": self.model.app.name.replace("-operator", ""),
            "namespace": self.model.name,
            "port": self.model.config["port"],
            "config_yaml": config,
            "config_hash": config_hash.hexdigest(),
        }

        return [
            obj for path in glob("src/manifests/*.yaml") for obj in
            codecs.load_all_yaml(Path(path).read_text(), context=context)
        ]

    def _check_deployed_resources(self, manifest=None):
        """Check the status of deployed resources, returning True if ok else raising CheckFailedError

        All abnormalities are captured in logs

        Params:
          manifest: (Optional) list of lightkube objects describing the entire application.  If
                    omitted, will be computed using self.get_manifest()
        """
        if manifest:
            expected_resources = manifest
        else:
            expected_resources = self.get_manifest()
        # One slot per expected resource; it stays None when the lookup fails.
        found_resources = [None] * len(expected_resources)
        errors = []

        client = Client()

        self.logger.info("Checking for expected resources")
        for i, resource in enumerate(expected_resources):
            try:
                found_resources[i] = client.get(
                    type(resource),
                    resource.metadata.name,
                    namespace=resource.metadata.namespace,
                )
            except ApiError:
                errors.append(
                    f"Cannot find k8s object for metadata '{resource.metadata}'"
                )

        self.logger.info(
            "Checking readiness of found StatefulSets/Deployments")
        # Only the error list is used here; the boolean result is ignored.
        _, statefulsets_errors = validate_statefulsets_and_deployments(
            found_resources)
        errors.extend(statefulsets_errors)

        # Log any errors
        for err in errors:
            self.logger.info(err)

        if errors:
            raise CheckFailedError(
                "Some Kubernetes resources missing/not ready. See logs for details",
                WaitingStatus,
            )
        return True

    def _get_interface(self, interface_name):
        """Return the requested interface, or None when it is not usable.

        On a CheckFailedError the unit status is set to the error's status
        before returning None.
        """
        # Remove this abstraction when SDI adds .status attribute to NoVersionsListed,
        # NoCompatibleVersionsListed:
        # https://github.com/canonical/serialized-data-interface/issues/26
        try:
            try:
                interface = get_interface(self, interface_name)
            except NoVersionsListed as err:
                raise CheckFailedError(str(err), WaitingStatus)
        except CheckFailedError as err:
            self.logger.debug("_get_interface ~ Checkfailederror catch")
            self.model.unit.status = err.status
            self.logger.info(str(err.status))
            return None

        return interface

    @staticmethod
    def set_manifest(manifest):
        """Create every resource, patching those that already exist.

        Returns the list of ApiError instances raised for any other reason.
        """
        client = Client()
        errors = []

        for resource in manifest:
            try:
                client.create(resource)
            except ApiError as err:
                if err.status.reason == "AlreadyExists":
                    client.patch(type(resource), resource.metadata.name,
                                 resource)
                else:
                    errors.append(err)

        return errors

    @staticmethod
    def remove_manifest(manifest):
        """Delete every resource of the manifest by type and name."""
        client = Client()

        for resource in manifest:
            client.delete(type(resource), resource.metadata.name)
class CephISCSIGatewayCharmJewel(CephISCSIGatewayCharmBase):
    """Variant of the Ceph iSCSI gateway charm tagged with release 'jewel'."""

    # Stored state handle declared on this subclass.
    state = StoredState()

    # Release name this charm class is associated with.
    release = "jewel"
class CephISCSIGatewayCharmBase(ops_openstack.OSBaseCharm):
    """Base charm for a Ceph iSCSI gateway.

    Wires up the ceph-client, cluster (peer) and certificates relations,
    renders the gateway configuration files and exposes actions for creating
    iSCSI targets and registering extra trusted IPs. Subclasses are expected
    to provide a ``release`` attribute, which ``__init__`` logs.
    """

    state = StoredState()
    PACKAGES = ['ceph-iscsi', 'tcmu-runner', 'ceph-common']
    CEPH_CAPABILITIES = [
        "osd", "allow *",
        "mon", "allow *",
        "mgr", "allow r"]

    # Config file -> services restarted when that file changes.
    RESTART_MAP = {
        '/etc/ceph/ceph.conf': ['rbd-target-api', 'rbd-target-gw'],
        '/etc/ceph/iscsi-gateway.cfg': ['rbd-target-api'],
        '/etc/ceph/ceph.client.ceph-iscsi.keyring': ['rbd-target-api']
    }

    DEFAULT_TARGET = "iqn.2003-01.com.ubuntu.iscsi-gw:iscsi-igw"
    REQUIRED_RELATIONS = ['ceph-client', 'cluster']
    # Only a two unit deployment has been tested; other unit counts are
    # probably fine too but need validating.
    ALLOWED_UNIT_COUNTS = [2]

    def __init__(self, framework, key):
        """Set stored-state defaults, build interfaces and observe events."""
        super().__init__(framework, key)
        logging.info("Using {} class".format(self.release))
        self.state.set_default(target_created=False)
        self.state.set_default(enable_tls=False)
        self.state.set_default(additional_trusted_ips=[])
        self.ceph_client = interface_ceph_client.CephClientRequires(
            self,
            'ceph-client')
        self.peers = interface_ceph_iscsi_peer.CephISCSIGatewayPeers(
            self,
            'cluster')
        self.tls = interface_tls_certificates.TlsRequires(self, "certificates")
        self.adapters = CephISCSIGatewayAdapters(
            (self.ceph_client, self.peers, self.tls),
            self)
        # Events are observed with the charm object itself as the observer,
        # so each one is dispatched to the matching ``on_<event>`` method.
        self.framework.observe(self.on.ceph_client_relation_joined, self)
        self.framework.observe(self.ceph_client.on.pools_available, self)
        self.framework.observe(self.peers.on.has_peers, self)
        self.framework.observe(self.peers.on.ready_peers, self)
        self.framework.observe(self.on.create_target_action, self)
        self.framework.observe(self.on.add_trusted_ip_action, self)
        self.framework.observe(self.on.certificates_relation_joined, self)
        self.framework.observe(self.on.certificates_relation_changed, self)
        self.framework.observe(self.on.config_changed, self)
        self.framework.observe(self.on.upgrade_charm, self)

    def on_add_trusted_ip_action(self, event):
        """Record the space separated IPs from the action's 'ips' param."""
        # NOTE(review): this appends the split result as ONE nested list
        # element rather than extending the list with each IP. That looks
        # unintended, but the consumer of additional_trusted_ips is not
        # visible here, so the stored shape is left unchanged - confirm.
        self.state.additional_trusted_ips.append(
            event.params['ips'].split(' '))
        logging.info(self.state.additional_trusted_ips)

    def on_create_target_action(self, event):
        """Create an iSCSI target, its backing image and a client for it.

        The target IQN defaults to DEFAULT_TARGET and the gateways default
        to every ready peer. The resulting IQN is returned as the action
        result under the 'iqn' key.
        """
        gw_client = gwcli_client.GatewayClient()
        target = event.params.get('iqn', self.DEFAULT_TARGET)
        gateway_units = event.params.get(
            'gateway-units',
            list(self.peers.ready_peer_details.keys()))
        gw_client.create_target(target)
        for gw_unit, gw_config in self.peers.ready_peer_details.items():
            if gw_unit in gateway_units:
                gw_client.add_gateway_to_target(
                    target,
                    gw_config['ip'],
                    gw_config['fqdn'])
        gw_client.create_pool(
            event.params['pool-name'],
            event.params['image-name'],
            event.params['image-size'])
        gw_client.add_client_to_target(
            target,
            event.params['client-initiatorname'])
        gw_client.add_client_auth(
            target,
            event.params['client-initiatorname'],
            event.params['client-username'],
            event.params['client-password'])
        gw_client.add_disk_to_client(
            target,
            event.params['client-initiatorname'],
            event.params['pool-name'],
            event.params['image-name'])
        event.set_results({'iqn': target})

    def setup_default_target(self):
        """Create DEFAULT_TARGET and add every ready peer as a gateway."""
        gw_client = gwcli_client.GatewayClient()
        gw_client.create_target(self.DEFAULT_TARGET)
        for gw_unit, gw_config in self.peers.ready_peer_details.items():
            gw_client.add_gateway_to_target(
                self.DEFAULT_TARGET,
                gw_config['ip'],
                gw_config['fqdn'])
        self.state.target_created = True

    def on_ready_peers(self, event):
        """Handle peers becoming ready; only the leader proceeds.

        The event is deferred until this unit is started. No target setup
        is performed at present (see the comment in the body).
        """
        if not self.unit.is_leader():
            logging.info("Leader should do setup")
            return
        if not self.state.is_started:
            logging.info("Cannot perform setup yet, not started")
            event.defer()
            return
        if self.state.target_created:
            logging.info("Initial target setup already complete")
            return
        else:
            # This appears to race and sometime runs before the
            # peer is 100% ready. There is probably little value
            # in this anyway so may just remove it.
            # setup_default_target() is therefore deliberately not called.
            return

    def on_has_peers(self, event):
        """Generate and publish the admin password if none exists yet."""
        logging.info("Unit has peers")
        if self.unit.is_leader() and not self.peers.admin_password:
            logging.info("Setting admin password")
            alphabet = string.ascii_letters + string.digits
            password = ''.join(secrets.choice(alphabet) for _ in range(8))
            self.peers.set_admin_password(password)

    def on_ceph_client_relation_joined(self, event):
        """Request the metadata pool, permissions and OSD settings."""
        logging.info("Requesting replicated pool")
        self.ceph_client.create_replicated_pool(
            self.model.config['rbd-metadata-pool'])
        logging.info("Requesting permissions")
        self.ceph_client.request_ceph_permissions(
            'ceph-iscsi',
            self.CEPH_CAPABILITIES)
        self.ceph_client.request_osd_settings({
            'osd heartbeat grace': 20,
            'osd heartbeat interval': 5})

    def on_config_changed(self, event):
        """Re-render config and re-send Ceph requests once started."""
        if self.state.is_started:
            self.on_pools_available(event)
            self.on_ceph_client_relation_joined(event)

    def on_upgrade_charm(self, event):
        """Re-render config and re-send Ceph requests once started."""
        if self.state.is_started:
            self.on_pools_available(event)
            self.on_ceph_client_relation_joined(event)

    def on_pools_available(self, event):
        """Render the config files, announce readiness and mark started.

        Deferred until the peer relation carries an admin password.
        """
        logging.info("on_pools_available")
        if not self.peers.admin_password:
            logging.info("Defering setup")
            event.defer()
            return

        def daemon_reload_and_restart(service_name):
            subprocess.check_call(['systemctl', 'daemon-reload'])
            subprocess.check_call(['systemctl', 'restart', service_name])

        # rbd-target-api gets a daemon-reload before its restart.
        rfuncs = {
            'rbd-target-api': daemon_reload_and_restart}

        @ch_host.restart_on_change(self.RESTART_MAP, restart_functions=rfuncs)
        def render_configs():
            for config_file in self.RESTART_MAP.keys():
                ch_templating.render(
                    os.path.basename(config_file),
                    config_file,
                    self.adapters)
        logging.info("Rendering config")
        render_configs()
        logging.info("Setting started state")
        self.peers.announce_ready()
        self.state.is_started = True
        self.update_status()
        logging.info("on_pools_available: status updated")

    def on_certificates_relation_joined(self, event):
        """Request an application certificate covering this unit's addresses.

        The SANs are the ingress and bind addresses of the 'public' and
        'cluster' bindings plus the local hostname.
        """
        addresses = set()
        for binding_name in ['public', 'cluster']:
            binding = self.model.get_binding(binding_name)
            addresses.add(binding.network.ingress_address)
            addresses.add(binding.network.bind_address)
        sans = [str(s) for s in addresses]
        sans.append(socket.gethostname())
        self.tls.request_application_cert(socket.getfqdn(), sans)

    def on_certificates_relation_changed(self, event):
        """Write the TLS material to disk and re-render the configuration.

        Returns early until both the root CA and the application
        certificates are available on the relation.
        """
        app_certs = self.tls.application_certs
        if not all([self.tls.root_ca_cert, app_certs]):
            return
        if self.tls.chain:
            # Append chain file so that clients that trust the root CA will
            # trust certs signed by an intermediate in the chain
            ca_cert_data = self.tls.root_ca_cert + os.linesep + self.tls.chain
        else:
            # No intermediate chain supplied: install the root CA on its own.
            # Without this branch ca_cert_data would be unbound below.
            ca_cert_data = self.tls.root_ca_cert
        pem_data = app_certs['cert'] + os.linesep + app_certs['key']
        tls_files = {
            '/etc/ceph/iscsi-gateway.crt': app_certs['cert'],
            '/etc/ceph/iscsi-gateway.key': app_certs['key'],
            '/etc/ceph/iscsi-gateway.pem': pem_data,
            '/usr/local/share/ca-certificates/vault_ca_cert.crt': ca_cert_data}
        for tls_file, tls_data in tls_files.items():
            with open(tls_file, 'w') as f:
                f.write(tls_data)
        subprocess.check_call(['update-ca-certificates'])
        # Extract the public key from the combined PEM file.
        cert_out = subprocess.check_output(
            ('openssl x509 -inform pem -in /etc/ceph/iscsi-gateway.pem '
             '-pubkey -noout').split())
        with open('/etc/ceph/iscsi-gateway-pub.key', 'w') as f:
            f.write(cert_out.decode('UTF-8'))
        self.state.enable_tls = True
        self.on_pools_available(event)

    def custom_status_check(self):
        """Block the unit unless the peer count is in ALLOWED_UNIT_COUNTS.

        Returns:
            True when the unit count is allowed, False otherwise.
        """
        if self.peers.unit_count not in self.ALLOWED_UNIT_COUNTS:
            self.unit.status = ops.model.BlockedStatus(
                '{} is an invalid unit count'.format(self.peers.unit_count))
            return False
        return True
class SlurmdbdCharm(CharmBase):
    """Slurmdbd Charm Class."""

    _stored = StoredState()

    def __init__(self, *args):
        """Set the defaults for slurmdbd."""
        super().__init__(*args)

        self._stored.set_default(db_info=dict())
        self._stored.set_default(munge_key=str())
        self._stored.set_default(slurm_installed=False)

        self._slurm_manager = SlurmManager(self, "slurmdbd")
        self._slurmdbd = SlurmdbdProvidesRelation(self, "slurmdbd")
        self._db = MySQLClient(self, "db")

        event_handler_bindings = {
            self.on.install: self._on_install,
            self.on.config_changed: self._write_config_and_restart_slurmdbd,
            self._db.on.database_available:
            self._write_config_and_restart_slurmdbd,
            self._slurmdbd.on.munge_key_available:
            self._write_config_and_restart_slurmdbd,
            self._slurmdbd.on.slurmctld_unavailable:
            self._on_slurmctld_unavailable,
            self.on.upgrade_charm: self._on_upgrade,
        }
        for event, handler in event_handler_bindings.items():
            self.framework.observe(event, handler)

    def _on_install(self, event):
        """Install slurm and record that the installation happened."""
        self._slurm_manager.install()
        self._stored.slurm_installed = True
        self.unit.status = ActiveStatus("Slurm Installed")

    def _on_upgrade(self, event):
        """Handle upgrade charm event.

        Installs the snap fetched from the 'slurm' resource. A failing
        install is logged as an error instead of being silently ignored; it
        does not raise.
        """
        logger.debug('_on_upgrade(): entering')
        resource_path = str(self.model.resources.fetch('slurm'))
        return_code = subprocess.call([
            "snap",
            "install",
            resource_path,
            "--dangerous",
            "--classic",
        ])
        if return_code != 0:
            logger.error(
                "_on_upgrade(): snap install of %s failed with exit code %s",
                resource_path,
                return_code,
            )

    def _on_slurmctld_unavailable(self, event):
        """Block the unit until slurmctld is related again."""
        self.unit.status = BlockedStatus("Need relation to slurmctld.")

    def _check_status(self) -> bool:
        """Check that we have the things we need.

        Returns:
            True when the database info, the munge key and the slurm
            installation are all present. Otherwise False, with a blocked
            status set when the database info or the munge key is missing.
        """
        db_info = self._stored.db_info
        munge_key = self._stored.munge_key
        slurm_installed = self._stored.slurm_installed

        if not (db_info and slurm_installed and munge_key):
            if not db_info:
                self.unit.status = BlockedStatus("Need relation to MySQL.")
            elif not munge_key:
                self.unit.status = BlockedStatus("Need relation to slurmctld.")
            return False
        return True

    def _write_config_and_restart_slurmdbd(self, event):
        """Check for prereqs before writing config/restart of slurmdbd."""
        if not self._check_status():
            event.defer()
            return

        slurmdbd_host_port_addr = {
            # Short hostname only: anything after the first dot is dropped.
            'slurmdbd_hostname': socket.gethostname().split(".")[0],
            'slurmdbd_port': "6819",
        }
        # Later entries win on key clashes: charm config overrides the
        # host/port defaults and the stored db info overrides both.
        slurmdbd_config = {
            'munge_key': self._stored.munge_key,
            **slurmdbd_host_port_addr,
            **self.model.config,
            **self._stored.db_info,
        }
        self._slurm_manager.render_config_and_restart(slurmdbd_config)
        self._slurmdbd.set_slurmdbd_available_on_unit_relation_data()
        self.unit.status = ActiveStatus("Slurmdbd Available")

    def set_munge_key(self, munge_key):
        """Set the munge key in the stored state."""
        self._stored.munge_key = munge_key

    def set_db_info(self, db_info):
        """Set the db_info in the stored state."""
        self._stored.db_info = db_info
class AMQPRequires(Object):
    """Requires side of the AMQP relation.

    Requests a user and vhost from the AMQP server when the relation is
    joined and emits ``connected``, ``ready`` and ``goneaway`` events as the
    relation evolves. The relation-data accessors return ``None`` (or an
    empty list for ``hostnames``) while the relation is not established.
    """

    on = AMQPServerEvents()
    _stored = StoredState()

    def __init__(self, charm, relation_name: str, username: str, vhost: str):
        """Store the request parameters and observe the relation events."""
        super().__init__(charm, relation_name)
        self.charm = charm
        self.relation_name = relation_name
        self.username = username
        self.vhost = vhost
        self.framework.observe(
            self.charm.on[relation_name].relation_joined,
            self._on_amqp_relation_joined,
        )
        self.framework.observe(
            self.charm.on[relation_name].relation_changed,
            self._on_amqp_relation_changed,
        )
        # A departing unit is handled exactly like a data change.
        self.framework.observe(
            self.charm.on[relation_name].relation_departed,
            self._on_amqp_relation_changed,
        )
        self.framework.observe(
            self.charm.on[relation_name].relation_broken,
            self._on_amqp_relation_broken,
        )

    def _on_amqp_relation_joined(self, event):
        """AMQP relation joined."""
        logging.debug("RabbitMQAMQPRequires on_joined")
        self.on.connected.emit()
        self.request_access(self.username, self.vhost)

    def _on_amqp_relation_changed(self, event):
        """AMQP relation changed."""
        logging.debug("RabbitMQAMQPRequires on_changed/departed")
        # The password only appears once the server has granted access.
        if self.password:
            self.on.ready.emit()

    def _on_amqp_relation_broken(self, event):
        """AMQP relation broken."""
        logging.debug("RabbitMQAMQPRequires on_broken")
        self.on.goneaway.emit()

    @property
    def _amqp_rel(self) -> Relation:
        """The AMQP relation."""
        return self.framework.model.get_relation(self.relation_name)

    def _remote_app_value(self, key: str):
        """Return ``key`` from the remote application's relation data.

        Returns None when the relation or its remote application is not
        available, instead of failing on the missing object.
        """
        relation = self._amqp_rel
        if relation is None or relation.app is None:
            return None
        return relation.data[relation.app].get(key)

    @property
    def password(self) -> str:
        """Return the AMQP password from the server side of the relation."""
        return self._remote_app_value("password")

    @property
    def hostname(self) -> str:
        """Return the hostname from the AMQP relation"""
        return self._remote_app_value("hostname")

    @property
    def ssl_port(self) -> str:
        """Return the SSL port from the AMQP relation"""
        return self._remote_app_value("ssl_port")

    @property
    def ssl_ca(self) -> str:
        """Return the SSL CA from the AMQP relation"""
        return self._remote_app_value("ssl_ca")

    @property
    def hostnames(self) -> List[str]:
        """Return a list of remote RMQ hosts from the AMQP relation"""
        relation = self._amqp_rel
        if relation is None:
            return []
        return [
            relation.data[unit].get("ingress-address")
            for unit in relation.units
        ]

    def request_access(self, username: str, vhost: str) -> None:
        """Request access to the AMQP server.

        Only the leader writes the request; it goes into this application's
        relation data. Nothing is written when the relation does not exist.
        """
        if not self.model.unit.is_leader():
            return
        relation = self._amqp_rel
        if relation is None:
            logging.warning(
                "Cannot request AMQP access, relation %s is not established",
                self.relation_name,
            )
            return
        logging.debug("Requesting AMQP user and vhost")
        relation.data[self.charm.app]["username"] = username
        relation.data[self.charm.app]["vhost"] = vhost
class GrafanaCharm(CharmBase):
    """Charm to run Grafana on Kubernetes.

    This charm allows for high-availability
    (as long as a non-sqlite database relation is present).

    Developers of this charm should be aware of the Grafana provisioning docs:
    https://grafana.com/docs/grafana/latest/administration/provisioning/
    """

    _stored = StoredState()

    def __init__(self, *args):
        """Initialise state, relation consumers and event observers."""
        super().__init__(*args)

        # -- initialize states --
        self.name = "grafana"
        self.container = self.unit.get_container(self.name)
        self.grafana_service = Grafana("localhost", PORT)
        # In-memory hashes of the last pushed config files (see the
        # grafana_*_hash properties for the fallback when these are unset).
        self._grafana_config_ini_hash = None
        self._grafana_datasources_hash = None
        self._stored.set_default(
            database=dict(), pebble_ready=False, k8s_service_patched=False, admin_password=""
        )

        # -- standard events
        self.framework.observe(self.on.install, self._on_install)
        self.framework.observe(self.on.grafana_pebble_ready, self._on_pebble_ready)
        self.framework.observe(self.on.config_changed, self._on_config_changed)
        self.framework.observe(self.on.stop, self._on_stop)
        self.framework.observe(self.on.upgrade_charm, self._on_upgrade_charm)
        self.framework.observe(self.on.get_admin_password_action, self._on_get_admin_password)

        # -- grafana_source relation observations
        self.source_consumer = GrafanaSourceConsumer(self, "grafana-source")
        self.framework.observe(
            self.source_consumer.on.sources_changed,
            self._on_grafana_source_changed,
        )
        self.framework.observe(
            self.source_consumer.on.sources_to_delete_changed,
            self._on_grafana_source_changed,
        )

        # -- grafana_dashboard relation observations
        self.dashboard_consumer = GrafanaDashboardConsumer(self, "grafana-dashboard")
        self.framework.observe(
            self.dashboard_consumer.on.dashboards_changed, self._on_dashboards_changed
        )

        # -- database relation observations
        self.framework.observe(self.on["database"].relation_changed, self._on_database_changed)
        self.framework.observe(self.on["database"].relation_broken, self._on_database_broken)

    def _on_install(self, _):
        """Handler for the install event during which we will update the K8s service."""
        self._patch_k8s_service()

    def _on_config_changed(self, event: ConfigChangedEvent) -> None:
        """Event handler for the config-changed event.

        If the configuration is changed, update the variables we know about and
        restart the services. We don't know specifically whether it's a new install,
        a relation change, a leader election, or other, so call `_configure` to check
        the config files

        Args:
            event: a :class:`ConfigChangedEvent` to signal that something happened
        """
        self._configure()

    def _on_grafana_source_changed(self, event: GrafanaSourceEvents) -> None:
        """When a grafana-source is added or modified, update the config.

        Args:
            event: a :class:`GrafanaSourceEvents` instance sent from the provider
        """
        self._configure()

    def _on_upgrade_charm(self, event: UpgradeCharmEvent) -> None:
        """Re-provision Grafana and its datasources on upgrade.

        Args:
            event: a :class:`UpgradeCharmEvent` to signal the upgrade
        """
        self.source_consumer.upgrade_keys()
        self._configure()
        self._on_dashboards_changed(event)

    def _on_stop(self, _) -> None:
        """Go into maintenance state if the unit is stopped."""
        self.unit.status = MaintenanceStatus("Application is terminating.")

    def _configure(self) -> None:
        """Configure Grafana.

        Generate configuration files and check the sums against what is
        already stored in the charm. If either the base Grafana config
        or the datasource config differs, restart Grafana. A restart is also
        triggered when the Pebble plan differs from the layer we would build.
        """
        logger.debug("Handling grafana-k8a configuration change")
        restart = False

        # Generate a new base config and see if it differs from what we have.
        # If it does, store it and signal that we should restart Grafana
        grafana_config_ini = self._generate_grafana_config()
        config_ini_hash = hashlib.sha256(str(grafana_config_ini).encode("utf-8")).hexdigest()
        if not self.grafana_config_ini_hash == config_ini_hash:
            self.grafana_config_ini_hash = config_ini_hash
            self._update_grafana_config_ini(grafana_config_ini)
            logger.info("Updated Grafana's base configuration")

            restart = True

        # Do the same thing for datasources
        grafana_datasources = self._generate_datasource_config()
        datasources_hash = hashlib.sha256(str(grafana_datasources).encode("utf-8")).hexdigest()
        if not self.grafana_datasources_hash == datasources_hash:
            self.grafana_datasources_hash = datasources_hash
            self._update_datasource_config(grafana_datasources)
            logger.info("Updated Grafana's datasource configuration")

            restart = True

        if (
            self.container.can_connect()
            and self.container.get_plan().services != self._build_layer().services
        ):
            restart = True

        if restart:
            self.restart_grafana()

    def _update_datasource_config(self, config: str) -> None:
        """Write an updated datasource configuration file to the Pebble container if necessary.

        Args:
            config: A :str: containing the datasource configuration
        """
        container = self.unit.get_container(self.name)

        try:
            container.push(DATASOURCES_PATH, config, make_dirs=True)
        except ConnectionError:
            logger.error(
                "Could not push datasource config. Pebble refused connection. Shutting down?"
            )

    def _update_grafana_config_ini(self, config: str) -> None:
        """Write an updated Grafana configuration file to the Pebble container if necessary.

        Args:
            config: A :str: containing the Grafana base configuration
        """
        try:
            self.container.push(CONFIG_PATH, config, make_dirs=True)
        except ConnectionError:
            # This pushes the base config, not the datasources, so say so.
            logger.error(
                "Could not push Grafana config. Pebble refused connection. Shutting down?"
            )

    @property
    def has_peers(self) -> bool:
        """Check whether or not there are any other Grafanas as peers."""
        rel = self.model.get_relation(PEER)
        return len(rel.units) > 0 if rel is not None else False

    ############################
    # DASHBOARD IMPORT
    ###########################
    def init_dashboard_provisioning(self, dashboard_path: str):
        """Initialise the provisioning of Grafana dashboards.

        Args:
            dashboard_path: str; A file path to the dashboard to provision
        """
        self._configure()
        logger.info("Initializing dashboard provisioning path")
        container = self.unit.get_container(self.name)

        dashboard_config = {
            "apiVersion": 1,
            "providers": [
                {
                    "name": "Default",
                    "type": "file",
                    "options": {"path": dashboard_path},
                }
            ],
        }

        default_config = os.path.join(dashboard_path, "default.yaml")
        default_config_string = yaml.dump(dashboard_config)

        # NOTE(review): os.path.exists() checks the filesystem this charm
        # code runs on, while the push targets the workload container -
        # confirm this is the intended check.
        if not os.path.exists(dashboard_path):
            try:
                container.push(default_config, default_config_string, make_dirs=True)
            except ConnectionError:
                logger.warning(
                    "Could not push default dashboard configuration. Pebble shutting down?"
                )

    def _on_dashboards_changed(self, event) -> None:
        """Handle dashboard events.

        Pushes every dashboard known to the consumer into the provisioning
        directory, removes previously pushed ``juju_*.json`` files that are
        no longer wanted and restarts Grafana. The event is deferred when
        Pebble cannot be reached.
        """
        container = self.unit.get_container(self.name)
        dashboards_dir_path = os.path.join(PROVISIONING_PATH, "dashboards")

        self.init_dashboard_provisioning(dashboards_dir_path)

        if not container.can_connect():
            logger.debug("Cannot connect to Pebble yet, deferring event")
            event.defer()
            return

        # Maps file path -> whether the file must survive this run.
        dashboards_file_to_be_kept = {}
        try:
            # Start by assuming every existing file is stale.
            for dashboard_file in container.list_files(dashboards_dir_path, pattern="juju_*.json"):
                dashboards_file_to_be_kept[dashboard_file.path] = False

            for dashboard in self.dashboard_consumer.dashboards:
                dashboard_content = dashboard["content"]
                dashboard_content_bytes = dashboard_content.encode("utf-8")
                dashboard_content_digest = hashlib.sha256(dashboard_content_bytes).hexdigest()
                # The file name embeds a short content digest, so changed
                # content lands in a new file and the old one is removed.
                dashboard_filename = "juju_{}_{}.json".format(
                    dashboard["charm"], dashboard_content_digest[0:7]
                )

                path = os.path.join(dashboards_dir_path, dashboard_filename)
                dashboards_file_to_be_kept[path] = True

                logger.debug("New dashboard %s", path)
                container.push(path, dashboard_content_bytes, make_dirs=True)

            for dashboard_file_path, to_be_kept in dashboards_file_to_be_kept.items():
                if not to_be_kept:
                    container.remove_path(dashboard_file_path)
                    logger.debug("Removed dashboard %s", dashboard_file_path)

            self.restart_grafana()
        except ConnectionError:
            logger.exception("Could not update dashboards. Pebble shutting down?")

    #####################################
    # K8S WRANGLING
    #####################################
    def _patch_k8s_service(self):
        """Fix the Kubernetes service that was setup by Juju with correct port numbers."""
        if self.unit.is_leader() and not self._stored.k8s_service_patched:
            service_ports = [
                (self.app.name, PORT, PORT),
            ]
            try:
                K8sServicePatch.set_ports(self.app.name, service_ports)
            except PatchFailed as e:
                logger.error("Unable to patch the Kubernetes service: %s", str(e))
            else:
                self._stored.k8s_service_patched = True
                logger.info("Successfully patched the Kubernetes service!")

    #####################################
    # DATABASE EVENTS
    #####################################
    @property
    def has_db(self) -> bool:
        """Only consider a DB connection if we have config info."""
        return len(self._stored.database) > 0

    def _on_database_changed(self, event: RelationChangedEvent) -> None:
        """Sets configuration information for database connection.

        Only the leader acts on this event.

        Args:
            event: A :class:`RelationChangedEvent` from a `database` source

        Raises:
            SourceFieldsMissingError: if any required database field is absent
                from the remote application's relation data.
        """
        if not self.unit.is_leader():
            return

        # Get required information
        database_fields = {
            field: event.relation.data[event.app].get(field)
            for field in REQUIRED_DATABASE_FIELDS
        }

        # if any required fields are missing, raise so the problem is visible
        missing_fields = [
            field for field in REQUIRED_DATABASE_FIELDS if database_fields.get(field) is None
        ]
        if len(missing_fields) > 0:
            raise SourceFieldsMissingError(
                "Missing required data fields for database relation: {}".format(missing_fields)
            )

        # add the new database relation data to the datastore
        self._stored.database.update(
            {field: value for field, value in database_fields.items() if value is not None}
        )

        self._configure()

    def _on_database_broken(self, event: RelationBrokenEvent) -> None:
        """Removes database connection info from datastore.

        We are guaranteed to only have one DB connection, so clearing
        datastore.database is all we need for the change to be propagated
        to the Pebble container.

        Args:
            event: a :class:`RelationBrokenEvent` from a `database` source
        """
        if not self.unit.is_leader():
            return

        # remove the existing database info from datastore
        self._stored.database = dict()
        logger.info("Removing the grafana-k8s database backend config")

        # Cleanup the config file
        self._configure()

    def _generate_grafana_config(self) -> str:
        """Generate a database configuration for Grafana.

        For now, this only creates database information, since everything else
        can be set in ENV variables, but leave for expansion later so we can
        hide auth secrets

        Returns:
            The database section as an ini string, or an empty string when no
            database information is stored.
        """
        return self._generate_database_config() if self.has_db else ""

    def _generate_database_config(self) -> str:
        """Generate a database configuration.

        Returns:
            A string containing the required database information to be stubbed into the config
            file.
        """
        db_config = self._stored.database
        config_ini = configparser.ConfigParser()
        db_type = "mysql"

        db_url = "{0}://{1}:{2}@{3}/{4}".format(
            db_type,
            db_config.get("user"),
            db_config.get("password"),
            db_config.get("host"),
            db_config.get("name"),
        )
        config_ini["database"] = {
            "type": db_type,
            "host": db_config.get("host"),
            "name": db_config.get("name", ""),
            "user": db_config.get("user", ""),
            "password": db_config.get("password", ""),
            "url": db_url,
        }

        # This is silly, but a ConfigParser() handles this nicer than
        # raw string manipulation
        data = StringIO()
        config_ini.write(data)
        return data.getvalue()

    #####################################
    # PEBBLE OPERATIONS
    #####################################
    def _on_pebble_ready(self, event) -> None:
        """When Pebble is ready, start everything up."""
        self._stored.pebble_ready = True
        self._configure()

    def restart_grafana(self) -> None:
        """Restart the pebble container.

        `container.replan()` is intentionally avoided, since if no environment
        variables are changed, this will not actually restart Grafana, which is
        necessary to reload the provisioning files.

        Note that Grafana does not support SIGHUP, so a full restart is needed.
        """
        layer = self._build_layer()

        try:
            plan = self.container.get_plan()
            if plan.services != layer.services:
                self.container.add_layer(self.name, layer, combine=True)
            if self.container.get_service(self.name).is_running():
                self.container.stop(self.name)

            self.container.start(self.name)
            logger.info("Restarted grafana-k8s")

            self.unit.status = ActiveStatus()
        except ConnectionError:
            logger.error(
                "Could not restart grafana-k8s -- Pebble socket does "
                "not exist or is not responsive"
            )

    def _parse_grafana_path(self, parts: ParseResult) -> dict:
        """Convert web_external_url into a usable path.

        Args:
            parts: the parsed ``web_external_url``

        Returns:
            A dict with ``scheme``, ``host``, ``port`` and ``path`` keys, or an
            empty dict when the URL has neither a scheme nor a leading slash.
        """
        # urlparse.path parsing is absolutely horrid and only
        # guarantees any kind of sanity if there is a scheme
        if not parts.scheme and not parts.path.startswith("/"):
            # This could really be anything!
            logger.warning(
                "Could not determine web_external_url for Grafana. Please "
                "use a fully-qualified path or a bare subpath"
            )
            return {}

        return {
            "scheme": parts.scheme or "http",
            "host": "0.0.0.0",
            "port": parts.netloc.split(":")[1] if ":" in parts.netloc else PORT,
            "path": parts.path,
        }

    def _build_layer(self) -> Layer:
        """Construct the pebble layer information."""
        # Placeholder for when we add "proper" mysql support for HA
        extra_info = {
            "GF_DATABASE_TYPE": "sqlite3",
        }

        grafana_path = self.model.config.get("web_external_url", "")

        # We have to do this dance because urlparse() doesn't have any good
        # truthiness, and parsing an empty string is still 'true'
        if grafana_path:
            parts = self._parse_grafana_path(urlparse(grafana_path))

            # It doesn't matter unless there's a subpath, since the
            # redirect to login is fine with a bare hostname
            if parts and parts["path"]:
                extra_info.update(
                    {
                        "GF_SERVER_SERVE_FROM_SUB_PATH": "True",
                        "GF_SERVER_ROOT_URL": "{}://{}:{}{}".format(
                            parts["scheme"], parts["host"], parts["port"], parts["path"]
                        ),
                    }
                )

        layer = Layer(
            {
                "summary": "grafana-k8s layer",
                "description": "grafana-k8s layer",
                "services": {
                    self.name: {
                        "override": "replace",
                        "summary": "grafana-k8s service",
                        "command": "grafana-server -config {}".format(CONFIG_PATH),
                        "startup": "enabled",
                        "environment": {
                            "GF_SERVER_HTTP_PORT": PORT,
                            "GF_LOG_LEVEL": self.model.config["log_level"],
                            "GF_PATHS_PROVISIONING": PROVISIONING_PATH,
                            "GF_SECURITY_ADMIN_USER": self.model.config["admin_user"],
                            "GF_SECURITY_ADMIN_PASSWORD": self._get_admin_password(),
                            **extra_info,
                        },
                    }
                },
            }
        )

        return layer

    @property
    def grafana_version(self):
        """Grafana server version."""
        info = self.grafana_service.build_info
        if info:
            return info.get("version", None)
        return None

    @property
    def grafana_config_ini_hash(self) -> str:
        """Returns the hash for the Grafana ini file.

        Falls back to hashing the file in the workload container when no hash
        has been recorded on this object yet.
        """
        return self._grafana_config_ini_hash or self._get_hash_for_file(CONFIG_PATH)

    @grafana_config_ini_hash.setter
    def grafana_config_ini_hash(self, hash: str) -> None:
        """Sets the Grafana config ini hash."""
        self._grafana_config_ini_hash = hash

    @property
    def grafana_datasources_hash(self) -> str:
        """Returns the hash for the Grafana datasources file.

        Falls back to hashing the file in the workload container when no hash
        has been recorded on this object yet.
        """
        return self._grafana_datasources_hash or self._get_hash_for_file(DATASOURCES_PATH)

    @grafana_datasources_hash.setter
    def grafana_datasources_hash(self, hash: str) -> None:
        """Sets the Grafana datasources hash."""
        self._grafana_datasources_hash = hash

    def _get_hash_for_file(self, file: str) -> str:
        """Tries to connect to the container and hash a file.

        Args:
            file: a `str` filepath to read

        Returns:
            The sha256 hex digest of the file content, or an empty string when
            the container is unreachable or the file cannot be read.
        """
        if self.container.can_connect():
            try:
                content = self.container.pull(file)
                digest = hashlib.sha256(str(content.read()).encode("utf-8")).hexdigest()
                return digest
            except (FileNotFoundError, ProtocolError, PathError) as e:
                logger.warning(
                    "Could not read configuration from the Grafana workload container: {}".format(
                        e
                    )
                )

        return ""

    @property
    def build_info(self) -> dict:
        """Returns information about the running Grafana service."""
        return self.grafana_service.build_info

    def _generate_datasource_config(self) -> str:
        """Template out a Grafana datasource config.

        Template using the sources (and removed sources) the consumer knows about, and dump it to
        YAML.

        Returns:
            A string-dumped YAML config for the datasources
        """
        # Boilerplate for the config file
        datasources_dict = {"apiVersion": 1, "datasources": [], "deleteDatasources": []}

        for source_info in self.source_consumer.sources:
            source = {
                "orgId": "1",
                "access": "proxy",
                "isDefault": "false",
                "name": source_info["source_name"],
                "type": source_info["source_type"],
                "url": source_info["url"],
            }
            datasources_dict["datasources"].append(source)  # type: ignore[attr-defined]

        # Also get a list of all the sources which have previously been purged and add them
        for name in self.source_consumer.sources_to_delete:
            source = {"orgId": 1, "name": name}
            datasources_dict["deleteDatasources"].append(source)  # type: ignore[attr-defined]

        datasources_string = yaml.dump(datasources_dict)
        return datasources_string

    def _on_get_admin_password(self, event: ActionEvent) -> None:
        """Returns the password for the admin user as an action response.

        When the Grafana service reports that the password has been changed,
        a masked placeholder is returned instead of the stored password.
        """
        if self.grafana_service.password_has_been_changed(
            self.model.config["admin_user"], self._get_admin_password()
        ):
            event.set_results(
                {"admin-password": "******"}
            )
        else:
            event.set_results({"admin-password": self._get_admin_password()})

    def _get_admin_password(self) -> str:
        """Returns the password for the admin user.

        The password is generated on first use and kept in stored state.
        """
        if not self._stored.admin_password:
            self._stored.admin_password = self._generate_password()

        return self._stored.admin_password

    def _generate_password(self) -> str:
        """Generates a random 12 character password."""
        # Really limited by what can be passed into shell commands, since this all goes
        # through subprocess. So much for complex password
        chars = string.ascii_letters + string.digits
        return "".join(secrets.choice(chars) for _ in range(12))
class Charm(CharmBase):
    """Test charm that records the events it handles in stored state."""

    _stored = StoredState()

    # Hook and relation events that are always observed. The relation events
    # use endpoints from different sections (provides, requires, peers).
    _HOOK_EVENTS = (
        'install',
        'start',
        'config_changed',
        'update_status',
        'leader_settings_changed',
        'db_relation_joined',
        'mon_relation_changed',
        'mon_relation_departed',
        'ha_relation_broken',
        'test_pebble_ready',
    )

    # Action events, observed only when a non-empty actions.yaml exists.
    _ACTION_EVENTS = (
        'start_action',
        'foo_bar_action',
        'get_model_name_action',
        'get_status_action',
        'log_critical_action',
        'log_error_action',
        'log_warning_action',
        'log_info_action',
        'log_debug_action',
    )

    # Stored-state attributes that default to an empty list.
    _LIST_DEFAULTS = (
        'on_install',
        'on_start',
        'on_config_changed',
        'on_update_status',
        'on_leader_settings_changed',
        'on_db_relation_joined',
        'on_mon_relation_changed',
        'on_mon_relation_departed',
        'on_ha_relation_broken',
        'on_foo_bar_action',
        'on_start_action',
        '_on_get_model_name_action',
        'on_collect_metrics',
        'on_test_pebble_ready',
        'on_log_critical_action',
        'on_log_error_action',
        'on_log_warning_action',
        'on_log_info_action',
        'on_log_debug_action',
        # Observed event type names per invocation. A list is used to
        # preserve the order in which charm handlers have observed the events.
        'observed_event_types',
    )

    def __init__(self, *args):
        super().__init__(*args)

        # Each key gets its own fresh list.
        list_defaults = {key: [] for key in self._LIST_DEFAULTS}
        self._stored.set_default(try_excepthook=False, **list_defaults)

        self._observe_kinds(self._HOOK_EVENTS)

        actions_file = self.charm_dir / 'actions.yaml'
        if actions_file.exists() and actions_file.read_bytes():
            self._observe_kinds(self._ACTION_EVENTS)

        self._observe_kinds(('collect_metrics',))

        if os.getenv('TRY_EXCEPTHOOK', False):
            raise RuntimeError("failing as requested")

    def _observe_kinds(self, kinds):
        """Observe each event ``self.on.<kind>`` with ``self._on_<kind>``."""
        for kind in kinds:
            self.framework.observe(getattr(self.on, kind), getattr(self, '_on_' + kind))

    def _record(self, event, bucket):
        """Append the event's type name to ``bucket`` and to the ordered log."""
        type_name = type(event).__name__
        getattr(self._stored, bucket).append(type_name)
        self._stored.observed_event_types.append(type_name)

    def _on_install(self, event):
        self._record(event, 'on_install')

    def _on_start(self, event):
        self._record(event, 'on_start')

    def _on_config_changed(self, event):
        self._record(event, 'on_config_changed')
        event.defer()

    def _on_update_status(self, event):
        self._record(event, 'on_update_status')

    def _on_leader_settings_changed(self, event):
        self._record(event, 'on_leader_settings_changed')

    def _on_db_relation_joined(self, event):
        assert event.app is not None, 'application name cannot be None for a relation-joined event'
        self._record(event, 'on_db_relation_joined')
        self._stored.db_relation_joined_data = event.snapshot()

    def _on_mon_relation_changed(self, event):
        assert event.app is not None, (
            'application name cannot be None for a relation-changed event')
        if os.environ.get('JUJU_REMOTE_UNIT'):
            assert event.unit is not None, (
                'a unit name cannot be None for a relation-changed event'
                ' associated with a remote unit')
        self._record(event, 'on_mon_relation_changed')
        self._stored.mon_relation_changed_data = event.snapshot()

    def _on_mon_relation_departed(self, event):
        assert event.app is not None, (
            'application name cannot be None for a relation-departed event')
        self._record(event, 'on_mon_relation_departed')
        self._stored.mon_relation_departed_data = event.snapshot()

    def _on_ha_relation_broken(self, event):
        assert event.app is None, (
            'relation-broken events cannot have a reference to a remote application')
        assert event.unit is None, (
            'relation broken events cannot have a reference to a remote unit')
        self._record(event, 'on_ha_relation_broken')
        self._stored.ha_relation_broken_data = event.snapshot()

    def _on_test_pebble_ready(self, event):
        assert event.workload is not None, (
            'workload events must have a reference to a container')
        self._record(event, 'on_test_pebble_ready')
        self._stored.test_pebble_ready_data = event.snapshot()

    def _on_start_action(self, event):
        assert event.handle.kind == 'start_action', (
            'event action name cannot be different from the one being handled')
        self._record(event, 'on_start_action')

    def _on_foo_bar_action(self, event):
        assert event.handle.kind == 'foo_bar_action', (
            'event action name cannot be different from the one being handled')
        self._record(event, 'on_foo_bar_action')

    def _on_get_status_action(self, event):
        current = self.unit.status
        self._stored.status_name = current.name
        self._stored.status_message = self.unit.status.message

    def _on_collect_metrics(self, event):
        self._record(event, 'on_collect_metrics')
        event.add_metrics({'foo': 42}, {'bar': 4.2})

    def _on_log_critical_action(self, event):
        logger.critical('super critical')

    def _on_log_error_action(self, event):
        logger.error('grave error')

    def _on_log_warning_action(self, event):
        logger.warning('wise warning')

    def _on_log_info_action(self, event):
        logger.info('useful info')

    def _on_log_debug_action(self, event):
        logger.debug('insightful debug')

    def _on_get_model_name_action(self, event):
        self._stored._on_get_model_name_action.append(self.model.name)
class CephISCSIGatewayPeers(Object):
    """Peer relation interface used by the gateway units.

    Reads and writes the admin password and the allowed-IP list in the
    application data bag, and each unit's readiness flag and FQDN in its
    unit data bag.
    """

    on = CephISCSIGatewayPeerEvents()
    state = StoredState()
    PASSWORD_KEY = 'admin_password'
    READY_KEY = 'gateway_ready'
    FQDN_KEY = 'gateway_fqdn'
    ALLOWED_IPS_KEY = 'allowed_ips'

    def __init__(self, charm, relation_name):
        super().__init__(charm, relation_name)
        self.relation_name = relation_name
        self.this_unit = self.framework.model.unit
        self.state.set_default(allowed_ips=[])
        relation_events = charm.on[relation_name]
        self.framework.observe(relation_events.relation_changed, self.on_changed)

    def on_changed(self, event):
        """Emit peer events based on the current relation data."""
        logging.info("CephISCSIGatewayPeers on_changed")
        self.on.has_peers.emit()
        if self.ready_peer_details:
            self.on.ready_peers.emit()
        # Only signal a change when the list differs from the one last stored.
        if self.allowed_ips != self.state.allowed_ips:
            self.on.allowed_ips_changed.emit()
        self.state.allowed_ips = self.allowed_ips

    def set_admin_password(self, password):
        """Store ``password`` in the application data bag."""
        logging.info("Setting admin password")
        app_data = self.peer_rel.data[self.peer_rel.app]
        app_data[self.PASSWORD_KEY] = password

    def set_allowed_ips(self, ips, append=True):
        """Store ``ips`` as a sorted, de-duplicated JSON list.

        When ``append`` is true the currently stored addresses are kept and
        merged with ``ips``; otherwise they are replaced.
        """
        logging.info("Setting allowed ips: {}".format(append))
        existing = []
        if append and self.allowed_ips:
            existing = self.allowed_ips
        merged = set(existing)
        merged.update(ips)
        encoded = json.dumps(sorted(merged))
        self.peer_rel.data[self.peer_rel.app][self.ALLOWED_IPS_KEY] = encoded

    def announce_ready(self):
        """Flag this unit as ready and publish its FQDN in its unit data."""
        logging.info("announcing ready")
        unit_data = self.peer_rel.data[self.this_unit]
        unit_data[self.READY_KEY] = 'True'
        self.peer_rel.data[self.this_unit][self.FQDN_KEY] = self.fqdn

    @property
    def ready_peer_details(self):
        """Map unit name to ``{'fqdn', 'ip'}`` for this unit and ready peers."""
        details = {
            self.framework.model.unit.name: {
                'fqdn': self.fqdn,
                'ip': self.cluster_bind_address}}
        for peer in self.peer_rel.units:
            peer_data = self.peer_rel.data[peer]
            if peer_data.get(self.READY_KEY) != 'True':
                continue
            details[peer.name] = {
                'fqdn': peer_data[self.FQDN_KEY],
                'ip': peer_data['ingress-address']}
        return details

    @property
    def fqdn(self):
        """Fully qualified domain name reported by the local host."""
        return socket.getfqdn()

    @property
    def is_joined(self):
        """Whether the peer relation exists."""
        return self.peer_rel is not None

    @property
    def peer_rel(self):
        """The peer relation object as returned by the model."""
        return self.framework.model.get_relation(self.relation_name)

    @property
    def peer_binding(self):
        """Network binding for the peer relation."""
        return self.framework.model.get_binding(self.peer_rel)

    @property
    def cluster_bind_address(self):
        """Bind address of the peer binding, as a string."""
        return str(self.peer_binding.network.bind_address)

    @property
    def admin_password(self):
        """Admin password from application data, or None without a relation."""
        relation = self.peer_rel
        if not relation:
            return None
        return self.peer_rel.data[self.peer_rel.app].get(self.PASSWORD_KEY)

    @property
    def allowed_ips(self):
        """Decoded allowed-IP list, or None when there is no peer relation."""
        relation = self.peer_rel
        if not relation:
            return None
        app_data = self.peer_rel.data[self.peer_rel.app]
        return json.loads(app_data.get(self.ALLOWED_IPS_KEY, '[]'))

    @property
    def peer_addresses(self):
        """Sorted list of this unit's bind address and peers' ingress addresses."""
        found = [self.cluster_bind_address]
        found.extend(
            self.peer_rel.data[peer]['ingress-address']
            for peer in self.peer_rel.units)
        return sorted(found)

    @property
    def peer_count(self):
        """Number of remote units on the peer relation (0 without a relation)."""
        if not self.peer_rel:
            return 0
        return len(self.peer_rel.units)

    @property
    def unit_count(self):
        """Peer count plus one for this unit."""
        return self.peer_count + 1
class CephISCSIGatewayCharmBase(ops_openstack.core.OSBaseCharm):
    """Charm that configures ceph-iscsi gateway services on a unit.

    It requests a Ceph pool and permissions, renders the gateway and Ceph
    configuration files once the peer admin password and the pools are
    available, writes TLS material received from the CA relation, and
    implements the ``create-target`` and ``add-trusted-ip`` actions.
    """

    state = StoredState()
    PACKAGES = ['ceph-iscsi', 'tcmu-runner', 'ceph-common']
    CEPH_CAPABILITIES = [
        "osd", "allow *",
        "mon", "allow *",
        "mgr", "allow r"]

    DEFAULT_TARGET = "iqn.2003-01.com.ubuntu.iscsi-gw:iscsi-igw"
    REQUIRED_RELATIONS = ['ceph-client', 'cluster']

    # Two has been tested but four is probably fine too but needs
    # validating
    ALLOWED_UNIT_COUNTS = [2]

    CEPH_CONFIG_PATH = Path('/etc/ceph')
    CEPH_ISCSI_CONFIG_PATH = CEPH_CONFIG_PATH / 'iscsi'
    GW_CONF = CEPH_CONFIG_PATH / 'iscsi-gateway.cfg'
    CEPH_CONF = CEPH_ISCSI_CONFIG_PATH / 'ceph.conf'
    GW_KEYRING = CEPH_ISCSI_CONFIG_PATH / 'ceph.client.ceph-iscsi.keyring'
    TLS_KEY_PATH = CEPH_CONFIG_PATH / 'iscsi-gateway.key'
    TLS_PUB_KEY_PATH = CEPH_CONFIG_PATH / 'iscsi-gateway-pub.key'
    TLS_CERT_PATH = CEPH_CONFIG_PATH / 'iscsi-gateway.crt'
    TLS_KEY_AND_CERT_PATH = CEPH_CONFIG_PATH / 'iscsi-gateway.pem'
    TLS_CA_CERT_PATH = Path(
        '/usr/local/share/ca-certificates/vault_ca_cert.crt')

    GW_SERVICES = ['rbd-target-api', 'rbd-target-gw']

    # Config file path -> services restarted when that file changes.
    RESTART_MAP = {
        str(GW_CONF): GW_SERVICES,
        str(CEPH_CONF): GW_SERVICES,
        str(GW_KEYRING): GW_SERVICES
    }

    release = 'default'

    def __init__(self, framework):
        super().__init__(framework)
        logging.info("Using {} class".format(self.release))
        self.state.set_default(
            target_created=False,
            enable_tls=False)
        self.ceph_client = ceph_client.CephClientRequires(
            self,
            'ceph-client')
        self.peers = interface_ceph_iscsi_peer.CephISCSIGatewayPeers(
            self,
            'cluster')
        self.ca_client = ca_client.CAClient(
            self,
            'certificates')
        self.adapters = CephISCSIGatewayAdapters(
            (self.ceph_client, self.peers, self.ca_client),
            self)
        self.framework.observe(
            self.ceph_client.on.broker_available,
            self.request_ceph_pool)
        self.framework.observe(
            self.ceph_client.on.pools_available,
            self.render_config)
        self.framework.observe(
            self.peers.on.has_peers,
            self.on_has_peers)
        self.framework.observe(
            self.peers.on.allowed_ips_changed,
            self.render_config)
        self.framework.observe(
            self.ca_client.on.tls_app_config_ready,
            self.on_tls_app_config_ready)
        self.framework.observe(
            self.ca_client.on.ca_available,
            self.on_ca_available)
        self.framework.observe(
            self.on.config_changed,
            self.render_config)
        self.framework.observe(
            self.on.upgrade_charm,
            self.render_config)
        self.framework.observe(
            self.on.create_target_action,
            self.on_create_target_action)
        self.framework.observe(
            self.on.add_trusted_ip_action,
            self.on_add_trusted_ip_action)

    def on_install(self, event):
        """Install packages unless the unit is running inside a container."""
        if ch_host.is_container():
            logging.info("Installing into a container is not supported")
            self.update_status()
        else:
            self.install_pkgs()

    def on_has_peers(self, event):
        """Have the leader generate the admin password if none is set yet."""
        logging.info("Unit has peers")
        if self.unit.is_leader() and not self.peers.admin_password:
            logging.info("Setting admin password")
            alphabet = string.ascii_letters + string.digits
            password = ''.join(secrets.choice(alphabet) for i in range(8))
            self.peers.set_admin_password(password)

    def request_ceph_pool(self, event):
        """Request the metadata pool, permissions and OSD settings."""
        logging.info("Requesting replicated pool")
        self.ceph_client.create_replicated_pool(
            self.model.config['rbd-metadata-pool'])
        logging.info("Requesting permissions")
        self.ceph_client.request_ceph_permissions(
            'ceph-iscsi',
            self.CEPH_CAPABILITIES)
        self.ceph_client.request_osd_settings({
            'osd heartbeat grace': 20,
            'osd heartbeat interval': 5})

    def refresh_request(self, event):
        """Re-render the configuration and re-send the Ceph requests."""
        self.render_config(event)
        self.request_ceph_pool(event)

    def render_config(self, event):
        """Render the files in ``RESTART_MAP`` and announce readiness.

        The event is deferred while the peer admin password or the Ceph
        pools are not available yet.
        """
        if not self.peers.admin_password:
            logging.info("Deferring setup: admin password not yet available")
            event.defer()
            return
        if not self.ceph_client.pools_available:
            logging.info("Deferring setup: ceph pools not yet available")
            event.defer()
            return

        self.CEPH_ISCSI_CONFIG_PATH.mkdir(
            exist_ok=True,
            mode=0o750)

        def daemon_reload_and_restart(service_name):
            subprocess.check_call(['systemctl', 'daemon-reload'])
            subprocess.check_call(['systemctl', 'restart', service_name])

        # rbd-target-api gets a daemon-reload before it is restarted.
        rfuncs = {
            'rbd-target-api': daemon_reload_and_restart}

        @ch_host.restart_on_change(self.RESTART_MAP, restart_functions=rfuncs)
        def _render_configs():
            for config_file in self.RESTART_MAP:
                ch_templating.render(
                    os.path.basename(config_file),
                    config_file,
                    self.adapters)

        logging.info("Rendering config")
        _render_configs()
        logging.info("Setting started state")
        self.peers.announce_ready()
        self.state.is_started = True
        self.update_status()
        logging.info("on_pools_available: status updated")

    def on_ca_available(self, event):
        """Request an application certificate covering this unit's addresses."""
        addresses = set()
        for binding_name in ['public', 'cluster']:
            binding = self.model.get_binding(binding_name)
            addresses.add(binding.network.ingress_address)
            addresses.add(binding.network.bind_address)
        sans = [str(s) for s in addresses]
        sans.append(socket.gethostname())
        self.ca_client.request_application_certificate(socket.getfqdn(), sans)

    def on_tls_app_config_ready(self, event):
        """Write key/certificate files, refresh CA store, re-render config."""
        self.TLS_KEY_PATH.write_bytes(
            self.ca_client.application_key.private_bytes(
                encoding=serialization.Encoding.PEM,
                format=serialization.PrivateFormat.TraditionalOpenSSL,
                encryption_algorithm=serialization.NoEncryption()))
        self.TLS_CERT_PATH.write_bytes(
            self.ca_client.application_certificate.public_bytes(
                encoding=serialization.Encoding.PEM))
        self.TLS_CA_CERT_PATH.write_bytes(
            self.ca_client.ca_certificate.public_bytes(
                encoding=serialization.Encoding.PEM))
        # Combined file: certificate first, then the private key.
        self.TLS_KEY_AND_CERT_PATH.write_bytes(
            self.ca_client.application_certificate.public_bytes(
                encoding=serialization.Encoding.PEM) +
            b'\n' +
            self.ca_client.application_key.private_bytes(
                encoding=serialization.Encoding.PEM,
                format=serialization.PrivateFormat.TraditionalOpenSSL,
                encryption_algorithm=serialization.NoEncryption())
        )
        self.TLS_PUB_KEY_PATH.write_bytes(
            self.ca_client.application_key.public_key().public_bytes(
                format=serialization.PublicFormat.SubjectPublicKeyInfo,
                encoding=serialization.Encoding.PEM))
        subprocess.check_call(['update-ca-certificates'])
        self.state.enable_tls = True
        self.render_config(event)

    def custom_status_check(self):
        """Set a blocked status and return False on unsupported deployments.

        :returns: False when running in a container or when the unit count
                  is not in ``ALLOWED_UNIT_COUNTS``, True otherwise.
        """
        if ch_host.is_container():
            self.unit.status = ops.model.BlockedStatus(
                'Charm cannot be deployed into a container')
            return False
        if self.peers.unit_count not in self.ALLOWED_UNIT_COUNTS:
            self.unit.status = ops.model.BlockedStatus(
                '{} is an invalid unit count'.format(self.peers.unit_count))
            return False
        return True

    # Actions

    def on_add_trusted_ip_action(self, event):
        """Add to (or overwrite) the allowed-IP list; leader only."""
        if self.unit.is_leader():
            ips = event.params.get('ips').split()
            self.peers.set_allowed_ips(
                ips,
                append=not event.params['overwrite'])
            # NOTE(review): render_config() may call event.defer(); confirm
            # that deferring is acceptable for an action event here.
            self.render_config(event)
        else:
            event.fail("Action must be run on leader")

    def on_create_target_action(self, event):
        """Create an iSCSI target with its gateways, disk and client."""
        gw_client = gwcli_client.GatewayClient()
        target = event.params.get('iqn', self.DEFAULT_TARGET)
        # Default to every gateway unit that has announced itself ready.
        gateway_units = event.params.get(
            'gateway-units',
            list(self.peers.ready_peer_details))
        gw_client.create_target(target)
        for gw_unit, gw_config in self.peers.ready_peer_details.items():
            if gw_unit in gateway_units:
                gw_client.add_gateway_to_target(
                    target,
                    gw_config['ip'],
                    gw_config['fqdn'])
        gw_client.create_pool(
            event.params['pool-name'],
            event.params['image-name'],
            event.params['image-size'])
        gw_client.add_client_to_target(
            target,
            event.params['client-initiatorname'])
        gw_client.add_client_auth(
            target,
            event.params['client-initiatorname'],
            event.params['client-username'],
            event.params['client-password'])
        gw_client.add_disk_to_client(
            target,
            event.params['client-initiatorname'],
            event.params['pool-name'],
            event.params['image-name'])
        event.set_results({'iqn': target})
class KafkaOperator(CharmBase):
    """Charm to run Kafka on Kubernetes."""

    _stored = StoredState()

    def __init__(self, *args):
        super().__init__(*args)
        self.framework.observe(self.on.kafka_pebble_ready, self._on_kafka_pebble_ready)
        self.framework.observe(self.on.list_topics_action, self.list_topics)

    def _restart_kafka(self):
        """Stop the Kafka service if it is active, then start it again."""
        logger.info("Restarting kafka ...")

        container = self.unit.get_container(SERVICE)

        status = container.get_service(SERVICE)
        if status.current == ServiceStatus.ACTIVE:
            container.stop(SERVICE)

        self.unit.status = MaintenanceStatus("kafka maintenance")
        container.start(SERVICE)
        self.unit.status = ActiveStatus("kafka restarted")

    def _on_kafka_pebble_ready(self, event: PebbleReadyEvent) -> None:
        """Add the Kafka layer to the workload container and autostart it."""
        container = self.unit.get_container(SERVICE)
        logger.info("_on_kafka_pebble_ready")

        logger.info("_start_kafka")
        layer = Layer(raw=self._kafka_layer())
        container.add_layer(SERVICE, layer, combine=True)
        container.autostart()
        self.unit.status = ActiveStatus("kafka started")

    def _kafka_layer(self) -> dict:
        """Return the raw layer dict with a storage-format step and the broker.

        The ``kafka`` service declares ``kafka-setup`` in its ``requires``
        list; the cluster id comes from the ``cluster_id`` config option.
        """
        config = self.model.config
        cluster_id = config["cluster_id"]

        layer = {
            "summary": "kafka layer",
            "description": "kafka layer",
            "services": {
                "kafka-setup": {
                    "override": "replace",
                    "summary": "kafka setup step - initialize & format storage",
                    "command": f"{KAFKA_BASE_DIR}/bin/kafka-storage.sh format -t {cluster_id} -c {KAFKA_BASE_DIR}/config/kraft/server.properties",
                    "startup": "enabled",
                },
                "kafka": {
                    "override": "replace",
                    "summary": "kafka service",
                    "command": f"{KAFKA_BASE_DIR}/bin/kafka-server-start.sh {KAFKA_BASE_DIR}/config/kraft/server.properties",
                    "startup": "enabled",
                    "requires": ["kafka-setup"],
                },
            },
        }

        return layer

    def list_topics(self, event):
        """Action handler: run ``kafka-topics.sh --list`` and return its output.

        The output is stored under the ``result`` key. If the command cannot
        be started or exits non-zero, the action is failed with the error
        text instead of letting the exception escape the handler.
        """
        logger.info("Listing topics")
        command = [
            f"{KAFKA_BASE_DIR}/bin/kafka-topics.sh",
            "--list",
            "--bootstrap-server",
            "localhost:9092",
        ]

        try:
            process = subprocess.run(
                command,
                check=True,
                stdout=subprocess.PIPE,
                universal_newlines=True,
            )
        except (OSError, subprocess.CalledProcessError) as err:
            logger.error("Listing topics failed: %s", err)
            event.fail(f"Listing topics failed: {err}")
            return

        event.set_results({"result": process.stdout})
class MongoDBCharm(CharmBase):
    """Charm that deploys MongoDB standalone or as a replica set.

    The leader applies the pod spec and initialises or reconfigures the
    replica set; every unit reports its own status.
    """

    state = StoredState()
    on = MongoDBClusterEvents()

    def __init__(self, *args):
        super().__init__(*args)

        self.state.set_default(started=False)
        self.state.set_default(pod_spec=None)

        self.port = MONGODB_PORT
        self.image = OCIImageResource(self, "mongodb-image")

        # Register all of the events we want to observe
        self.framework.observe(self.on.install, self.configure_pod)
        self.framework.observe(self.on.config_changed, self.configure_pod)
        self.framework.observe(self.on.upgrade_charm, self.configure_pod)
        self.framework.observe(self.on.start, self.on_start)
        self.framework.observe(self.on.update_status, self.on_update_status)

        # Peer relation
        self.cluster = MongoDBCluster(self, "cluster", self.port)

        self.framework.observe(self.on.cluster_relation_changed, self.reconfigure)
        self.framework.observe(self.on.cluster_relation_departed, self.reconfigure)

        # Cluster Events
        self.framework.observe(self.on.mongodb_started, self.on_mongodb_started)

        logger.debug("MongoDBCharm initialized!")

    # #############################################
    # ########## CHARM HOOKS HANDLERS #############
    # #############################################

    # hooks: install, config-changed, upgrade-charm
    def configure_pod(self, event):
        """Build the pod spec and apply it when it differs from the last one.

        Non-leader units only refresh their status. The unit is blocked when
        required settings are missing or the image resource cannot be
        fetched.
        """
        # Continue only if the unit is the leader
        if not self.unit.is_leader():
            self.on_update_status(event)
            return

        logger.debug("Running configuring_pod")

        # Check problems in the settings
        problems = self._check_settings()
        if problems:
            self.unit.status = BlockedStatus(problems)
            return

        # Fetch image information
        try:
            self.unit.status = WaitingStatus("Fetching image information")
            image_info = self.image.fetch()
        except OCIImageResourceError:
            self.unit.status = BlockedStatus(
                "Error fetching image information")
            return

        # Build Pod spec.
        # This is transient progress, not a condition that needs operator
        # action, so it is reported as waiting rather than blocked.
        self.unit.status = WaitingStatus("Assembling pod spec")
        pod_spec = make_pod_spec(
            image_info,
            self.port,
            replica_set_name=self.replica_set_name
            if not self.standalone else None,
        )

        # Update pod spec if the generated one is different
        # from the one previously applied
        if self.state.pod_spec != pod_spec:
            self.model.pod.set_spec(pod_spec)
            self.state.pod_spec = pod_spec

        self.on_update_status(event)
        logger.debug("Running configuring_pod finished")

    # hooks: start
    def on_start(self, event):
        """On the leader, emit ``mongodb_started`` once MongoDB is reachable."""
        if not self.unit.is_leader():
            return

        logger.debug("Running on_start")
        if MongoConnector.ready(self.standalone_uri):
            self.on.mongodb_started.emit()
        else:
            # This event is not being retriggered before update_status
            event.defer()
            return

        # Can't call update_status because an infinite loop might happen
        # due to the fact I'm calling on_start from update_status
        # self.on_update_status(event)
        logger.debug("Running on_start finished")

    # hooks: update-status
    def on_update_status(self, event):
        """Set the unit status from service and replica-set readiness."""
        status_message = ""
        if self.standalone:
            status_message += "standalone-mode: "
            if MongoConnector.ready(self.standalone_uri):
                status_message += "ready"
                self.unit.status = ActiveStatus(status_message)
            else:
                status_message += "service not ready yet"
                self.unit.status = WaitingStatus(status_message)
        else:
            status_message += f"replica-set-mode({self.replica_set_name}): "
            if MongoConnector.ready(self.standalone_uri):
                status_message += "ready"
                if self.unit.is_leader():
                    if self.cluster.ready:
                        hosts_count = len(self.cluster.replica_set_hosts)
                        status_message += f" ({hosts_count} members)"
                    else:
                        status_message += " (replica set not initialized yet)"

                        # Since on_start is not being properly triggered,
                        # I'm calling it manually here.
                        self.on.start.emit()

                        self.unit.status = WaitingStatus(status_message)
                        return
                self.unit.status = ActiveStatus(status_message)
            else:
                status_message += "service not ready yet"
                self.unit.status = WaitingStatus(status_message)

    # #############################################
    # ####### PEER RELATION HOOK HANDLERS #########
    # #############################################

    # hooks: cluster-relation-changed, cluster-relation-departed
    def reconfigure(self, event):
        """On the leader, reconfigure the replica set when the peers require it."""
        logger.debug("Running reconfigure")

        if (self.unit.is_leader()
                and self.cluster.replica_set_initialized
                and self.cluster.need_replica_set_reconfiguration()):
            uri = self.replica_set_uri
            config = MongoConnector.replset_get_config(uri)
            config = MongoConnector.replset_generate_config(
                self.cluster.hosts,
                self.replica_set_name,
                increase_version=True,
                config=config,
            )
            MongoConnector.replset_reconfigure(uri, config)
            self.on.replica_set_configured.emit(self.cluster.hosts)

        self.on_update_status(event)
        logger.debug("Running reconfigure finished")

    # #############################################
    # ######### CLUSTER EVENT HANDLERS ############
    # #############################################

    def on_mongodb_started(self, event):
        """On the leader, initialise the replica set if needed and signal ready."""
        if not self.unit.is_leader() or self.standalone:
            return

        logger.debug("Running on_mongodb_started")
        if not self.cluster.replica_set_initialized:
            self.unit.status = WaitingStatus("Initializing the replica set")
            config = MongoConnector.replset_generate_config(
                self.cluster.hosts, self.replica_set_name)
            MongoConnector.replset_initialize(self.standalone_uri, config)
            self.on.replica_set_configured.emit(self.cluster.hosts)

        self.on.cluster_ready.emit()
        logger.debug("Running on_mongodb_started finished")

    # #############################################
    # ############## PROPERTIES ###################
    # #############################################

    @property
    def replica_set_name(self):
        """Value of the ``replica_set_name`` config option."""
        return self.model.config["replica_set_name"]

    @property
    def standalone(self):
        """Value of the ``standalone`` config option."""
        return self.model.config["standalone"]

    # #############################################
    # ############# PRIVATE METHODS ###############
    # #############################################

    def _check_settings(self):
        """Return a ``;``-joined list of missing settings, empty when fine."""
        problems = []
        config = self.model.config

        for setting in REQUIRED_SETTINGS:
            if config.get(setting) is None:
                problems.append(f"missing config {setting}")

        if not self.standalone:
            # These settings must also be non-empty, not just present.
            for setting in REQUIRED_SETTINGS_NOT_STANDALONE:
                if not config.get(setting):
                    problems.append(f"missing config {setting}")

        return ";".join(problems)

    @property
    def replica_set_uri(self):
        """MongoDB URI listing every cluster host plus the replica set name."""
        hosts = ",".join(f"{host}:{self.port}" for host in self.cluster.hosts)
        return f"mongodb://{hosts}/?replicaSet={self.replica_set_name}"

    @property
    def standalone_uri(self):
        """MongoDB URI that addresses the application name on ``self.port``."""
        return f"mongodb://{self.model.app.name}:{self.port}/"
class EnodebCharm(CharmBase):
    """Charm for an srsenb eNodeB with a ``register`` action."""

    state = StoredState()

    def __init__(self, *args):
        super().__init__(*args)

        # An example of setting charm state
        # that's persistent across events
        self.state.set_default(is_started=False)

        if not self.state.is_started:
            self.state.is_started = True

        # Register all of the events we want to observe. Handlers are passed
        # as bound methods: observing with the charm object itself is
        # rejected by current releases of the framework.
        observed = (
            # Charm events
            (self.on.config_changed, self.on_config_changed),
            (self.on.install, self.on_install),
            (self.on.upgrade_charm, self.on_upgrade_charm),
            (self.on.register_action, self.on_register_action),
        )
        for event, handler in observed:
            self.framework.observe(event, handler)

    def on_config_changed(self, event):
        """Handle changes in configuration"""

    def on_install(self, event):
        """Called when the charm is being installed"""
        unit = self.model.unit

        # Install your software and its dependencies

        unit.status = ActiveStatus()

    def on_upgrade_charm(self, event):
        """Upgrade the charm."""
        unit = self.model.unit

        # Mark the unit as under Maintenance.
        unit.status = MaintenanceStatus("Upgrading charm")

        self.on_install(event)

        # When maintenance is done, return to an Active state
        unit.status = ActiveStatus()

    def on_register_action(self, event):
        """Register to AGW (EPC).

        Runs ``srsenb`` with the ``mme-addr``, ``gtp-bind-addr`` and
        ``s1c-bind-addr`` action parameters and returns its standard output
        under the ``output`` key. The action is failed when the command
        cannot be started or exits non-zero.
        """
        mme_addr = event.params["mme-addr"]
        gtp_bind_addr = event.params["gtp-bind-addr"]
        s1c_bind_addr = event.params["s1c-bind-addr"]
        # Passed as an argument list so parameter values are never
        # interpreted by a shell.
        command = [
            "srsenb",
            "--enb.name=dummyENB01",
            "--enb.mcc=901",
            "--enb.mnc=70",
            "--enb.mme_addr={}".format(mme_addr),
            "--enb.gtp_bind_addr={}".format(gtp_bind_addr),
            "--enb.s1c_bind_addr={}".format(s1c_bind_addr),
            "--enb_files.rr_config=/config/rr.conf",
            "--enb_files.sib_config=/config/sib.conf",
            "--enb_files.drb_config=/config/drb.conf",
            "/config/enb.conf.fauxrf",
        ]
        try:
            # universal_newlines decodes the output so the result is text.
            stdout = subprocess.check_output(command, universal_newlines=True)
        except (OSError, subprocess.CalledProcessError) as ex:
            # OSError covers a missing or non-executable srsenb binary, which
            # a shell would have reported as a non-zero exit status.
            event.fail(str(ex))
            return
        event.set_results({"output": stdout})
class Slurmd(Object):
    """Slurmd relation interface.

    Publishes the munge key and the slurm config in the application data of
    the relation and collects the ``slurmd_info`` published by the related
    applications.
    """

    on = SlurmdRequiresEvents()
    _state = StoredState()

    def __init__(self, charm, relation_name):
        """Set self._relation_name and self.charm."""
        super().__init__(charm, relation_name)
        self._charm = charm
        self._relation_name = relation_name

        relation_events = self._charm.on[self._relation_name]
        self.framework.observe(
            relation_events.relation_created,
            self._on_relation_created)
        self.framework.observe(
            relation_events.relation_joined,
            self._on_relation_joined)
        self.framework.observe(
            relation_events.relation_changed,
            self._on_relation_changed)
        self.framework.observe(
            relation_events.relation_broken,
            self._on_relation_broken)

    def _on_relation_created(self, event):
        """Publish the munge key in the application relation data."""
        # Only the leader may write application relation data.
        if not self.framework.model.unit.is_leader():
            return

        # Check that slurm has been installed so that we know the munge key is
        # available. Defer if slurm has not been installed yet.
        if not self._charm.is_slurm_installed():
            event.defer()
            return

        # Get the munge_key from the charm and set it to the app data on the
        # relation to be retrieved on the other side.
        app_relation_data = event.relation.data[self.model.app]
        app_relation_data['munge_key'] = self._charm.get_munge_key()

    def _on_relation_joined(self, event):
        """Use the joining partition as default when none has been set."""
        partition_name = event.relation.data[event.app].get('partition_name')
        if not partition_name:
            event.defer()
            return

        if not self._charm.get_default_partition():
            self._charm.set_default_partition(partition_name)

    def _on_relation_changed(self, event):
        """Flag slurmd as available once the remote app publishes its info."""
        event_app_data = event.relation.data.get(event.app)
        if event_app_data:
            slurmd_info = event_app_data.get('slurmd_info')
            if slurmd_info:
                self._charm.set_slurmd_available(True)
                self.on.slurmd_available.emit()
        else:
            # NOTE(review): the flattened source does not show which `if`
            # this `else` belongs to; it is read here as "no application
            # data yet" - confirm against the original file.
            event.defer()
            return

    def _on_relation_broken(self, event):
        """Clear the published data and flag slurmd as unavailable."""
        # NOTE(review): both application-data writes are assumed to sit
        # under the leader check - confirm against the original file.
        if self.framework.model.unit.is_leader():
            event.relation.data[self.model.app]['munge_key'] = ""
            self.set_slurm_config_on_app_relation_data("")
        self._charm.set_slurmd_available(False)

    def _assemble_slurm_configurator_inventory(self):
        """Assemble the slurm-configurator partition."""
        hostname = socket.gethostname()
        inventory = get_inventory(hostname, hostname)

        return {
            'inventory': [inventory],
            'partition_name': 'configurator',
            'partition_state': 'DRAIN',
            'partition_config': ''
        }

    def get_slurmd_info(self):
        """Return the node info for units of applications on the relation.

        The list holds the decoded ``slurmd_info`` of every related
        application that has published it, followed by the configurator
        partition of this host.
        """
        nodes_info = []
        for relation in self.framework.model.relations[self._relation_name]:
            app = relation.app
            if not app:
                continue
            app_data = relation.data.get(app)
            if not app_data:
                continue
            slurmd_info = app_data.get('slurmd_info')
            if slurmd_info:
                nodes_info.append(json.loads(slurmd_info))

        nodes_info.append(self._assemble_slurm_configurator_inventory())
        return nodes_info

    def set_slurm_config_on_app_relation_data(
        self,
        slurm_config,
    ):
        """Set the slurm_config to the app data on the relation.

        Setting data on the relation forces the units of related applications
        to observe the relation-changed event so they can acquire and
        render the updated slurm_config.
        """
        relations = self._charm.framework.model.relations[self._relation_name]
        for relation in relations:
            app_relation_data = relation.data[self.model.app]
            app_relation_data['slurm_config'] = json.dumps(slurm_config)
class ExternalSecretsCharm(CharmBase):
    """Charm that deploys the external-secrets operator pod.

    The pod spec carries the custom resource definitions and the RBAC rules
    loaded from the YAML files under ``files/``.
    """

    _stored = StoredState()

    def __init__(self, *args):
        super().__init__(*args)
        self.framework.observe(self.on.config_changed, self._on_config_changed)

    def _on_config_changed(self, _):
        """Apply the pod spec, or block the unit when it cannot be built."""
        logger.info("entering _on_config_changed")

        pod_spec = self._build_pod_spec()
        if pod_spec is None:
            # Imported here because only ActiveStatus is known to be
            # imported at module level.
            from ops.model import BlockedStatus

            self.unit.status = BlockedStatus(
                'could not load CRD or RBAC definitions')
            return

        self.model.pod.set_spec(pod_spec)
        self.unit.status = ActiveStatus(f'{self.app.name} pod ready')

    def _build_crds(self):
        """Load the CRD documents.

        Returns:
            A list with one parsed document per CRD file, or None when a
            file is not valid YAML.
        """
        crd_files = [
            'files/external_secret_crd.yaml',
            'files/secret_store_crd.yaml',
        ]
        try:
            return [
                yaml.load(Path(f).read_text(), Loader=yaml.FullLoader)
                for f in crd_files
            ]
        except yaml.YAMLError as e:
            logger.error('could not read yaml file: %s', e)
            return None

    def _build_rules(self):
        """Load the RBAC document.

        Returns:
            The parsed content of ``files/rbac.yaml``, or None when the file
            is not valid YAML.
        """
        try:
            # read_text() closes the file; a bare open() left it to the GC.
            return yaml.load(
                Path('files/rbac.yaml').read_text(),
                Loader=yaml.FullLoader,
            )
        except yaml.YAMLError as e:
            logger.error('could not read yaml file: %s', e)
            return None

    def _build_pod_spec(self):
        """Assemble the pod spec dict.

        Returns:
            The spec, or None when the CRDs or the RBAC rules could not be
            loaded.
        """
        crds = self._build_crds()
        rules = self._build_rules()
        if crds is None or rules is None:
            return None

        custom_resource_definitions = [
            {
                'name': crd.get('metadata').get('name'),
                'spec': crd.get('spec'),
            }
            for crd in crds
        ]

        spec = {
            'version': 3,
            'kubernetesResources': {
                'customResourceDefinitions': custom_resource_definitions,
            },
            'serviceAccount': {
                'roles': [{
                    'global': True,
                    'rules': rules.get('rules'),
                }],
            },
            'containers': [{
                'name': self.app.name,
                'imageDetails': {
                    'imagePath': 'containersol/externalsecret-operator:master'
                },
                'ports': [{
                    'containerPort': 8080,
                    'protocol': 'TCP',
                    'name': 'operator'
                }],
                'command': ['/manager', '--enable-leader-election']
            }]
        }

        return spec
class CAClient(Object):
    """Provides a client type that handles the interaction with CA charms.

    It mainly provides:

    * an indication that a CA unit is available to accept requests for
      certificates;
    * a method to provide details (CN, SANs) to the CA for generating
      certificates;
    * an indication that a certificate and a key have been generated;
    * a way to retrieve the generated certificate and key as well as a CA
      certificate.
    """

    on = CAClientEvents()
    _stored = StoredState()

    # Keys in this unit's relation data under which requests of each type
    # are published. 'legacy' requests use top level fields instead.
    REQUEST_KEYS = {
        'legacy': '',
        'server': 'cert_requests',
        'client': 'client_cert_requests',
        'application': 'application_cert_requests'
    }
    # Suffixes of the remote relation data fields that carry the responses.
    PROCESSED_KEYS = {
        'legacy': '',
        'server': 'processed_requests',
        'client': 'processed_client_requests',
        'application': 'processed_application_requests'
    }

    def __init__(self, charm, relation_name):
        """Set up stored state defaults and observe the relation events.

        :param charm: the charm object to be used as a parent object.
        :type charm: :class: `ops.charm.CharmBase`
        :param relation_name: name of the relation to the CA.
        :type relation_name: str
        """
        super().__init__(charm, relation_name)
        self._relation_name = self.relation_name = relation_name
        self._common_name = None
        self._sans = None
        # Remote fields are prefixed with the unit name with '/' replaced.
        self._munged_name = self.model.unit.name.replace("/", "_")
        self._stored.set_default(ca_certificate=None,
                                 key=None,
                                 certificate=None,
                                 root_ca_chain=None,
                                 legacy=None,
                                 client=None,
                                 server=None,
                                 application=None)
        self.framework.observe(charm.on[relation_name].relation_joined,
                               self._on_relation_joined)
        self.framework.observe(charm.on[relation_name].relation_changed,
                               self._on_relation_changed)
        # Event emitted once every request of a given type is fulfilled.
        self.ready_events = {
            'legacy': self.on.tls_config_ready,
            'server': self.on.tls_server_config_ready,
            'client': self.on.tls_client_config_ready,
            'application': self.on.tls_app_config_ready
        }

    def _on_relation_joined(self, event):
        """Emit ``ca_available`` when the relation is joined."""
        self.on.ca_available.emit()

    @property
    def is_joined(self):
        """Whether this charm has joined the relation."""
        rel = self.framework.model.get_relation(self._relation_name)
        return rel is not None

    @property
    def is_ready(self):
        """Whether this charm has fulfilled the legacy certificate requests."""
        return self._is_cert_ready('legacy')

    def _is_cert_ready(self, cert_type):
        """Check whether there is a response for the cert_type.

        :param cert_type: Certificate type
        :type cert_type: str
        :returns: Whether there is a response for the cert_type
        :rtype: bool
        """
        try:
            return all([self.ca_certificate,
                        getattr(self._stored, cert_type)])
        except CAClientError:
            # ca_certificate raises while nothing was requested or received.
            return False

    @property
    def is_application_cert_ready(self):
        """Have application certificate requests been fulfilled.

        :returns: Whether requests have been fulfilled
        :rtype: bool
        """
        return self._is_cert_ready('application')

    @property
    def is_server_cert_ready(self):
        """Have server certificate requests been fulfilled.

        :returns: Whether requests have been fulfilled
        :rtype: bool
        """
        return self._is_cert_ready('server')

    @property
    def is_client_cert_ready(self):
        """Have client certificate requests been fulfilled.

        :returns: Whether requests have been fulfilled
        :rtype: bool
        """
        return self._is_cert_ready('client')

    def _get_certs_and_keys(self, request_type):
        """For the given request_type return the certs and keys from the CA.

        The entry for the alphabetically first CN is also exposed under the
        extra key 'default'.

        :param request_type: Certificate type
        :type request_type: str
        :returns: Dictionary keyed on CN of certs and keys
        :rtype: Dict[str, Union[
            default_backend.openssl.rsa.openssl.rsa._RSAPrivateKey,
            default_backend.openssl.openssl.x509._Certificate]]
        :raises: CAClientError
        """
        if not self._is_certificate_requested(request_type):
            raise CAClientError(BlockedStatus,
                                'a certificate request has not been sent',
                                self._relation_name)
        crypto_data = getattr(self._stored, request_type)
        if not crypto_data:
            raise CAClientError(
                WaitingStatus,
                'a {} has not been obtained yet.'.format(request_type),
                self._relation_name)
        pem_data = {}
        for cn, data in crypto_data.items():
            pem_data[cn] = {
                'key': load_pem_private_key(data['key'].encode('utf-8'),
                                            password=None,
                                            backend=default_backend()),
                'cert': load_pem_x509_certificate(
                    data['cert'].encode('utf-8'),
                    backend=default_backend())
            }
        if pem_data:
            default_entry = sorted(pem_data.keys())[0]
            pem_data['default'] = pem_data[default_entry]
        return pem_data

    def _get_certificate(self, txt_cert):
        """Return the certificate object for the given string.

        :param txt_cert: Text of certificate.
        :type txt_cert: str
        :returns: Certificate
        :rtype: default_backend.openssl.x509._Certificate
        :raises: CAClientError
        """
        if not self._any_certificate_requested():
            raise CAClientError(BlockedStatus,
                                'a certificate request has not been sent',
                                self._relation_name)
        if txt_cert is None:
            raise CAClientError(WaitingStatus,
                                'certificate has not been obtained yet.',
                                self._relation_name)
        return load_pem_x509_certificate(txt_cert.encode('utf-8'),
                                         backend=default_backend())

    @property
    def ca_certificate(self):
        """Return the CA certificate.

        :returns: Certificate
        :rtype: default_backend.openssl.x509._Certificate
        :raises: CAClientError
        """
        return self._get_certificate(self._stored.ca_certificate)

    @property
    def root_ca_chain(self):
        """Return the CA chain certificate.

        :returns: Certificate
        :rtype: default_backend.openssl.x509._Certificate
        :raises: CAClientError
        """
        return self._get_certificate(self._stored.root_ca_chain)

    @property
    def certificate(self):
        """Certificate from CA for certificate request using legacy method.

        :returns: Certificate
        :rtype: default_backend.openssl.x509._Certificate
        :raises: CAClientError
        """
        return self._get_certs_and_keys('legacy')[
            self._legacy_request_cn]['cert']

    @property
    def server_certificate(self):
        """Certificate from CA for server certificate request.

        This method should not be used if multiple certificates were
        requested. Instead use self.server_certs()

        :returns: Certificate
        :rtype: default_backend.openssl.x509._Certificate
        :raises: CAClientError
        """
        return self._get_certs_and_keys('server')['default']['cert']

    @property
    def client_certificate(self):
        """Certificate from CA for client certificate request.

        This method should not be used if multiple certificates were
        requested. Instead use self.client_certs()

        :returns: Certificate
        :rtype: default_backend.openssl.x509._Certificate
        :raises: CAClientError
        """
        return self._get_certs_and_keys('client')['default']['cert']

    @property
    def application_certificate(self):
        """Certificate from CA for application certificate request.

        This method should not be used if multiple certificates were
        requested. Instead use self.application_certs()

        :returns: Certificate
        :rtype: default_backend.openssl.x509._Certificate
        :raises: CAClientError
        """
        return self._get_certs_and_keys('application')['default']['cert']

    @property
    def key(self):
        """Key from CA for certificate request using legacy method.

        :returns: Key
        :rtype: default_backend.openssl.rsa.openssl.rsa._RSAPrivateKey
        :raises: CAClientError
        """
        return self._get_certs_and_keys('legacy')[
            self._legacy_request_cn]['key']

    @property
    def server_key(self):
        """Key from CA for server certificate request.

        This method should not be used if multiple certificates were
        requested. Instead use self.server_certs()

        :returns: Key
        :rtype: default_backend.openssl.rsa.openssl.rsa._RSAPrivateKey
        :raises: CAClientError
        """
        return self._get_certs_and_keys('server')['default']['key']

    @property
    def client_key(self):
        """Key from CA for client certificate request.

        This method should not be used if multiple certificates were
        requested. Instead use self.client_certs()

        :returns: Key
        :rtype: default_backend.openssl.rsa.openssl.rsa._RSAPrivateKey
        :raises: CAClientError
        """
        return self._get_certs_and_keys('client')['default']['key']

    @property
    def application_key(self):
        """Key from CA for application certificate request.

        This method should not be used if multiple certificates were
        requested. Instead use self.application_certs()

        :returns: Key
        :rtype: default_backend.openssl.rsa.openssl.rsa._RSAPrivateKey
        :raises: CAClientError
        """
        return self._get_certs_and_keys('application')['default']['key']

    @property
    def application_certs(self):
        """Application Certificates and keys returned by CA

        :returns: Dictionary keyed on CN of certs and keys
        :rtype: Dict[str, Union[
            default_backend.openssl.rsa.openssl.rsa._RSAPrivateKey,
            default_backend.openssl.openssl.x509._Certificate]]
        :raises: CAClientError
        """
        return self._get_certs_and_keys('application')

    @property
    def server_certs(self):
        """Server Certificates and keys returned by CA

        :returns: Dictionary keyed on CN of certs and keys
        :rtype: Dict[str, Union[
            default_backend.openssl.rsa.openssl.rsa._RSAPrivateKey,
            default_backend.openssl.openssl.x509._Certificate]]
        :raises: CAClientError
        """
        return self._get_certs_and_keys('server')

    @property
    def client_certs(self):
        """Client Certificates and keys returned by CA

        :returns: Dictionary keyed on CN of certs and keys
        :rtype: Dict[str, Union[
            default_backend.openssl.rsa.openssl.rsa._RSAPrivateKey,
            default_backend.openssl.openssl.x509._Certificate]]
        :raises: CAClientError
        """
        return self._get_certs_and_keys('client')

    @property
    def _legacy_request_cn(self):
        """The common name used for a certificate request using legacy method.

        :returns: Common name published in this unit's relation data, or
                  None when there is no relation or no name was published.
        :rtype: Union[str, None]
        """
        cn = None
        rel = self.framework.model.get_relation(self._relation_name)
        if rel:
            cn = rel.data[self.framework.model.unit].get('common_name')
        return cn

    def request_certificate(self, common_name, sans, certificate_type=None):
        """Request a new certificate of the given type.

        If arguments have not changed from a previous request, then a
        different certificate will not be generated. This method can be
        useful if a list of SANS has changed during the lifetime of a charm
        and a new certificate needs to be generated.

        Callers normally use the typed wrappers ``request_server_certificate``,
        ``request_client_certificate`` and ``request_application_certificate``
        which fix ``certificate_type``.

        :param common_name: a new common name to use in a certificate.
        :type common_name: str
        :param sans: a list of Subject Alternative Names to use in a
                     certificate.
        :type sans: list(str)
        :param certificate_type: a key of ``REQUEST_KEYS``.
        :type certificate_type: str
        :raises: CAClientError if the relation does not exist
        :raises: KeyError if certificate_type is not a key of REQUEST_KEYS
                 (this includes the default value None)
        """
        key = self.REQUEST_KEYS[certificate_type]
        rel = self.framework.model.get_relation(self._relation_name)
        if rel is None:
            raise CAClientError(BlockedStatus, 'missing relation',
                                self._relation_name)
        logger.info('Requesting a CA certificate. Common name: {}, SANS: {}'
                    ''.format(common_name, sans))
        # Merge the new request into any requests already published.
        requests = rel.data[self.model.unit].get(key, '{}')
        requests = json.loads(requests)
        requests[common_name] = {'sans': sans}
        rel.data[self.model.unit][key] = json.dumps(requests, sort_keys=True)
        if certificate_type == 'server':
            # for backwards compatibility, request goes in its own fields
            rel_data = rel.data[self.model.unit]
            rel_data['common_name'] = common_name
            rel_data['sans'] = json.dumps(sans)

    request_server_certificate = functools.partialmethod(
        request_certificate,
        certificate_type='server')

    request_client_certificate = functools.partialmethod(
        request_certificate,
        certificate_type='client')

    request_application_certificate = functools.partialmethod(
        request_certificate,
        certificate_type='application')

    def _is_certificate_requested(self, request_type):
        """Has a request been sent of this type.

        :param request_type: Certificate type
        :type request_type: str
        :returns: Whether a request has been sent.
        :rtype: bool
        """
        return bool(self._get_all_requests().get(request_type))

    def _any_certificate_requested(self):
        """Have any certificate requests been sent

        :returns: Whether a request has been sent.
        :rtype: bool
        """
        return any(self._get_all_requests().values())

    def _get_legacy_response(self, remote_data):
        """Retrieve response from CA using legacy method.

        :param remote_data: Data returned by CA
        :type remote_data: ops.model.RelationDataContent
        :returns: Dict keyed on cn of key and cert
        :rtype: Dict[str, str]
        """
        certs_data = {}
        cert = remote_data.get('{}.server.cert'.format(self._munged_name))
        key = remote_data.get('{}.server.key'.format(self._munged_name))
        if all([self._legacy_request_cn, cert, key]):
            certs_data = {self._legacy_request_cn: {'key': key, 'cert': cert}}
        return certs_data

    def _get_request_response(self, request_type, remote_data):
        """Retrieve the response from the CA for the given request type.

        Types without a processed-requests key (i.e. 'legacy') are answered
        through the legacy top level fields only.

        :param request_type: Certificate type
        :type request_type: str
        :param remote_data: Data returned by CA
        :type remote_data: ops.model.RelationDataContent
        :returns: Dict keyed on cn of key and cert
        :rtype: Dict[str, str]
        """
        rq_key = self.PROCESSED_KEYS[request_type]
        certs_data = {}
        if rq_key:
            field = '{}.{}'.format(self._munged_name, rq_key)
            json_certs_data = remote_data.get(field)
            if json_certs_data:
                certs_data = json.loads(json_certs_data)
            # If a server cert was requested by the legacy top level mechanism
            # then make sure it is included in the server certs dict.
            if request_type == 'server':
                certs_data.update(self._get_legacy_response(remote_data))
        else:
            certs_data = self._get_legacy_response(remote_data)
        return certs_data

    def _store_certificates(self, request_type, crypto_data):
        """Store the response from the CA for the given request type.

        :param request_type: Certificate type
        :type request_type: str
        :param crypto_data: Data returned by CA for request. Expected to be
                            in the form {'cn': {'cert': str, 'key': str}}
        :type crypto_data: Dict[str, Dict[str, str]]
        """
        setattr(self._stored, request_type, crypto_data)

    def _get_all_requests(self):
        """Get all the certificate requests this unit has made.

        :returns: Dict keyed on request type, e.g.
                  {'application': {'cn': {'sans': [...]}}, ...}. Empty when
                  the relation does not exist.
        :rtype: Dict[str, Dict[str, Dict[str, List[str]]]]
        """
        requests = {}
        rel = self.framework.model.get_relation(self._relation_name)
        if rel is None:
            return requests
        unit_data = rel.data[self.framework.model.unit]
        for request_type, request_key in self.REQUEST_KEYS.items():
            if request_type == 'legacy':
                # Legacy requests live in the top level common_name/sans
                # fields rather than in a JSON encoded dictionary.
                cn = unit_data.get('common_name')
                if cn:
                    requests[request_type] = {
                        cn: {
                            'sans': json.loads(unit_data.get('sans', '[]'))
                        }
                    }
            else:
                requests[request_type] = json.loads(
                    unit_data.get(request_key, '{}'))
        return requests

    def _valid_response(self, response):
        """Check if data from CA for request is valid.

        :param response: Response entry for a single request, expected to
                         hold 'cert' and 'key'.
        :type response: Union[Dict[str, str], None]
        :returns: If response is valid
        :rtype: bool
        """
        if response:
            return all([response.get('cert'), response.get('key')])
        else:
            return False

    def _on_relation_changed(self, event):
        """Check if requests have been processed and emit events accordingly.

        Check which requests have been processed. If all requests of a
        particular type have been completed then emit the corresponding
        event.

        :raises: CAClientError
        """
        if event.unit is None:
            # Application-level changes carry no remote unit, so there is no
            # unit data bag to read a CA response from.
            return
        remote_data = event.relation.data[event.unit]
        ca = remote_data.get('ca')
        if not ca:
            return
        self._stored.ca_certificate = ca
        chain = remote_data.get('chain')
        if chain:
            self._stored.root_ca_chain = chain
        requests = self._get_all_requests()
        for request_type, request in requests.items():
            if not request:
                continue
            response = self._get_request_response(request_type, remote_data)
            if request_type == 'application':
                # Application responses are checked under a fixed key
                # rather than per requested CN.
                req_keys = ['app_data']
            else:
                req_keys = request.keys()
            for key in req_keys:
                if not self._valid_response(response.get(key)):
                    message = (
                        'A CA has not yet processed requests: {}'.format(key))
                    logger.info(message)
                    raise CAClientError(WaitingStatus, message,
                                        self._relation_name)
            else:
                # All requests of this type have completed so emit the
                # corresponding event
                self._store_certificates(request_type, response)
                self.ready_events[request_type].emit()
class SimpleProxyCharm(CharmBase):
    """Proxy charm that runs its actions on a remote host over SSH."""

    state = StoredState()

    def __init__(self, *args):
        """Initialise stored state and register every event handler."""
        super().__init__(*args)

        # An example of setting charm state
        # that's persistent across events
        self.state.set_default(is_started=False)

        if not self.state.is_started:
            self.state.is_started = True

        # Register all of the events we want to observe. Each event is bound
        # to an explicit handler; passing the charm object itself as the
        # observer is rejected by newer releases of the framework.
        for event, handler in (
            # Charm events
            (self.on.config_changed, self.on_config_changed),
            (self.on.install, self.on_install),
            (self.on.upgrade_charm, self.on_upgrade_charm),
            # Charm actions (primitives)
            (self.on.touch_action, self.on_touch_action),
            # OSM actions (primitives)
            (self.on.start_action, self.on_start_action),
            (self.on.stop_action, self.on_stop_action),
            (self.on.restart_action, self.on_restart_action),
            (self.on.reboot_action, self.on_reboot_action),
            (self.on.upgrade_action, self.on_upgrade_action),
            # SSH Proxy actions (primitives)
            (self.on.generate_ssh_key_action,
             self.on_generate_ssh_key_action),
            (self.on.get_ssh_public_key_action,
             self.on_get_ssh_public_key_action),
            (self.on.run_action, self.on_run_action),
            (self.on.verify_ssh_credentials_action,
             self.on_verify_ssh_credentials_action),
        ):
            self.framework.observe(event, handler)

    def get_ssh_proxy(self):
        """Get the SSHProxy instance"""
        proxy = SSHProxy(
            hostname=self.model.config["ssh-hostname"],
            username=self.model.config["ssh-username"],
            password=self.model.config["ssh-password"],
        )
        return proxy

    def on_config_changed(self, event):
        """Handle changes in configuration"""
        unit = self.model.unit

        # Unit should go into a waiting state until verify_ssh_credentials
        # is successful
        unit.status = WaitingStatus("Waiting for SSH credentials")
        proxy = self.get_ssh_proxy()

        verified = proxy.verify_credentials()
        if verified:
            unit.status = ActiveStatus()
        else:
            unit.status = BlockedStatus("Invalid SSH credentials.")

    def on_install(self, event):
        """Called when the charm is being installed"""
        unit = self.model.unit

        if not SSHProxy.has_ssh_key():
            unit.status = MaintenanceStatus("Generating SSH keys...")

            print("Generating SSH Keys")
            SSHProxy.generate_ssh_key()

        unit.status = ActiveStatus()

    def on_touch_action(self, event):
        """Touch a file."""
        try:
            filename = event.params["filename"]
            proxy = self.get_ssh_proxy()
            stdout, stderr = proxy.run("touch {}".format(filename))
            event.set_results({"output": stdout})
        except Exception as ex:
            # The failure message must be a string, not the exception object.
            event.fail(str(ex))

    def on_upgrade_charm(self, event):
        """Upgrade the charm."""
        unit = self.model.unit

        # Mark the unit as under Maintenance.
        unit.status = MaintenanceStatus("Upgrading charm")

        self.on_install(event)

        # When maintenance is done, return to an Active state
        unit.status = ActiveStatus()

    ###############
    # OSM methods #
    ###############
    def on_start_action(self, event):
        """Start the VNF service on the VM."""
        pass

    def on_stop_action(self, event):
        """Stop the VNF service on the VM."""
        pass

    def on_restart_action(self, event):
        """Restart the VNF service on the VM."""
        pass

    def on_reboot_action(self, event):
        """Reboot the VM."""
        proxy = self.get_ssh_proxy()
        stdout, stderr = proxy.run("sudo reboot")

        if len(stderr):
            event.fail(stderr)

    def on_upgrade_action(self, event):
        """Upgrade the VNF service on the VM."""
        pass

    #####################
    # SSH Proxy methods #
    #####################
    def on_generate_ssh_key_action(self, event):
        """Generate a new SSH keypair for this unit."""
        if not SSHProxy.generate_ssh_key():
            event.fail("Unable to generate ssh key")

    def on_get_ssh_public_key_action(self, event):
        """Get the SSH public key for this unit."""
        # Fetch the key once and reuse it for the action result.
        pubkey = SSHProxy.get_ssh_public_key()

        event.set_results({"pubkey": pubkey})

    def on_run_action(self, event):
        """Run an arbitrary command on the remote host."""
        cmd = event.params["command"]

        proxy = self.get_ssh_proxy()
        stdout, stderr = proxy.run(cmd)

        event.set_results({"output": stdout})

        if len(stderr):
            event.fail(stderr)

    def on_verify_ssh_credentials_action(self, event):
        """Verify the SSH credentials for this unit."""
        proxy = self.get_ssh_proxy()

        verified = proxy.verify_credentials()
        if verified:
            print("Verified!")
            event.set_results({"verified": True})
        else:
            print("Verification failed!")
            event.set_results({"verified": False})
class TrainingCharm(CharmBase):
    """Charm that publishes a single-container Grafana pod spec."""

    _stored = StoredState()

    def __init__(self, *args):
        """Hook the config-changed event up to the pod spec builder."""
        super().__init__(*args)
        self.framework.observe(self.on.config_changed,
                               self._on_config_changed)

    def _on_config_changed(self, _=None):
        """Send a freshly built pod spec and report the unit as active."""
        self.model.pod.set_spec(self._build_pod_spec())
        self.unit.status = ActiveStatus("Grafana pod ready.")

    def _build_pod_spec(self):
        """Return the pod spec dict with one Grafana container.

        The container exposes ``grafana_port``, is probed on ``/api/health``
        at that port and mounts the rendered ``grafana.ini`` under
        ``/etc/grafana``.
        """
        grafana_port = self.model.config["grafana_port"]
        ini_text = self._build_grafana_ini()

        health_probe = {
            "httpGet": {
                "path": "/api/health",
                "port": grafana_port
            },
            "initialDelaySeconds": 10,
            "timeoutSeconds": 30,
        }
        config_mount = {
            "name": "grafana-config-ini",
            "mountPath": "/etc/grafana",
            "files": {
                "grafana.ini": ini_text
            },
        }
        container = {
            "name": self.app.name,
            "imageDetails": {
                "imagePath": "grafana/grafana:7.2.1-ubuntu"
            },
            "ports": [{
                "containerPort": grafana_port,
                "protocol": "TCP"
            }],
            "readinessProbe": health_probe,
            "files": [config_mount],
            "config": {},  # used to store hashes of config file text
        }
        return {"containers": [container]}

    def _build_grafana_ini(self):
        """Render the grafana.ini text from the port and admin credentials."""
        charm_config = self.model.config
        values = (
            charm_config["grafana_port"],
            charm_config["admin_username"],
            charm_config["admin_password"],
        )
        # The template is filled in first and dedented afterwards.
        return textwrap.dedent("""
        [server]
        http_port = {0}

        [security]
        admin_user = {1}
        admin_password = {2}
        """.format(*values))
class MSSQLCharm(CharmBase):
    """Charm that builds and publishes the pod spec for an MSSQL container."""

    on = MSSQLCharmEvents()
    state = StoredState()

    def __init__(self, parent, key):
        """Register event handlers and initialise stored state."""
        super().__init__(parent, key)

        # Every event is bound to an explicit handler method; passing the
        # charm object itself as the observer is rejected by newer releases
        # of the framework.
        self.framework.observe(self.on.install, self.set_pod_spec)
        # self.framework.observe(self.on.start, self.on_start)
        self.framework.observe(self.on.stop, self.on_stop)
        self.framework.observe(self.on.config_changed,
                               self.on_config_changed)
        self.framework.observe(self.on.db_relation_joined,
                               self.on_db_relation_joined)
        self.framework.observe(self.on.db_relation_changed,
                               self.on_db_relation_changed)
        self.framework.observe(self.on.mssql_ready, self.on_mssql_ready)

        # 'ready' is read by on_db_relation_changed, so it needs a default;
        # without one the first read raises AttributeError.
        self.state.set_default(spec=None, ready=False)

    def on_stop(self, event):
        """Log that the stop hook ran."""
        log('Ran on_stop')

    def on_config_changed(self, event):
        """Re-apply the pod spec whenever the configuration changes."""
        log('Ran on_config_changed hook')
        self.set_pod_spec(event)

    def on_mssql_ready(self, event):
        """Handle the mssql_ready event (currently a no-op)."""
        # NOTE(review): nothing sets state.ready to True, so
        # on_db_relation_changed defers indefinitely - confirm whether this
        # handler is meant to flip the flag.
        pass

    def on_db_relation_joined(self, event):
        """Log that the db relation was joined."""
        # The previous body wrote to self._state and called
        # self._write_state(); this class defines neither, so the hook
        # failed with AttributeError as soon as it ran.
        log('Ran on_db_relation_joined hook')

    def on_db_relation_changed(self, event):
        """Defer the event until the charm state is marked ready."""
        if not self.state.ready:
            event.defer()
            return

    def set_pod_spec(self, event):
        """Validate the configuration and publish the pod spec.

        Only the leader sets the spec. The unit is put into BlockedStatus,
        and no spec is sent, when ``container_config`` is not a YAML mapping,
        ``ports`` is not a YAML list or ``sa_password`` does not meet the
        length/upper-case/digit criteria.
        """
        if not self.model.unit.is_leader():
            print('Not a leader, skipping set_pod_spec')
            self.model.unit.status = ActiveStatus()
            return

        self.model.unit.status = MaintenanceStatus('Setting pod spec')

        log('Adding secret to container_config', level='INFO')
        config = self.framework.model.config
        container_config = self.sanitized_container_config()
        if container_config is None:
            # sanitized_container_config() already set BlockedStatus;
            # indexing None below would raise TypeError.
            return
        container_config['mssql-secret'] = {'secret': {'name': 'mssql'}}

        log('Validating ports syntax', level='INFO')
        ports = yaml.safe_load(self.framework.model.config["ports"])
        if not isinstance(ports, list):
            self.model.unit.status = \
                BlockedStatus("ports is not a list of YAMLs")
            return

        log('Validating password', level='INFO')
        check_password = self.framework.model.config["sa_password"]
        # Require 8-20 characters with at least one upper-case letter and
        # one digit.
        if len(check_password) < 8 \
                or len(check_password) > 20 \
                or not any(char.isupper() for char in check_password) \
                or not any(char.isdigit() for char in check_password):
            self.model.unit.status = \
                BlockedStatus("sa_password does not respect criteria")
            return

        sa_password = b64encode(
            (check_password).encode('utf-8')).decode('utf-8')

        log('Setting pod spec', level='INFO')
        self.framework.model.pod.set_spec({
            'version': 3,
            'containers': [{
                'name': self.framework.model.app.name,
                'image': config["image"],
                'ports': ports,
                'envConfig': container_config,
            }],
            'kubernetesResources': {
                'secrets': [{
                    'name': 'mssql',
                    'type': 'Opaque',
                    'data': {
                        'SA_PASSWORD': sa_password,
                    }
                }]
            },
            'serviceAccount': {
                'roles': [{
                    'global': True,
                    'rules': [
                        {
                            'apiGroups': ['apps'],
                            'resources': ['statefulsets', 'deployments'],
                            'verbs': ['*'],
                        },
                        {
                            'apiGroups': [''],
                            'resources': ['pods', 'pods/exec'],
                            'verbs': [
                                'create', 'get', 'list', 'watch', 'update',
                                'patch'
                            ],
                        },
                        {
                            'apiGroups': [''],
                            'resources': ['configmaps'],
                            'verbs': ['get', 'watch', 'list'],
                        },
                        {
                            'apiGroups': [''],
                            'resources': ['persistentvolumeclaims'],
                            'verbs': ['create', 'delete'],
                        },
                    ],
                }]
            },
            # "restartPolicy": 'Always',
            # "terminationGracePeriodSeconds": 10,
        })
        self.model.unit.status = ActiveStatus()
        return

    def sanitized_container_config(self):
        """Uninterpolated container config without secrets

        :returns: The parsed ``container_config`` mapping (empty when the
                  option is blank), or None after setting BlockedStatus when
                  the option is not a YAML mapping.
        """
        config = self.framework.model.config
        if config["container_config"].strip() == "":
            container_config = {}
        else:
            container_config = \
                yaml.safe_load(self.framework.model.config["container_config"])
            if not isinstance(container_config, dict):
                self.framework.model.unit.status = \
                    BlockedStatus("container_config is not a YAML mapping")
                return None
        return container_config
class CharmK8SSparkCharm(CharmBase):
    """Charm that publishes the pod spec for a Spark master container."""

    _stored = StoredState()

    def __init__(self, *args):
        super().__init__(*args)
        # Only the start event is observed. on_config_changed,
        # on_upgrade_charm and _on_fortune_action are defined below but are
        # not wired to any event.
        observed = (
            (self.on.start, self.on_start),
        )
        for emitter, handler in observed:
            self.framework.observe(emitter, handler)
        self._stored.set_default(things=[])

    def _apply_spec(self, spec):
        """Send ``spec`` to the model and remember it; leader units only."""
        if not self.framework.model.unit.is_leader():
            return
        self.framework.model.pod.set_spec(spec)
        self._stored.spec = spec

    def make_pod_spec(self):
        """Return the version 3 pod spec for the 'spark-master' container.

        Only the container image is read from the charm config; every other
        value is fixed.
        """
        image = self.framework.model.config["image"]

        # Both probes issue the same HTTP GET against the web UI port.
        web_ui_check = {
            'path': '/',
            'port': 8080,
            'scheme': 'HTTP'
        }
        liveness = {
            'failureThreshold': 6,
            'httpGet': dict(web_ui_check),
            'initialDelaySeconds': 180,
            'periodSeconds': 20,
            'successThreshold': 1,
            'timeoutSeconds': 5
        }
        readiness = {
            'failureThreshold': 6,
            'httpGet': dict(web_ui_check),
            'initialDelaySeconds': 30,
            'periodSeconds': 10,
            'successThreshold': 1,
            'timeoutSeconds': 5
        }

        environment = {
            'SPARK_MODE': 'master',
            'SPARK_DAEMON_MEMORY': '',
            'SPARK_MASTER_PORT': '7077',
            'SPARK_MASTER_WEBUI_PORT': '8080'
        }
        exposed_ports = [
            {'containerPort': 8080, 'name': 'http', 'protocol': 'TCP'},
            {'containerPort': 7077, 'name': 'cluster', 'protocol': 'TCP'},
        ]
        # No 'resources' section is set on the container.
        master = {
            'envConfig': environment,
            'image': image,
            'imagePullPolicy': 'IfNotPresent',
            'kubernetes': {
                'livenessProbe': liveness,
                'readinessProbe': readiness,
            },
            'name': 'spark-master',
            'ports': exposed_ports,
        }

        # No 'serviceAccounts' or 'tolerations' are configured; only
        # pod-level settings are sent under kubernetesResources.
        pod_settings = {
            'annotations': {
                'kubernetes.io/limit-ranger':
                    'limitranger plugin set: cpu request for container '
                    'spark-master'
            },
            'labels': {
                'foo': 'bax',
                'app.kubernetes.io/name': 'spark',
            },
            'dnsPolicy': 'ClusterFirstWithHostNet',
            'securityContext': {
                'runAsNonRoot': True,
                'fsGroup': 14
            },
            'priorityClassName': 'top',
            'priority': 30,
            'readinessGates': [
                {'conditionType': 'PodScheduled'},
            ],
        }

        return {
            'version': 3,
            'containers': [master],
            'kubernetesResources': {
                'pod': pod_settings,
            },
        }

    def on_start(self, event):
        """Build and apply the pod spec, then mark the unit active."""
        unit = self.model.unit
        unit.status = MaintenanceStatus("Applying pod spec")

        self._apply_spec(self.make_pod_spec())

        unit.status = ActiveStatus()

    def on_config_changed(self, _):
        """Rebuild the pod spec and apply it without comparing to the old."""
        unit = self.model.unit
        rebuilt_spec = self.make_pod_spec()

        unit.status = MaintenanceStatus("Appling new pod spec")
        self._apply_spec(rebuilt_spec)
        unit.status = ActiveStatus()

    def _on_fortune_action(self, event):
        """Fail with the 'fail' parameter if it is set, else return a fortune."""
        failure = event.params["fail"]
        if failure:
            event.fail(failure)
            return
        event.set_results({
            "fortune":
                "A bug in the code is worth two in the documentation."
        })

    def on_upgrade_charm(self, event):
        """Upgrade the charm by re-running the start handler."""
        unit = self.model.unit

        # Flag maintenance while the spec is re-applied.
        unit.status = MaintenanceStatus("Upgrading charm")

        self.on_start(event)

        # Back to active once the start handler has finished.
        unit.status = ActiveStatus()
class NRPEClient(Object):
    """Client side of an NRPE relation.

    Checks are registered with ``add_check``/``remove_check`` and are written
    to disk and published on the relation by ``commit``.
    """

    on = NRPEClientEvents()
    state = StoredState()

    # Directory that receives one NRPE command file per check.
    nrpe_confdir = '/etc/nagios/nrpe.d'
    # Directory that receives one Nagios service definition per check.
    nagios_exportdir = '/var/lib/nagios/export'
    check_template = """
#---------------------------------------------------
# This file is Juju managed
#---------------------------------------------------
command[%(check_name)s]=%(command)s
"""
    service_template = ("""
#---------------------------------------------------
# This file is Juju managed
#---------------------------------------------------
define service {
    use                             active-service
    host_name                       %(hostname)s
    service_description             %(hostname)s[%(check_name)s] """
                        """%(description)s
    check_command                   check_nrpe!%(check_name)s
    servicegroups                   %(servicegroup)s
}
""")

    def __init__(self, charm, relation_name='nrpe-external-master'):
        """Initialise stored state and observe ``relation_changed``.

        :param charm: The charm object used as parent
        :param relation_name: Name of the NRPE relation
        """
        super().__init__(charm, relation_name)
        self._relation_name = relation_name
        self.state.set_default(checks={}, dirty=False, nrpe_ready=False)

        self.framework.observe(charm.on[relation_name].relation_changed,
                               self.on_relation_changed)

    @property
    def is_joined(self):
        """Whether the NRPE relation exists."""
        return self.framework.model.get_relation(
            self._relation_name) is not None

    @property
    def is_available(self):
        """Whether a ``relation_changed`` event has been seen."""
        return self.state.nrpe_ready

    def add_check(self, command: List[str], name: str,
                  description: str = None, hostname: str = None):
        """
        Register a new check to be executed by NRPE.
        Call NRPEClient.commit() to save changes.
        If a check with the same name already exists, it will be updated.

        :param command: A string array containing the command to be executed
        :param name: Human readable name for the check
        :param description: A short description of the check
        :param hostname: Unit hostname. Defaults to a combination of
                         nagios_context and unit name
        """
        nagios_context = self.model.config['nagios_context']
        nagios_servicegroups = \
            self.model.config.get('nagios_servicegroups') or nagios_context
        unit_name = self.model.unit.name.replace("/", "_")
        hostname = hostname or f"{nagios_context}-{unit_name}"
        if not description:
            description = f'{name} {unit_name}'

        new_check = {
            'command': command,
            'description': description,
            'hostname': hostname,
            'servicegroup': nagios_servicegroups,
        }

        # Only flag the state as dirty when the check is new or changed.
        if name not in self.state.checks \
                or self.state.checks[name] != new_check:
            self.state.dirty = True
            self.state.checks[name] = new_check

    def remove_check(self, name: str):
        """Unregister a check. Call NRPEClient.commit() to save changes.

        Removing an unknown name is a no-op.

        :param name: Name of the check to remove
        """
        if name in self.state.checks:
            del self.state.checks[name]
            # Without the dirty flag commit() would skip and the removal
            # would never be published.
            self.state.dirty = True

    def commit(self):
        """Commit checks to NRPE and Nagios"""
        if not self.state.dirty:
            logger.info('Skipping NRPE commit as nothing changed')
            return

        if not self.state.nrpe_ready:
            logger.info('NRPE relation is not ready')
            return

        self._write_check_files()
        self._publish_to_nagios()
        subprocess.check_call(['systemctl', 'restart', 'nagios-nrpe-server'])
        self.state.dirty = False
        logger.info(
            f'Successfully updated NRPE checks: '
            f'{", ".join(self.state.checks)}')

    def _write_check_files(self):
        """Register the new checks with NRPE and place their configuration
        files in the appropriate locations"""
        for check_name in self.state.checks:
            check = self.state.checks[check_name]
            check_filename = os.path.join(self.nrpe_confdir,
                                          f'{check_name}.cfg')
            check_args = {
                'check_name': check_name,
                'command': ' '.join(check['command'])
            }
            with open(check_filename, 'w') as check_config:
                check_config.write(self.check_template % check_args)

            service_filename = os.path.join(
                self.nagios_exportdir,
                'service__{}_{}.cfg'.format(check['hostname'], check_name))
            service_args = {
                'hostname': check['hostname'],
                'description': check['description'],
                'check_name': check_name,
                'servicegroup': check['servicegroup']
            }
            with open(service_filename, 'w') as service_config:
                service_config.write(self.service_template % service_args)

    def _publish_to_nagios(self):
        """Publish check data on the monitors relation"""
        rel = self.framework.model.get_relation(self._relation_name)
        rel_data = rel.data[self.model.unit]
        rel_data['version'] = '0.3'

        nrpe_monitors = {
            check_name: {'command': check_name}
            for check_name in self.state.checks
        }

        rel_data['monitors'] = yaml.dump(
            {"monitors": {"remote": {"nrpe": nrpe_monitors}}})

    def on_relation_changed(self, event):
        """Mark NRPE as ready and emit ``nrpe_available`` the first time."""
        if not self.state.nrpe_ready:
            self.state.nrpe_ready = True
            self.on.nrpe_available.emit()
class CephISCSIGatewayCharmOcto(CephISCSIGatewayCharmBase):
    """Variant of CephISCSIGatewayCharmBase whose release is 'octopus'."""

    # Stored-state handle declared on this subclass.
    state = StoredState()
    # Release name associated with this charm class.
    release = 'octopus'
class MariaDbCharm(CharmBase):
    """Charm that renders a pod spec for MariaDB and serves ``mysql`` clients.

    The pod spec is produced from a YAML template on disk, filled in with
    values from the charm config. The last spec handled is kept in stored
    state so that config changes only re-apply the spec when it differs.
    """

    state = StoredState()

    def __init__(self, *args):
        """Set stored-state defaults and register all event handlers."""
        super().__init__(*args)
        # podSpec needs a default: on_config_changed compares against it and
        # may run before on_start has stored a spec.
        self.state.set_default(isStarted=False, podSpec=None)
        self.mysql = MySQL(self, 'mysql')

        # The latest version of the Operator Framework will raise an error
        # if you simply provided `self` as the second argument. It now requires
        # that you always explicitly declare the handler for the event.
        bindings = (
            (self.on.start, self.on_start),
            (self.on.stop, self.on_stop),
            (self.on.update_status, self.on_update_status),
            (self.on.config_changed, self.on_config_changed),
            (self.on.upgrade_charm, self.on_upgrade_charm),
            (self.on.leader_elected, self.on_leader_elected),
            (self.on.mysql_relation_joined, self.on_mysql_relation_joined),
            (self.on.mysql_relation_changed, self.on_mysql_relation_changed),
            (self.on.mysql_relation_departed, self.on_mysql_relation_departed),
            (self.on.mysql_relation_broken, self.on_mysql_relation_broken),
            (self.mysql.on.new_client, self.on_new_client),
        )
        for source, handler in bindings:
            self.framework.observe(source, handler)

    def on_start(self, event):
        """Build the pod spec, apply it when leader, and mark the charm started."""
        logging.info('START')
        self.model.unit.status = MaintenanceStatus('Configuring pod')
        podSpec = self.makePodSpec()
        # Only the leader is allowed to set the pod spec.
        if self.model.unit.is_leader():
            self.model.pod.set_spec(podSpec)
        self.state.isStarted = True
        self.state.podSpec = podSpec
        self.model.unit.status = ActiveStatus('ready')

    def on_stop(self, event):
        """Record that the charm is no longer started."""
        logging.info('STOP')
        self.state.isStarted = False

    def on_update_status(self, event):
        """Log the update-status hook; no other action is taken."""
        logging.info('UPDATE STATUS')

    def on_config_changed(self, event):
        """Re-apply the pod spec when the rendered spec differs from the stored one."""
        logging.info('CONFIG CHANGED')
        podSpec = self.makePodSpec()
        if self.state.podSpec != podSpec:
            # Guard like on_start does: set_spec is a leader-only operation.
            if self.model.unit.is_leader():
                self.model.unit.status = MaintenanceStatus('Configuring pod')
                self.model.pod.set_spec(podSpec)
            # Remember the spec so an unchanged config does not re-apply it.
            self.state.podSpec = podSpec
        self.model.unit.status = ActiveStatus('ready')

    def on_upgrade_charm(self, event):
        """Log the upgrade-charm hook; no other action is taken."""
        logging.info('UPGRADING')
        logging.info('UPGRADED')

    def on_leader_elected(self, event):
        """Log the leader-elected hook; no other action is taken."""
        logging.info('LEADER ELECTED')

    def on_mysql_relation_joined(self, event):
        """Log the mysql relation-joined hook."""
        logging.info('MYSQL RELATION JOINED')

    def on_mysql_relation_changed(self, event):
        """Log the mysql relation-changed hook."""
        logging.info('MYSQL RELATION CHANGED')

    def on_mysql_relation_departed(self, event):
        """Log the mysql relation-departed hook."""
        logging.info('MYSQL RELATION DEPARTED')

    def on_mysql_relation_broken(self, event):
        """Log the mysql relation-broken hook."""
        logging.info('MYSQL RELATION BROKEN')

    def on_new_client(self, event):
        """Hand connection details to a new client, deferring until started."""
        logging.info('NEW CLIENT')
        if not self.state.isStarted:
            logging.info('NEW CLIENT DEFERRED')
            event.defer()
            return

        logging.info('NEW CLIENT SERVING')
        config = self.model.config
        event.client.serve(
            database=config['database'],
            host=event.client.ingress_address,
            port=config['mysql_port'],
            user=config['user'],
            password=config['password'],
            root_password=config['root_password'],
        )

    def makePodSpec(self):
        """Render the pod-spec template selected by ``ha-mode`` and parse it.

        Returns:
            The parsed YAML document produced from the filled-in template.
        """
        logging.info('MAKING POD SPEC')
        config = self.model.config
        if config['ha-mode']:
            template_path = "templates/spec_template_ha.yaml"
            dockerImage = config['ha-image']
        else:
            template_path = "templates/spec_template.yaml"
            dockerImage = config['image']

        with open(template_path) as spec_file:
            podSpecTemplate = spec_file.read()

        data = {
            "name": self.model.app.name,
            "docker_image": dockerImage,
            "mysql_port": int(config['mysql_port']),
            "root_password": config['root_password'],
            "application_name": self.meta.name,
            "user": config['user'],
            "password": config['password'],
            "database": config['database'],
        }
        # safe_load: yaml.load() without an explicit Loader is unsafe and is
        # rejected outright by PyYAML >= 6.
        return yaml.safe_load(podSpecTemplate % data)
class CharmedOsmBase(CharmBase):
    """CharmedOsmBase Charm.

    Base class that assembles a pod spec through ``build_pod_spec`` (to be
    implemented by subclasses), optionally rewrites it for debugging, and
    applies it only when its hash differs from the one kept in stored state.
    """

    state = StoredState()

    def __init__(
        self,
        *args,
        oci_image="image",
        debug_mode_config_key=None,
        debug_pubkey_config_key=None,
        vscode_workspace: Dict = None,
        mysql_uri: bool = False,
    ) -> None:
        """
        CharmedOsmBase Charm constructor

        :params: oci_image: Resource name for main OCI image
        :params: debug_mode_config_key: Key in charm config for enabling debugging mode
        :params: debug_pubkey_config_key: Key in charm config for setting debugging public ssh key
        :params: vscode_workspace: VSCode workspace (defaults to an empty dict)
        :params: mysql_uri: When True, a ``mysql_config`` kwarg built from the
                            charm config is passed to ``build_pod_spec``
        """
        super().__init__(*args)

        # Internal state initialization
        self.state.set_default(pod_spec=None)

        self.image = OCIImageResource(self, oci_image)
        # Debugging needs both config keys to be declared.
        self.debugging_supported = bool(
            debug_mode_config_key and debug_pubkey_config_key
        )
        self.debug_mode_config_key = debug_mode_config_key
        self.debug_pubkey_config_key = debug_pubkey_config_key
        # A fresh dict per instance; a ``{}`` default in the signature would
        # be shared by every charm object created without this argument.
        self.vscode_workspace = {} if vscode_workspace is None else vscode_workspace
        self.mysql_uri = mysql_uri

        # Registering regular events
        self.framework.observe(self.on.config_changed, self.configure_pod)
        self.framework.observe(self.on.leader_elected, self.configure_pod)

    def build_pod_spec(self, image_info: Dict, **kwargs):
        """
        Method to be implemented by the charm to build the pod spec

        :params: image_info: Image info details
        :params: kwargs:
                    mysql_config (opslib.osm.config.mysql.MysqlModel):
                        Mysql config object. Will be included if the charm has been initialized
                        with mysql_uri=True.
        :raises: NotImplementedError: always, unless overridden by a subclass
        """
        raise NotImplementedError("build_pod_spec is not implemented")

    def _debug(self, pod_spec: Dict) -> None:
        """
        Activate debugging mode in the charm

        Removes the probes of the first container, exposes port 22, mounts a
        generated ``debug.sh`` script and makes it the container command.

        :params: pod_spec: Pod Spec to be debugged. Note: The first container is
                           the one that will be debugged.
        """
        container = pod_spec["containers"][0]
        kubernetes = container["kubernetes"]
        # Drop the probes when present.
        kubernetes.pop("readinessProbe", None)
        kubernetes.pop("livenessProbe", None)

        container["ports"].append(
            {
                "name": "ssh",
                "containerPort": 22,
                "protocol": "TCP",
            }
        )

        workspace_json = json.dumps(
            self.vscode_workspace,
            sort_keys=True,
            indent=4,
            separators=(",", ": "),
        )
        debug_script = Template(DEBUG_SCRIPT).substitute(
            pubkey=self.config[self.debug_pubkey_config_key],
            vscode_workspace=workspace_json,
        )
        container["volumeConfig"].append(
            {
                "name": "scripts",
                "mountPath": "/osm-debug-scripts",
                "files": [
                    {
                        "path": "debug.sh",
                        "content": debug_script,
                        "mode": 0o777,
                    }
                ],
            }
        )
        container["command"] = ["/osm-debug-scripts/debug.sh"]

    def _debug_if_needed(self, pod_spec):
        """
        Debug the pod_spec if needed

        :params: pod_spec: Pod Spec to be debugged.
        :raises: Exception: if debug mode is enabled but the public key
                            config key is absent from the charm config
        """
        if not self.debugging_supported:
            return
        if not self.config[self.debug_mode_config_key]:
            return
        if self.debug_pubkey_config_key not in self.config:
            raise Exception("debug_pubkey config is not set")
        self._debug(pod_spec)

    def _get_build_pod_spec_kwargs(self):
        """Get kwargs for the build_pod_spec function"""
        kwargs = {}
        if self.mysql_uri:
            kwargs["mysql_config"] = MysqlModel(**self.config)
        return kwargs

    def configure_pod(self, _=None) -> None:
        """Assemble the pod spec and apply it, if possible.

        Only the leader builds and applies the spec. Any failure is turned
        into a ``BlockedStatus`` on the unit instead of propagating.
        """
        try:
            if self.unit.is_leader():
                self.unit.status = MaintenanceStatus("Assembling pod spec")
                image_info = self.image.fetch()
                kwargs = self._get_build_pod_spec_kwargs()
                pod_spec = self.build_pod_spec(image_info, **kwargs)
                self._debug_if_needed(pod_spec)
                self._set_pod_spec(pod_spec)

            self.unit.status = ActiveStatus("ready")
        except OCIImageResourceError:
            self.unit.status = BlockedStatus("Error fetching image information")
        except ValidationError as e:
            logger.error(f"Config data validation error: {e}")
            logger.debug(traceback.format_exc())
            self.unit.status = BlockedStatus(str(e))
        except RelationsMissing as e:
            logger.error(f"Relation missing error: {e.message}")
            logger.debug(traceback.format_exc())
            self.unit.status = BlockedStatus(e.message)
        except ModelError as e:
            self.unit.status = BlockedStatus(str(e))
        except Exception as e:
            # Last-resort boundary: report the failure through the unit status.
            error_message = f"Unknown exception: {e}"
            logger.error(error_message)
            logger.debug(traceback.format_exc())
            self.unit.status = BlockedStatus(error_message)

    def _set_pod_spec(self, pod_spec: Dict[str, Any]) -> None:
        """Apply ``pod_spec`` when its hash differs from the stored hash."""
        pod_spec_hash = hash_from_dict(pod_spec)
        if self.state.pod_spec == pod_spec_hash:
            return
        self.model.pod.set_spec(pod_spec)
        self.state.pod_spec = pod_spec_hash
        logger.debug(f"applying pod spec with hash {pod_spec_hash}")
class Lxd(Object):
    """Relation endpoint that manages client certificates on an LXD server.

    The endpoint and TLS credentials are kept in stored state. Certificates
    are written to CLIENT_CERT_PATH / CLIENT_KEY_PATH / SERVER_CERT_PATH
    whenever an HTTPS connection to the LXD API is opened.
    """

    state = StoredState()

    def __init__(self, charm: CharmBase, relation_name: str):
        """Set stored-state defaults and observe the relation events."""
        super().__init__(charm, relation_name)
        self.state.set_default(
            endpoint=None,
            server_name=None,
            client_cert=None,
            client_key=None,
            server_cert=None,
        )
        self._relation_name = relation_name
        relation_events = charm.on[relation_name]
        self.framework.observe(relation_events.relation_changed,
                               self._on_relation_changed)
        self.framework.observe(relation_events.relation_joined,
                               self._on_relation_joined)

    @property
    def is_joined(self):
        """Whether a relation with the configured name currently exists."""
        relation = self.framework.model.get_relation(self._relation_name)
        return relation is not None

    @property
    def is_ready(self) -> bool:
        """Whether endpoint, client cert/key and server cert are all set."""
        return all([
            self.state.endpoint,
            self.state.client_cert,
            self.state.client_key,
            self.state.server_cert,
        ])

    def _new_connection(self) -> httpclient.HTTPSConnection:
        """
        Return a http.client.HTTPSConnection configured with the proper
        endpoint and certificates
        """
        self._write_certs_to_filesystem()
        # SERVER_AUTH is the purpose for client-side sockets: it verifies the
        # server against `cafile`. CLIENT_AUTH builds a server-side context,
        # which performs no server verification and which recent Python
        # versions refuse to use for outgoing connections.
        sslcontext = ssl.create_default_context(
            purpose=ssl.Purpose.SERVER_AUTH, cafile=SERVER_CERT_PATH)
        sslcontext.load_cert_chain(certfile=CLIENT_CERT_PATH,
                                   keyfile=CLIENT_KEY_PATH)
        # Depending on how it was initialized, the LXD server cert can be configured
        # with 127.0.0.1 as its CN, failing the verification
        sslcontext.check_hostname = False

        endpoint = self.state.endpoint
        for scheme in ('https://', 'http://'):
            endpoint = endpoint.replace(scheme, '')
        return httpclient.HTTPSConnection(endpoint,
                                          context=sslcontext,
                                          timeout=5)

    def set_credentials(self, endpoint: str, client_cert: str,
                        client_key: str, server_cert: str) -> None:
        """Store the credentials and check that LXD reports them as trusted.

        Raises:
            RuntimeError: if the LXD API does not report ``auth == 'trusted'``.
        """
        self.state.endpoint = endpoint
        self.state.client_cert = client_cert
        self.state.client_key = client_key
        self.state.server_cert = server_cert

        # Check that the credentials are trusted to LXD
        conn = self._new_connection()
        try:
            conn.request('GET', '/1.0')
            raw_res = conn.getresponse().read().decode('utf-8')
        finally:
            conn.close()

        resp = json.loads(raw_res)['metadata']
        if resp['auth'] != 'trusted':
            raise RuntimeError('invalid credentials: not trusted')

        self.state.server_name = resp['environment']['server_name']
        logger.info('credentials configured')

    def _on_relation_joined(self, event: RelationJoinedEvent):
        """Publish this node's entry on the relation once credentials are set."""
        if not self.is_ready:
            event.defer()
            return

        relation = self.framework.model.get_relation(self._relation_name)
        data = relation.data[self.model.unit]
        data['nodes'] = json.dumps([self._node_entry()])
        data['version'] = '1.0'

    def _node_entry(self) -> dict:
        """Return a fresh node entry with an empty list of trusted fingerprints."""
        return {
            'endpoint': self.state.endpoint,
            'name': self.state.server_name,
            'trusted_certs_fp': [],
        }

    def _clean_certs_from_filesystem(self):
        """Remove previously saved certificates from filesystem."""
        for path in (CLIENT_CERT_PATH, CLIENT_KEY_PATH, SERVER_CERT_PATH):
            try:
                os.remove(path)
            except FileNotFoundError:
                pass  # nothing was written for this path

    def _write_certs_to_filesystem(self):
        """Write the stored certificates and key to disk, replacing old content."""
        targets = (
            (CLIENT_CERT_PATH, self.state.client_cert),
            (CLIENT_KEY_PATH, self.state.client_key),
            (SERVER_CERT_PATH, self.state.server_cert),
        )
        for path, content in targets:
            # Always rewrite and truncate so that credentials changed through
            # set_credentials() never leave a stale (or partially
            # overwritten) file behind. 0o600 applies when the file is created.
            fd = os.open(path, os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o600)
            with open(fd, 'w') as f:
                f.write(content)

    def _unregister_cert(self, cert: str) -> None:
        """Delete ``cert`` from the LXD trust store, logging a failed response.

        Raises:
            RuntimeError: if the credentials are not configured.
        """
        if not self.is_ready:
            raise RuntimeError('credentials not configured')

        fp = self._cert_fingerprint(cert)
        logger.info('removing certificate {} from trust store'.format(fp))
        conn = self._new_connection()
        try:
            conn.request('DELETE', '/1.0/certificates/{}'.format(fp))
            response = conn.getresponse()
            data = response.read().decode('utf-8')
        finally:
            conn.close()

        if response.status != 202:
            logger.error(data)

    def _cert_fingerprint(self, cert):
        """Return the lowercase, colon-free SHA-256 digest of ``cert``.

        ``cert`` may be a PEM string or an already loaded certificate object.
        """
        if isinstance(cert, str):
            cert = load_certificate(FILETYPE_PEM, cert)
        digest = cert.digest('sha256').decode('utf-8')
        return digest.replace(':', '').lower()

    def _register_cert(self, cert: str) -> None:
        """Add the PEM certificate ``cert`` to the LXD trust store.

        A response saying the certificate is already trusted is logged as a
        warning; any other non-2xx response is logged as an error.

        Raises:
            RuntimeError: if the credentials are not configured.
        """
        if not self.is_ready:
            raise RuntimeError('credentials not configured')

        # Collect the text between the BEGIN and END markers, joined without
        # line breaks.
        body_lines = []
        inside = False
        for line in io.StringIO(cert):
            if not inside:
                inside = '-----BEGIN CERTIFICATE-----' in line
                continue
            if '-----END CERTIFICATE-----' in line:
                break
            body_lines.append(line.rstrip('\r\n'))
        content = ''.join(body_lines)

        x509_cert = load_certificate(FILETYPE_PEM, cert)
        name = x509_cert.get_subject().CN
        fp = self._cert_fingerprint(x509_cert)
        logger.info('adding certificate {} to trust store'.format(fp))

        payload = json.dumps({
            "type": "client",
            "certificate": content,
            "name": name
        })
        headers = {'Content-Type': 'application/json'}
        conn = self._new_connection()
        try:
            conn.request('POST',
                         '/1.0/certificates',
                         headers=headers,
                         body=payload)
            response = conn.getresponse()
            data = response.read().decode('utf-8')
        finally:
            conn.close()
            self._clean_certs_from_filesystem()

        if 200 <= response.status < 300:
            return
        if "Certificate already in trust store" in data:
            logger.warning(
                'certificate already provisioned. Skipping provision')
        else:
            logger.error(data)

    def _on_relation_changed(self, event: RelationChangedEvent):
        """Sync the LXD trust store with the certificates sent by the remote unit."""
        if not self.is_ready:
            event.defer()
            return

        received = event.relation.data[event.unit]
        local_data = event.relation.data[self.model.unit]

        registered_certs = set(
            json.loads(received.get('trusted_certs_fp', '[]')))
        client_certificates = set(
            json.loads(received.get('client_certificates', '[]')))

        # NOTE(review): the key names suggest `trusted_certs_fp` holds
        # fingerprints while `client_certificates` holds PEM text; if so these
        # set differences never match anything - confirm against the remote
        # side of the relation.
        # Un-register removed certificates
        for cert in registered_certs - client_certificates:
            self._unregister_cert(cert)

        # Register new certs
        nodes = json.loads(local_data.get('nodes', '[]'))
        current_node = next(
            (node for node in nodes
             if node['endpoint'] == self.state.endpoint),
            None)
        if current_node is None:
            # No entry published yet for this endpoint (e.g. relation-joined
            # was deferred): start from an empty one instead of crashing.
            current_node = self._node_entry()

        trusted_fps = set(current_node['trusted_certs_fp'])
        for cert in client_certificates - registered_certs:
            self._register_cert(cert)
            trusted_fps.add(self._cert_fingerprint(cert))

        # Sorted so that the published value is stable between hook runs.
        current_node['trusted_certs_fp'] = sorted(trusted_fps)
        local_data['nodes'] = json.dumps([current_node])

        self._clean_certs_from_filesystem()
class GrafanaK8s(CharmBase):
    """Charm to run Grafana on Kubernetes.

    This charm allows for high-availability
    (as long as a non-sqlite database relation is present).

    Developers of this charm should be aware of the Grafana provisioning docs:
    https://grafana.com/docs/grafana/latest/administration/provisioning/
    """

    datastore = StoredState()

    def __init__(self, *args):
        """Observe hooks and relation events and set datastore defaults."""
        log.debug('Initializing charm.')
        super().__init__(*args)

        # -- get image information
        self.image = OCIImageResource(self, 'grafana-image')

        # -- standard hooks
        self.framework.observe(self.on.config_changed, self.on_config_changed)
        self.framework.observe(self.on.update_status, self.on_update_status)
        self.framework.observe(self.on.stop, self._on_stop)

        # -- grafana-source relation observations
        self.framework.observe(self.on['grafana-source'].relation_changed,
                               self.on_grafana_source_changed)
        self.framework.observe(self.on['grafana-source'].relation_broken,
                               self.on_grafana_source_broken)

        # -- grafana (peer) relation observations
        self.framework.observe(self.on['grafana'].relation_changed,
                               self.on_peer_changed)
        # NOTE: on_peer_departed is defined below but deliberately not observed.

        # -- database relation observations
        self.framework.observe(self.on['database'].relation_changed,
                               self.on_database_changed)
        self.framework.observe(self.on['database'].relation_broken,
                               self.on_database_broken)

        # -- initialize states --
        self.datastore.set_default(sources=dict())  # available data sources
        self.datastore.set_default(source_names=set())  # unique source names
        self.datastore.set_default(sources_to_delete=set())
        self.datastore.set_default(database=dict())  # db configuration

    @property
    def has_peer(self) -> bool:
        """Whether the ``grafana`` peer relation exists and has units."""
        rel = self.model.get_relation('grafana')
        return len(rel.units) > 0 if rel is not None else False

    @property
    def has_db(self) -> bool:
        """Only consider a DB connection if we have config info."""
        return len(self.datastore.database) > 0

    def _on_stop(self, _):
        """Go into maintenance state if the unit is stopped."""
        self.unit.status = MaintenanceStatus('Pod is terminating.')

    def on_config_changed(self, _):
        """Rebuild and set the pod spec."""
        self.configure_pod()

    def on_update_status(self, _):
        """Various health checks of the charm."""
        self._check_high_availability()

    def on_grafana_source_changed(self, event):
        """Get relation data for Grafana source and set k8s pod spec.

        This event handler (if the unit is the leader) will get data for
        an incoming grafana-source relation and make the relation data
        available in the app's datastore object (StoredState).
        """
        # if this unit is the leader, set the required data
        # of the grafana-source in this charm's datastore
        if not self.unit.is_leader():
            return

        # without a remote unit there is no relation data to read
        if event.unit is None:
            log.warning("event unit can't be None when setting data sources.")
            return

        # dictionary of all the required/optional datasource field values
        # using this as a more generic way of getting data source fields
        unit_data = event.relation.data[event.unit]
        datasource_fields = {
            field: unit_data.get(field)
            for field in REQUIRED_DATASOURCE_FIELDS | OPTIONAL_DATASOURCE_FIELDS
        }

        missing_fields = [
            field for field in REQUIRED_DATASOURCE_FIELDS
            if datasource_fields.get(field) is None
        ]
        # check the relation data for missing required fields
        if missing_fields:
            log.error("Missing required data fields for grafana-source "
                      "relation: {}".format(missing_fields))
            self._remove_source_from_datastore(event.relation.id)
            return

        rel_id = event.relation.id
        # what is already stored for this relation, if this is a repeat event
        previous = self.datastore.sources.get(rel_id)
        previous_name = None
        if previous is not None:
            previous_name = previous.get('source-name')

        # specifically handle optional fields if necessary
        # check if source-name was not passed or if another relation has
        # already saved the provided name (the name this relation itself
        # registered earlier does not count as a collision)
        requested_name = datasource_fields['source-name']
        taken_by_other = (requested_name in self.datastore.source_names
                          and requested_name != previous_name)
        if requested_name is None or taken_by_other:
            default_source_name = '{}_{}'.format(event.app.name, rel_id)
            log.warning(
                "No name 'grafana-source' or provided name is already in use. "
                "Using safe default: {}.".format(default_source_name))
            datasource_fields['source-name'] = default_source_name

        source_name = datasource_fields['source-name']
        if previous_name is not None and previous_name != source_name:
            # the relation changed its name: release the old one and have it
            # removed from Grafana, as is done when a source goes away
            self.datastore.source_names.discard(previous_name)
            self.datastore.sources_to_delete.add(previous_name)
        self.datastore.source_names.add(source_name)

        # set the first grafana-source as the default (needed for pod config)
        # a source that is already stored keeps the flag it was given
        if previous is not None:
            datasource_fields['isDefault'] = previous.get('isDefault', 'false')
        elif not self.datastore.sources:
            datasource_fields['isDefault'] = 'true'
        else:
            datasource_fields['isDefault'] = 'false'

        # add unit name so the source can be removed might be a
        # duplicate of 'source-name', but this will guarantee lookup
        datasource_fields['unit_name'] = event.unit.name

        # add the new datasource relation data to the current state
        new_source_data = {
            field: value
            for field, value in datasource_fields.items()
            if value is not None
        }
        self.datastore.sources.update({rel_id: new_source_data})
        self.configure_pod()

    def on_grafana_source_broken(self, event):
        """When a grafana-source is removed, delete from the datastore."""
        if self.unit.is_leader():
            self._remove_source_from_datastore(event.relation.id)
        self.configure_pod()

    def on_peer_changed(self, _):
        """Set the pod spec again when the peer relation changes."""
        # TODO: https://grafana.com/docs/grafana/latest/tutorials/ha_setup/
        #       According to these docs ^, as long as we have a DB, HA should
        #       work out of the box if we are OK with "Sticky Sessions"
        #       but having "Stateless Sessions" could require more config

        # if the config changed, set a new pod spec
        self.configure_pod()

    def on_peer_departed(self, _):
        """Sets pod spec with new info."""
        # TODO: setting pod spec shouldn't do anything now,
        #       but if we ever need to change config based peer units,
        #       we will want to make sure configure_pod() is called
        self.configure_pod()

    def on_database_changed(self, event):
        """Sets configuration information for database connection."""
        if not self.unit.is_leader():
            return

        if event.unit is None:
            log.warning("event unit can't be None when setting db config.")
            return

        # save the necessary configuration of this database connection
        unit_data = event.relation.data[event.unit]
        database_fields = {
            field: unit_data.get(field)
            for field in REQUIRED_DATABASE_FIELDS | OPTIONAL_DATABASE_FIELDS
        }

        # if any required fields are missing, warn the user and return
        missing_fields = [
            field for field in REQUIRED_DATABASE_FIELDS
            if database_fields.get(field) is None
        ]
        if missing_fields:
            log.error("Missing required data fields for related database "
                      "relation: {}".format(missing_fields))
            return

        # check if the passed database type is not in VALID_DATABASE_TYPES
        if database_fields['type'] not in VALID_DATABASE_TYPES:
            log.error('Grafana can only accept databases of the following '
                      'types: {}'.format(VALID_DATABASE_TYPES))
            return

        # add the new database relation data to the datastore
        self.datastore.database.update({
            field: value
            for field, value in database_fields.items()
            if value is not None
        })
        self.configure_pod()

    def on_database_broken(self, _):
        """Removes database connection info from datastore.

        We are guaranteed to only have one DB connection, so clearing
        datastore.database is all we need for the change to be propagated
        to the pod spec."""
        if not self.unit.is_leader():
            return

        # remove the existing database info from datastore
        self.datastore.database = dict()

        # set pod spec because datastore config has changed
        self.configure_pod()

    def _remove_source_from_datastore(self, rel_id):
        """Remove the grafana-source from the datastore.

        Once removed from the datastore, this datasource will not be
        part of the next pod spec."""
        log.info('Removing all data for relation: {}'.format(rel_id))
        removed_source = self.datastore.sources.pop(rel_id, None)
        if removed_source is None:
            log.warning(
                'Could not remove source for relation: {}'.format(rel_id))
            return

        # free name from charm's set of source names
        # and save to set which will be used in set_pod_spec
        # (discard: a name missing from the set must not abort the removal)
        source_name = removed_source['source-name']
        self.datastore.source_names.discard(source_name)
        self.datastore.sources_to_delete.add(source_name)

    def _check_high_availability(self):
        """Checks whether the configuration allows for HA.

        Returns the computed status. The unit status is updated with it,
        except that a maintenance status never replaces an active one.
        """
        if self.has_peer:
            if self.has_db:
                log.info('high availability possible.')
                status = MaintenanceStatus('Grafana ready for HA.')
            else:
                log.warning('high availability not possible '
                            'with current configuration.')
                status = BlockedStatus('Need database relation for HA.')
        else:
            log.info('running Grafana on single node.')
            status = MaintenanceStatus('Grafana ready on single node.')

        # make sure we don't have a maintenance status overwrite
        # a currently active status
        if isinstance(status, MaintenanceStatus) \
                and isinstance(self.unit.status, ActiveStatus):
            return status

        self.unit.status = status
        return status

    def _make_delete_datasources_config_text(self) -> str:
        """Generate text of data sources to delete.

        Clears ``datastore.sources_to_delete`` once the text is built.
        """
        if not self.datastore.sources_to_delete:
            return "\n"

        delete_datasources_text = textwrap.dedent("""
        deleteDatasources:""")
        for name in self.datastore.sources_to_delete:
            delete_datasources_text += textwrap.dedent("""
            - name: {}
              orgId: 1""".format(name))

        # clear datastore.sources_to_delete and return text result
        self.datastore.sources_to_delete.clear()
        return delete_datasources_text + '\n\n'

    def _make_data_source_config_text(self) -> str:
        """Build config based on Data Sources section of provisioning docs."""
        # get starting text for the config file and sources to delete
        delete_text = self._make_delete_datasources_config_text()
        config_text = textwrap.dedent("""
        apiVersion: 1
        """)
        config_text += delete_text
        if self.datastore.sources:
            config_text += "datasources:"
        for source_info in self.datastore.sources.values():
            # TODO: handle more optional fields and verify that current
            #       defaults are what we want (e.g. "access")
            config_text += textwrap.dedent("""
            - name: {0}
              type: {1}
              access: proxy
              url: http://{2}:{3}
              isDefault: {4}
              editable: true
              orgId: 1""").format(
                source_info['source-name'],
                source_info['source-type'],
                source_info['private-address'],
                source_info['port'],
                source_info['isDefault'],
            )

        return config_text + '\n'

    def _update_pod_data_source_config_file(self, pod_spec):
        """Adds datasources to pod configuration."""
        file_text = self._make_data_source_config_text()
        data_source_file_meta = {
            'name': 'grafana-datasources',
            'mountPath': '/etc/grafana/provisioning/datasources',
            'files': [{
                'path': 'datasources.yaml',
                'content': file_text,
            }]
        }
        container = get_container(pod_spec, self.app.name)
        container['volumeConfig'].append(data_source_file_meta)

        # get hash string of the new file text and put into container config
        # if this changes, it will trigger a pod restart
        file_text_hash = hashlib.md5(file_text.encode()).hexdigest()
        if 'DATASOURCES_YAML' in container['envConfig'] \
                and container['envConfig']['DATASOURCES_YAML'] != file_text_hash:
            log.info('datasources.yaml hash has changed. '
                     'Triggering pod restart.')
        container['envConfig']['DATASOURCES_YAML'] = file_text_hash

    def _make_config_ini_text(self):
        """Create the text of the config.ini file.

        More information about this can be found in the Grafana docs:
        https://grafana.com/docs/grafana/latest/administration/configuration/
        """
        config_text = textwrap.dedent("""
        [paths]
        provisioning = /etc/grafana/provisioning

        [log]
        mode = console
        level = {0}
        """.format(
            self.model.config['grafana_log_level'],
        ))

        # if there is a database available, add that information
        if self.datastore.database:
            db_config = self.datastore.database
            config_text += textwrap.dedent("""
            [database]
            type = {0}
            host = {1}
            name = {2}
            user = {3}
            password = {4}
            url = {0}://{3}:{4}@{1}/{2}""".format(
                db_config['type'],
                db_config['host'],
                db_config['name'],
                db_config['user'],
                db_config['password'],
            ))
        return config_text

    def _update_pod_config_ini_file(self, pod_spec):
        """Adds the grafana.ini file and its content hash to the pod spec."""
        file_text = self._make_config_ini_text()
        config_ini_file_meta = {
            'name': 'grafana-config-ini',
            'mountPath': '/etc/grafana',
            'files': [{
                'path': 'grafana.ini',
                'content': file_text
            }]
        }
        container = get_container(pod_spec, self.app.name)
        container['volumeConfig'].append(config_ini_file_meta)

        # get hash string of the new file text and put into container config
        # if this changes, it will trigger a pod restart
        file_text_hash = hashlib.md5(file_text.encode()).hexdigest()
        if 'GRAFANA_INI' in container['envConfig'] \
                and container['envConfig']['GRAFANA_INI'] != file_text_hash:
            log.info('grafana.ini hash has changed. Triggering pod restart.')
        container['envConfig']['GRAFANA_INI'] = file_text_hash

    def _build_pod_spec(self):
        """Builds the pod spec based on available info in `datastore`."""
        config = self.model.config

        spec = {
            'version': 3,
            'containers': [{
                'name': self.app.name,
                'image': "ubuntu/grafana:latest",
                'ports': [{
                    'containerPort': config['port'],
                    'protocol': 'TCP'
                }],
                'volumeConfig': [],
                'envConfig': {},  # used to store hashes of config file text
                'kubernetes': {
                    'readinessProbe': {
                        'httpGet': {
                            'path': '/api/health',
                            'port': config['port']
                        },
                        'initialDelaySeconds': 10,
                        'timeoutSeconds': 30
                    },
                },
            }]
        }

        return spec

    def configure_pod(self):
        """Set Juju / Kubernetes pod spec built from `_build_pod_spec()`."""
        # check for valid high availability (or single node) configuration
        self._check_high_availability()

        # in the case where we have peers but no DB connection,
        # don't set the pod spec until it is resolved
        if self.unit.status == BlockedStatus('Need database relation for HA.'):
            log.error('Application is in a blocked state. '
                      'Please resolve before pod spec can be set.')
            return

        if not self.unit.is_leader():
            self.unit.status = ActiveStatus()
            return

        # general pod spec component updates
        self.unit.status = MaintenanceStatus('Building pod spec.')
        pod_spec = self._build_pod_spec()
        if not pod_spec:
            return
        self._update_pod_data_source_config_file(pod_spec)
        self._update_pod_config_ini_file(pod_spec)

        # set the pod spec with Juju
        self.model.pod.set_spec(pod_spec)
        self.unit.status = ActiveStatus()
class MetallbSpeakerCharm(CharmBase):
    """MetalLB Speaker Charm."""

    _stored = StoredState()

    def __init__(self, *args):
        """Wire the start/remove handlers and seed the stored defaults."""
        super().__init__(*args)
        for hook, callback in (
            (self.on.start, self.on_start),
            (self.on.remove, self.on_remove),
        ):
            self.framework.observe(hook, callback)
        # state flag plus the base values used by the pod spec and API calls
        self._stored.set_default(
            started=False,
            namespace=os.environ["JUJU_MODEL_NAME"],
            container_image='metallb/speaker:v0.9.3',
        )

    def on_start(self, event):
        """Set the pod spec and create the API artifacts (leader only).

        The creation steps run in order; the first one that returns a falsy
        response blocks the unit and stops the sequence.
        """
        unit = self.framework.model.unit
        if not unit.is_leader():
            return

        logging.info('Setting the pod spec')
        unit.status = MaintenanceStatus("Configuring pod")
        self.set_pod_spec()

        namespace = self._stored.namespace
        setup_steps = (
            lambda: utils.create_pod_security_policy_with_api(
                namespace=namespace,
            ),
            lambda: utils.create_namespaced_role_with_api(
                name='config-watcher',
                namespace=namespace,
                labels={'app': 'metallb'},
                resources=['configmaps'],
                verbs=['get', 'list', 'watch'],
            ),
            lambda: utils.create_namespaced_role_with_api(
                name='pod-lister',
                namespace=namespace,
                labels={'app': 'metallb'},
                resources=['pods'],
                verbs=['list'],
            ),
            lambda: utils.bind_role_with_api(
                name='config-watcher',
                namespace=namespace,
                labels={'app': 'metallb'},
                subject_name='speaker',
            ),
            lambda: utils.bind_role_with_api(
                name='pod-lister',
                namespace=namespace,
                labels={'app': 'metallb'},
                subject_name='speaker',
            ),
        )
        for run_step in setup_steps:
            if not run_step():
                unit.status = BlockedStatus(
                    "An error occured during init. Please check the logs.")
                return

        unit.status = ActiveStatus("Ready")
        self._stored.started = True

    def on_remove(self, event):
        """Remove artifacts created by the K8s API."""
        unit = self.framework.model.unit
        if not unit.is_leader():
            return

        unit.status = MaintenanceStatus("Removing pod")
        logger.info("Removing artifacts that were created with the k8s API")
        namespace = self._stored.namespace
        utils.delete_pod_security_policy_with_api(name='speaker')
        # for each role: drop the binding first, then the role itself
        for role_name in ('config-watcher', 'pod-lister'):
            utils.delete_namespaced_role_binding_with_api(
                name=role_name, namespace=namespace)
            utils.delete_namespaced_role_with_api(
                name=role_name, namespace=namespace)
        unit.status = ActiveStatus("Removing extra config done.")
        self._stored.started = False

    def set_pod_spec(self):
        """Set pod spec."""

        def field_ref(path):
            # envConfig entry that takes its value from a pod field
            return {'field': {'path': path, 'api-version': 'v1'}}

        secret = utils._random_secret(128)
        encoded_secret = b64encode(secret.encode('utf-8')).decode('utf-8')

        service_account = {
            'roles': [{
                'global': True,
                'rules': [
                    {
                        'apiGroups': [''],
                        'resources': ['services', 'endpoints', 'nodes'],
                        'verbs': ['get', 'list', 'watch'],
                    },
                    {
                        'apiGroups': [''],
                        'resources': ['events'],
                        'verbs': ['create', 'patch'],
                    },
                    {
                        'apiGroups': ['policy'],
                        'resourceNames': ['speaker'],
                        'resources': ['podsecuritypolicies'],
                        'verbs': ['use'],
                    },
                ],
            }],
        }

        env_config = {
            'METALLB_NODE_NAME': field_ref('spec.nodeName'),
            'METALLB_HOST': field_ref('status.hostIP'),
            'METALLB_ML_BIND_ADDR': field_ref('status.podIP'),
            'METALLB_ML_LABELS': "app=metallb,component=speaker",
            'METALLB_ML_NAMESPACE': field_ref('metadata.namespace'),
            'METALLB_ML_SECRET_KEY': {
                'secret': {
                    'name': 'memberlist',
                    'key': 'secretkey'
                }
            },
        }

        # TODO: add constraint fields once it exists in pod_spec
        # bug : https://bugs.launchpad.net/juju/+bug/1893123
        # (cpu/memory resource limits and TerminationGracePeriodSeconds
        # are not available as pod_spec fields)
        speaker_container = {
            'name': 'speaker',
            'image': self._stored.container_image,
            'imagePullPolicy': 'Always',
            'ports': [{
                'containerPort': 7472,
                'protocol': 'TCP',
                'name': 'monitoring'
            }],
            'envConfig': env_config,
            'kubernetes': {
                'securityContext': {
                    'allowPrivilegeEscalation': False,
                    'readOnlyRootFilesystem': True,
                    'capabilities': {
                        'add': ['NET_ADMIN', 'NET_RAW', 'SYS_ADMIN'],
                        'drop': ['ALL']
                    },
                },
            },
        }

        memberlist_secret = {
            'name': 'memberlist',
            'type': 'Opaque',
            'data': {
                'secretkey': encoded_secret
            }
        }

        pod_spec = {
            'version': 3,
            'serviceAccount': service_account,
            'containers': [speaker_container],
            'kubernetesResources': {
                'secrets': [memberlist_secret]
            },
            'service': {
                'annotations': {
                    'prometheus.io/port': '7472',
                    'prometheus.io/scrape': 'true'
                }
            },
        }
        self.framework.model.pod.set_spec(pod_spec)
class InfluxDB(Object):
    """InfluxDB interface.

    On the leader unit, this object uses the admin credentials published by
    the remote InfluxDB unit to provision a dedicated user and database
    (named after the charm's cluster name), stores the resulting connection
    details as JSON in stored state, and emits ``influxdb_available``.
    When the relation is broken it drops the database and user again and
    emits ``influxdb_unavailable``.
    """

    _stored = StoredState()
    on = InfluxDBEvents()

    def __init__(self, charm, relation_name):
        """Observe relation events.

        Args:
            charm: the charm that owns this interface; it must expose a
                ``cluster_name`` attribute, used as the database name.
            relation_name: name of the relation to observe.
        """
        super().__init__(charm, relation_name)

        self._charm = charm
        self._relation_name = relation_name

        self._INFLUX_USER = "******"
        self._INFLUX_PRIVILEGE = "all"
        self._INFLUX_DATABASE = self._charm.cluster_name

        # Both values are JSON strings; an empty string means "not set".
        self._stored.set_default(
            influxdb_info=str(),
            influxdb_admin_info=str(),
        )

        self.framework.observe(
            self._charm.on[self._relation_name].relation_changed,
            self._on_relation_changed,
        )
        self.framework.observe(
            self._charm.on[self._relation_name].relation_broken,
            self._on_relation_broken,
        )

    def _on_relation_changed(self, event):
        """Store influxdb_ingress in the charm.

        Runs only on the leader, and only while no admin info has been
        stored yet. Once the remote unit has published ingress address,
        port, user and password, it provisions the slurm user and database,
        stores the connection info and emits ``influxdb_available``.
        If any of those values is still missing the handler returns without
        doing anything, so a later relation-changed event can retry.
        """
        if not self.framework.model.unit.is_leader():
            return
        if self._stored.influxdb_admin_info:
            # Already provisioned for this relation.
            return
        if event.unit is None:
            # No remote unit on this event, hence no unit data to read.
            return

        unit_data = event.relation.data[event.unit]
        # Use .get() for every field: the remote side may not have published
        # all of them yet, and that case is handled by the check below.
        ingress = unit_data.get("ingress-address")
        port = unit_data.get("port")
        user = unit_data.get("user")
        password = unit_data.get("password")

        if not all([ingress, port, user, password]):
            return

        admin_info = {
            "ingress": ingress,
            "port": port,
            "user": user,
            "password": password,
        }
        self._stored.influxdb_admin_info = json.dumps(admin_info)

        # Influxdb client
        client = influxdb.InfluxDBClient(ingress, port, user, password)

        # Influxdb slurm user password
        influx_slurm_password = generate_password()

        # Only create the user and db if they don't already exist
        users = [entry["user"] for entry in client.get_list_users()]
        logger.debug(f"## users in influxdb: {users}")
        if self._INFLUX_USER not in users:
            logger.debug(f"## Creating influxdb user: {self._INFLUX_USER}")
            client.create_user(self._INFLUX_USER, influx_slurm_password)
        else:
            # The account already exists with a password we do not know.
            # Reset it so the credentials stored below are actually valid.
            client.set_user_password(self._INFLUX_USER,
                                     influx_slurm_password)

        databases = [entry["name"] for entry in client.get_list_database()]
        if self._INFLUX_DATABASE not in databases:
            logger.debug(f"## Creating influxdb db: {self._INFLUX_DATABASE}")
            client.create_database(self._INFLUX_DATABASE)

        # Granting is repeated even when the database already existed, so a
        # freshly created user always ends up with access to it.
        client.grant_privilege(self._INFLUX_PRIVILEGE,
                               self._INFLUX_DATABASE,
                               self._INFLUX_USER)

        # select default retention policy
        policies = client.get_list_retention_policies(self._INFLUX_DATABASE)
        policy = "slurm"
        for candidate in policies:
            if candidate["default"]:
                policy = candidate["name"]

        # Dump influxdb_info to json and set it to state
        influxdb_info = {
            "ingress": ingress,
            "port": port,
            "user": self._INFLUX_USER,
            "password": influx_slurm_password,
            "database": self._INFLUX_DATABASE,
            "retention_policy": policy,
        }
        self._stored.influxdb_info = json.dumps(influxdb_info)
        self.on.influxdb_available.emit()

    def _on_relation_broken(self, event):
        """Remove the database and user from influxdb.

        Runs only on the leader and only when admin info was stored by a
        previous relation-changed event. Clears both stored values and
        emits ``influxdb_unavailable`` afterwards.
        """
        if not self.framework.model.unit.is_leader():
            return
        if not self._stored.influxdb_admin_info:
            return

        influxdb_admin_info = json.loads(self._stored.influxdb_admin_info)
        client = influxdb.InfluxDBClient(
            influxdb_admin_info["ingress"],
            influxdb_admin_info["port"],
            influxdb_admin_info["user"],
            influxdb_admin_info["password"],
        )

        databases = [entry["name"] for entry in client.get_list_database()]
        if self._INFLUX_DATABASE in databases:
            client.drop_database(self._INFLUX_DATABASE)

        users = [entry["user"] for entry in client.get_list_users()]
        if self._INFLUX_USER in users:
            client.drop_user(self._INFLUX_USER)

        self._stored.influxdb_info = ""
        self._stored.influxdb_admin_info = ""
        self.on.influxdb_unavailable.emit()

    def get_influxdb_info(self) -> dict:
        """Return the influxdb info.

        Returns:
            The stored connection info decoded from JSON, or an empty dict
            when nothing has been stored.
        """
        influxdb_info = self._stored.influxdb_info
        if not influxdb_info:
            return {}
        return json.loads(influxdb_info)
class Charm(CharmBase):
    """Charm entry point that forwards every event to a module-level handler."""

    state = StoredState()

    def __init__(self, *args):
        """Create the adapter and interfaces, then register the delegators."""
        super().__init__(*args)

        # All framework access goes through this adapter, so the charm is
        # not tied to the concrete framework implementation. After this
        # line the charm talks to the adapter only.
        self.fw_adapter = framework.FrameworkAdapter(self.framework)

        self.prometheus_client = interface_http.Client(self, 'prometheus-api')
        self.mysql = interface_mysql.MySQLInterface(self, 'mysql')

        self.state.set_default(
            prometheus_server_details=None,
            mysql_server_details=None,
        )

        # (event, delegator) pairs, registered in the order listed.
        observed = (
            (self.mysql.on.new_relation, self.on_mysql_new_relation),
            (self.on.config_changed, self.on_config_changed),
            (self.on.start, self.on_start),
            (self.on.update_status, self.on_update_status),
            (self.on.upgrade_charm, self.on_start),
            (self.prometheus_client.on.server_available,
             self.on_prom_available),
        )
        for source_event, delegator in observed:
            self.fw_adapter.observe(source_event, delegator)

    # DELEGATORS
    # The methods below are thin shims between the framework and the real
    # handlers. The framework imposes requirements that are irrelevant to
    # many events: for instance, interface_http.Client has to be created
    # during charm initialisation, so any test of this class must mock it
    # even when the handler under test never touches it. Keeping the logic
    # outside this class avoids verbose tests full of unused mocks.

    def _record_server_details(self, event, state_attr):
        """Snapshot event.server_details into state, then run the handler."""
        log.debug(f"Received event {event}")

        details = event.server_details
        log.debug(f"Received server_details {type(details)}:{details}")

        log.debug("Snapshotting to StoredState")
        setattr(self.state, state_attr, details.snapshot())

        log.debug("Calling update_grafana_configuration")
        on_server_new_relation_handler(event, self.state, self.fw_adapter)

    def on_config_changed(self, event):
        """Delegate config-changed to its handler."""
        on_config_changed_handler(event, self.fw_adapter)

    def on_mysql_new_relation(self, event):
        """Record the MySQL server details and run the relation handler."""
        self._record_server_details(event, 'mysql_server_details')

    def on_prom_available(self, event):
        """Record the Prometheus server details and run the relation handler."""
        self._record_server_details(event, 'prometheus_server_details')

    def on_start(self, event):
        """Delegate start (and upgrade-charm) to the start handler."""
        on_start_handler(event, self.fw_adapter)

    def on_update_status(self, event):
        """Delegate update-status to its handler."""
        on_update_status_handler(event, self.fw_adapter)