class MetalLBSpeakerCharm(CharmBase):
    def __init__(self, *args):
        super().__init__(*args)
        self.speaker_image = OCIImageResource(self, 'speaker-image')
        self.framework.observe(self.on.install, self.set_pod_spec)
        self.framework.observe(self.on.upgrade_charm, self.set_pod_spec)

    @staticmethod
    def _get_pod_spec(config=None):
        # Avoid a mutable default argument, and use safe_load instead of the
        # deprecated Loader-less yaml.load.
        with open('metallb.yaml') as f_in:
            spec = yaml.safe_load(f_in)
        spec.update(config or {})
        return spec

    def set_pod_spec(self, event):
        if not self.model.unit.is_leader():
            print('Not a leader, skipping set_pod_spec')
            self.model.unit.status = ActiveStatus()
            return

        # The fetched details are not used in the spec (it comes from
        # metallb.yaml); fetching still validates that the resource exists.
        try:
            speaker_details = self.speaker_image.fetch()
        except ResourceError as e:
            self.model.unit.status = e.status
            return

        self.model.unit.status = MaintenanceStatus('Setting pod spec')
        self.model.pod.set_spec(self._get_pod_spec())
        self.model.unit.status = ActiveStatus()
def test_when_complete_image_info_should_match_given_resource(
        self, read_text, path_exists, charm):
    # Given
    resource = OCIImageResource(charm, "test-image")

    # Monkeypatch fetch as we can't mock the parent Object
    def patched_fetch(name: str) -> Path:
        return Path("/a/b/c/d/e/f")

    resource.model.resources.fetch = patched_fetch
    path_exists.return_value = True
    image = "image:6.0"
    user = "******"
    pwd = "pwd"
    read_text.return_value = """
    "registrypath": {}
    "username": {}
    "password": {}
    """.format(image, user, pwd)

    # When
    image_info = resource.fetch()

    # Then
    self.assertDictEqual(image_info, {
        "imagePath": image,
        "password": pwd,
        "username": user,
    })
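# The tests in this suite take read_text, path_exists and charm parameters
# that are injected outside this excerpt. A plausible harness (an assumption;
# the decorators, patch targets, and fixture wiring are not shown in the
# source) patches the Path helpers used by oci_image and builds the charm
# with ops.testing, roughly:
#
#     from unittest import mock
#     from ops.testing import Harness
#
#     @mock.patch("oci_image.Path.exists")
#     @mock.patch("oci_image.Path.read_text")
#     def test_...(self, read_text, path_exists):
#         charm = Harness(CharmBase, meta="name: test").begin()
#
# The real test module may name and wire these differently.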
def test_when_fetch_fails_with_model_error(self, charm):
    # Given
    resource = OCIImageResource(charm, "test-image")

    # Monkeypatch fetch as we can't mock the parent Object
    def patched_fetch(name: str) -> Path:
        raise ModelError()

    resource.model.resources.fetch = patched_fetch

    # When
    with self.assertRaises(Exception) as context:
        resource.fetch()

    # Then
    # assertTrue(msg, ...) always passes; assertIn actually checks the message.
    self.assertIn("Missing resource: test-image", str(context.exception))
def test_when_resource_path_does_not_exist(self, path_exists, charm):
    # Given
    resource = OCIImageResource(charm, "test-image")

    # Monkeypatch fetch as we can't mock the parent Object
    def patched_fetch(name: str) -> Path:
        return Path("/a/b/c/d/e/f")

    resource.model.resources.fetch = patched_fetch
    path_exists.return_value = False

    # When
    with self.assertRaises(Exception) as context:
        resource.fetch()

    # Then
    # assertTrue(msg, ...) always passes; assertIn actually checks the message.
    self.assertIn("Missing resource: test-image", str(context.exception))
def test_when_resource_file_is_empty(self, read_text, path_exists, charm):
    # Given
    resource = OCIImageResource(charm, "test-image")

    # Monkeypatch fetch as we can't mock the parent Object
    def patched_fetch(name: str) -> Path:
        return Path('/a/b/c/d/e/f')

    resource.model.resources.fetch = patched_fetch
    path_exists.return_value = True
    read_text.return_value = ""

    # When
    with self.assertRaises(Exception) as context:
        resource.fetch()

    # Then
    # assertTrue(msg, ...) always passes; assertIn actually checks the message.
    self.assertIn('Missing resource: test-image', str(context.exception))
class CharmedOsmBase(CharmBase):
    """CharmedOsmBase Charm."""

    state = StoredState()

    def __init__(self, *args, oci_image="image") -> None:
        """CharmedOsmBase Charm constructor."""
        super().__init__(*args)

        # Internal state initialization
        self.state.set_default(pod_spec=None)
        self.image = OCIImageResource(self, oci_image)

        # Registering regular events
        self.framework.observe(self.on.config_changed, self.configure_pod)
        self.framework.observe(self.on.leader_elected, self.configure_pod)

    def build_pod_spec(self, image_info):
        raise NotImplementedError("build_pod_spec is not implemented")

    def configure_pod(self, _=None) -> None:
        """Assemble the pod spec and apply it, if possible."""
        try:
            if self.unit.is_leader():
                self.unit.status = MaintenanceStatus("Assembling pod spec")
                image_info = self.image.fetch()
                pod_spec = self.build_pod_spec(image_info)
                self._set_pod_spec(pod_spec)
            self.unit.status = ActiveStatus("ready")
        except OCIImageResourceError:
            self.unit.status = BlockedStatus("Error fetching image information")
        except ValidationError as e:
            logger.exception(f"Config data validation error: {e}")
            self.unit.status = BlockedStatus(str(e))
        except RelationsMissing as e:
            logger.error(f"Relation missing error: {e.message}")
            self.unit.status = BlockedStatus(e.message)
        except ModelError as e:
            self.unit.status = BlockedStatus(str(e))
        except Exception as e:
            error_message = f"Unknown exception: {e}"
            logger.error(error_message)
            self.unit.status = BlockedStatus(error_message)

    def _set_pod_spec(self, pod_spec: Dict[str, Any]) -> None:
        # Only reapply the spec when its hash differs from the last one applied.
        pod_spec_hash = _hash_from_dict(pod_spec)
        if self.state.pod_spec != pod_spec_hash:
            self.model.pod.set_spec(pod_spec)
            self.state.pod_spec = pod_spec_hash
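# CharmedOsmBase is meant to be subclassed: a concrete charm only supplies
# build_pod_spec(). A minimal sketch of such a subclass; the class name and
# port are hypothetical, not from the source:
class ExampleOsmCharm(CharmedOsmBase):
    def build_pod_spec(self, image_info):
        return {
            "version": 3,
            "containers": [{
                "name": self.app.name,
                "imageDetails": image_info,
                "ports": [{"containerPort": 8080, "protocol": "TCP"}],
            }],
        }


# _hash_from_dict is referenced above but defined outside this section; a
# plausible implementation (an assumption, not the verified original) hashes
# a canonical JSON dump so semantically equal specs compare equal:
import hashlib
import json


def _hash_from_dict(data: dict) -> str:
    # sort_keys makes the dump order-independent, so dict ordering
    # differences do not trigger a redundant pod respec
    return hashlib.md5(json.dumps(data, sort_keys=True).encode("utf-8")).hexdigest()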
def test_when_resource_is_not_a_well_formatted_yaml(
        self, read_text, path_exists, charm):
    # Given
    resource = OCIImageResource(charm, "test-image")

    # Monkeypatch fetch as we can't mock the parent Object
    def patched_fetch(name: str) -> Path:
        return Path("/a/b/c/d/e/f")

    resource.model.resources.fetch = patched_fetch
    path_exists.return_value = True
    # Deliberately malformed YAML (undefined alias)
    read_text.return_value = """
    item <<: *item_attributes
    """

    # When
    with self.assertRaises(Exception) as context:
        resource.fetch()

    # Then
    # assertTrue(msg, ...) always passes; assertIn actually checks the message.
    self.assertIn("Invalid resource: test-image", str(context.exception))
def test_when_resource_misses_registry_path(self, read_text, path_exists, charm):
    # Given
    resource = OCIImageResource(charm, "test-image")

    # Monkeypatch fetch as we can't mock the parent Object
    def patched_fetch(name: str) -> Path:
        return Path("/a/b/c/d/e/f")

    resource.model.resources.fetch = patched_fetch
    path_exists.return_value = True
    # No registrypath key; the literal {} values parse as empty YAML mappings
    read_text.return_value = """
    "username": {}
    "password": {}
    """

    # When
    with self.assertRaises(Exception) as context:
        resource.fetch()

    # Then
    # assertTrue(msg, ...) always passes; assertIn actually checks the message.
    self.assertIn("Invalid resource: test-image", str(context.exception))
class MattermostK8sCharm(CharmBase):

    state = StoredState()

    def __init__(self, framework, key):
        super().__init__(framework, key)

        # get our mattermost_image from juju
        # ie: juju deploy . --resource mattermost_image=mattermost:latest
        self.mattermost_image = OCIImageResource(self, 'mattermost_image')
        self.framework.observe(self.on.start, self.configure_pod)
        self.framework.observe(self.on.config_changed, self.configure_pod)
        self.framework.observe(self.on.upgrade_charm, self.configure_pod)

    def configure_pod(self, event):
        if not self.framework.model.unit.is_leader():
            self.model.unit.status = WaitingStatus('Not a leader')
            return

        mattermost_image_details = self.mattermost_image.fetch()
        self.model.unit.status = MaintenanceStatus('Configuring pod')
        config = self.model.config

        self.model.pod.set_spec({
            'containers': [{
                'name': self.framework.model.app.name,
                'imageDetails': mattermost_image_details,
                'ports': [{
                    'containerPort': int(self.framework.model.config['mattermost_port']),
                    'protocol': 'TCP',
                }],
                'config': {
                    'MATTERMOST_HTTPD_LISTEN_PORT': int(config['mattermost_port']),
                    'DB_HOST': config['pg_db_host'],
                    'DB_PORT_NUMBER': int(config['pg_db_port']),
                    'MM_USERNAME': config['pg_user'],
                    'MM_PASSWORD': config['pg_password'],
                    'MM_ENABLEOPENSERVER': config['open_server'],
                    'MM_ENABLEUPLOADS': config['enable_plugin_uploads'],
                },
            }]
        })

        self.state.is_started = True
        self.model.unit.status = ActiveStatus()
class SRIOVCNICharm(CharmBase):
    def __init__(self, *args):
        super().__init__(*args)
        self.image = OCIImageResource(self, 'sriov-cni-image')
        self.framework.observe(self.on.install, self.set_pod_spec)
        self.framework.observe(self.on.upgrade_charm, self.set_pod_spec)
        self.framework.observe(self.on.config_changed, self.set_pod_spec)

    def set_pod_spec(self, event):
        if not self.model.unit.is_leader():
            log.info('Not a leader, skipping set_pod_spec')
            self.model.unit.status = ActiveStatus()
            return

        try:
            image_details = self.image.fetch()
        except OCIImageResourceError as e:
            self.model.unit.status = e.status
            return

        cni_bin_dir = self.model.config.get('cni-bin-dir', '/opt/cni/bin')

        self.model.unit.status = MaintenanceStatus('Setting pod spec')
        self.model.pod.set_spec({
            'version': 3,
            'containers': [{
                'name': 'sriov-cni',
                'imageDetails': image_details,
                'volumeConfig': [{
                    'name': 'cni-bin',
                    'mountPath': '/dest',
                    'hostPath': {'path': cni_bin_dir}
                }]
            }],
            'kubernetesResources': {
                'pod': {
                    'hostNetwork': True,
                }
            }
        })
        self.model.unit.status = ActiveStatus()
def test_when_partial_image_info_should_match_given_resource(
        self, read_text, path_exists, charm):
    # Given
    resource = OCIImageResource(charm, "test-image")

    # Monkeypatch fetch as we can't mock the parent Object
    def patched_fetch(name: str) -> Path:
        return Path('/a/b/c/d/e/f')

    resource.model.resources.fetch = patched_fetch
    path_exists.return_value = True
    image = "image:6.0"
    read_text.return_value = """
    "registrypath": {}
    """.format(image)

    # When
    image_info = resource.fetch()

    # Then
    self.assertDictEqual(image_info, {'imagePath': image})
class MongoDBCharm(CharmBase):
    state = StoredState()
    on = MongoDBClusterEvents()

    def __init__(self, *args):
        super().__init__(*args)

        self.state.set_default(started=False)
        self.state.set_default(pod_spec=None)

        self.port = MONGODB_PORT
        self.image = OCIImageResource(self, "mongodb-image")

        # Register all of the events we want to observe
        self.framework.observe(self.on.install, self.configure_pod)
        self.framework.observe(self.on.config_changed, self.configure_pod)
        self.framework.observe(self.on.upgrade_charm, self.configure_pod)
        self.framework.observe(self.on.start, self.on_start)
        self.framework.observe(self.on.update_status, self.on_update_status)

        # Peer relation
        self.cluster = MongoDBCluster(self, "cluster", self.port)
        self.framework.observe(self.on.cluster_relation_changed, self.reconfigure)
        self.framework.observe(self.on.cluster_relation_departed, self.reconfigure)

        # Cluster Events
        self.framework.observe(self.on.mongodb_started, self.on_mongodb_started)

        logger.debug("MongoDBCharm initialized!")

    # #############################################
    # ########## CHARM HOOKS HANDLERS #############
    # #############################################

    # hooks: install, config-changed, upgrade-charm
    def configure_pod(self, event):
        # Continue only if the unit is the leader
        if not self.unit.is_leader():
            self.on_update_status(event)
            return
        logger.debug("Running configuring_pod")

        # Check problems in the settings
        problems = self._check_settings()
        if problems:
            self.unit.status = BlockedStatus(problems)
            return

        # Fetch image information
        try:
            self.unit.status = WaitingStatus("Fetching image information")
            image_info = self.image.fetch()
        except OCIImageResourceError:
            self.unit.status = BlockedStatus("Error fetching image information")
            return

        # Build Pod spec
        self.unit.status = BlockedStatus("Assembling pod spec")
        pod_spec = make_pod_spec(
            image_info,
            self.port,
            replica_set_name=self.replica_set_name if not self.standalone else None,
        )

        # Update pod spec if the generated one is different
        # from the one previously applied
        if self.state.pod_spec != pod_spec:
            self.model.pod.set_spec(pod_spec)
            self.state.pod_spec = pod_spec

        self.on_update_status(event)
        logger.debug("Running configuring_pod finished")

    # hooks: start
    def on_start(self, event):
        if not self.unit.is_leader():
            return
        logger.debug("Running on_start")
        if MongoConnector.ready(self.standalone_uri):
            self.on.mongodb_started.emit()
        else:
            # This event is not being retriggered before update_status
            event.defer()
            return
        # Can't call update_status because an infinite loop might happen
        # due to the fact I'm calling on_start from update_status
        # self.on_update_status(event)
        logger.debug("Running on_start finished")

    # hooks: update-status
    def on_update_status(self, event):
        status_message = ""
        if self.standalone:
            status_message += "standalone-mode: "
            if MongoConnector.ready(self.standalone_uri):
                status_message += "ready"
                self.unit.status = ActiveStatus(status_message)
            else:
                status_message += "service not ready yet"
                self.unit.status = WaitingStatus(status_message)
        else:
            status_message += f"replica-set-mode({self.replica_set_name}): "
            if MongoConnector.ready(self.standalone_uri):
                status_message += "ready"
                if self.unit.is_leader():
                    if self.cluster.ready:
                        hosts_count = len(self.cluster.replica_set_hosts)
                        status_message += f" ({hosts_count} members)"
                    else:
                        status_message += " (replica set not initialized yet)"
                        # Since on_start is not being properly triggered,
                        # I'm calling it manually here.
                        self.on.start.emit()
                        self.unit.status = WaitingStatus(status_message)
                        return
                self.unit.status = ActiveStatus(status_message)
            else:
                status_message += "service not ready yet"
                self.unit.status = WaitingStatus(status_message)

    # #############################################
    # ####### PEER RELATION HOOK HANDLERS #########
    # #############################################

    # hooks: cluster-relation-changed, cluster-relation-departed
    def reconfigure(self, event):
        logger.debug("Running reconfigure")
        if (self.unit.is_leader()
                and self.cluster.replica_set_initialized
                and self.cluster.need_replica_set_reconfiguration()):
            uri = self.replica_set_uri
            config = MongoConnector.replset_get_config(uri)
            config = MongoConnector.replset_generate_config(
                self.cluster.hosts,
                self.replica_set_name,
                increase_version=True,
                config=config,
            )
            MongoConnector.replset_reconfigure(uri, config)
            self.on.replica_set_configured.emit(self.cluster.hosts)
        self.on_update_status(event)
        logger.debug("Running reconfigure finished")

    # #############################################
    # ######### CLUSTER EVENT HANDLERS ############
    # #############################################

    def on_mongodb_started(self, event):
        if not self.unit.is_leader() or self.standalone:
            return
        logger.debug("Running on_mongodb_started")
        if not self.cluster.replica_set_initialized:
            self.unit.status = WaitingStatus("Initializing the replica set")
            config = MongoConnector.replset_generate_config(
                self.cluster.hosts, self.replica_set_name)
            MongoConnector.replset_initialize(self.standalone_uri, config)
            self.on.replica_set_configured.emit(self.cluster.hosts)
        self.on.cluster_ready.emit()
        logger.debug("Running on_mongodb_started finished")

    # #############################################
    # ############## PROPERTIES ###################
    # #############################################

    @property
    def replica_set_name(self):
        return self.model.config["replica_set_name"]

    @property
    def standalone(self):
        return self.model.config["standalone"]

    @property
    def replica_set_uri(self):
        uri = "mongodb://"
        for i, host in enumerate(self.cluster.hosts):
            if i:
                uri += ","
            uri += f"{host}:{self.port}"
        uri += f"/?replicaSet={self.replica_set_name}"
        return uri

    @property
    def standalone_uri(self):
        return f"mongodb://{self.model.app.name}:{self.port}/"

    # #############################################
    # ############# PRIVATE METHODS ###############
    # #############################################

    def _check_settings(self):
        problems = []
        config = self.model.config

        for setting in REQUIRED_SETTINGS:
            if config.get(setting) is None:
                problem = f"missing config {setting}"
                problems.append(problem)

        if not self.standalone:
            for setting in REQUIRED_SETTINGS_NOT_STANDALONE:
                if not config.get(setting):
                    problem = f"missing config {setting}"
                    problems.append(problem)

        return ";".join(problems)
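# For reference, with app name "mongodb", port 27017, replica set "rs0" and
# two cluster hosts, the URI properties above evaluate roughly to the
# following (a sketch; the host names depend on the Juju model):
#   standalone_uri  -> "mongodb://mongodb:27017/"
#   replica_set_uri -> "mongodb://host-0:27017,host-1:27017/?replicaSet=rs0"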
class DashboardMetricsScraperCharm(CharmBase):
    def __init__(self, *args):
        super().__init__(*args)

        if not self.unit.is_leader():
            # We can't do anything useful when not the leader, so do nothing.
            self.model.unit.status = WaitingStatus('Waiting for leadership')
            return

        ProvideK8sService(
            self,
            'metrics-scraper',
            service_name=self.app.name,
            service_port=self.model.config["port"],
        )

        self.log = logging.getLogger(__name__)
        self.scraper_image = OCIImageResource(self, 'metrics-scraper-image')

        for event in [
            self.on.install,
            self.on.leader_elected,
            self.on.upgrade_charm,
            self.on.config_changed,
        ]:
            self.framework.observe(event, self.main)

    def main(self, event):
        try:
            scraper_image_details = self.scraper_image.fetch()
        except OCIImageResourceError as e:
            self.model.unit.status = e.status
            return

        self.model.unit.status = MaintenanceStatus('Setting pod spec')
        self.model.pod.set_spec({
            'version': 3,
            'service': {
                'updateStrategy': {
                    'type': 'RollingUpdate',
                    'rollingUpdate': {'maxUnavailable': 1},
                },
                'annotations': {
                    'seccomp.security.alpha.kubernetes.io/pod': 'runtime/default',
                },
            },
            'containers': [
                {
                    'name': self.model.app.name,
                    'imageDetails': scraper_image_details,
                    'ports': [
                        {
                            'name': 'scraper',
                            'containerPort': self.model.config["port"],
                            'protocol': 'TCP',
                        },
                    ],
                    'volumeConfig': [
                        {
                            'name': 'tmp-volume',
                            'mountPath': '/tmp',
                            'emptyDir': {'medium': 'Memory'},
                        },
                    ],
                    'kubernetes': {
                        'securityContext': {
                            'allowPrivilegeEscalation': False,
                            'readOnlyRootFilesystem': True,
                            'runAsUser': 1001,
                            'runAsGroup': 2001,
                        },
                        'livenessProbe': {
                            'httpGet': {
                                'scheme': 'HTTP',
                                'path': '/',
                                'port': 8000,
                            },
                            'initialDelaySeconds': 30,
                            'timeoutSeconds': 30,
                        },
                    },
                },
            ],
            'serviceAccount': {
                'roles': [
                    {
                        'rules': [
                            {
                                'apiGroups': [''],
                                'resources': ['secrets'],
                                'resourceNames': [
                                    'kubernetes-dashboard-key-holder',
                                    'kubernetes-dashboard-certs',
                                    'kubernetes-dashboard-csrf',
                                ],
                                'verbs': ['get', 'update', 'delete'],
                            },
                            {
                                'apiGroups': [''],
                                'resources': ['configmaps'],
                                'resourceNames': ['kubernetes-dashboard-settings'],
                                'verbs': ['get', 'update'],
                            },
                            {
                                'apiGroups': [''],
                                'resources': ['services'],
                                'resourceNames': [
                                    'heapster',
                                    'dashboard-metrics-scraper',
                                ],
                                'verbs': ['proxy'],
                            },
                            {
                                'apiGroups': [''],
                                'resources': ['services/proxy'],
                                'resourceNames': [
                                    'heapster',
                                    'http:heapster',
                                    'https:heapster',
                                    'dashboard-metrics-scraper',
                                    'http:dashboard-metrics-scraper',
                                ],
                                'verbs': ['get'],
                            },
                            {
                                'apiGroups': ['metrics.k8s.io'],
                                'resources': ['pods', 'nodes'],
                                'verbs': ['get', 'list', 'watch'],
                            },
                        ],
                    },
                    {
                        'global': True,
                        'rules': [
                            {
                                'apiGroups': ['metrics.k8s.io'],
                                'resources': ['pods', 'nodes'],
                                'verbs': ['get', 'list', 'watch'],
                            },
                        ],
                    },
                ],
            },
        })
        self.model.unit.status = ActiveStatus()
class MetalLBSpeakerCharm(CharmBase):
    """MetalLB Speaker Charm."""

    _stored = StoredState()

    def __init__(self, *args):
        """Charm initialization for events observation."""
        super().__init__(*args)
        if not self.unit.is_leader():
            self.unit.status = WaitingStatus("Waiting for leadership")
            return
        self.image = OCIImageResource(self, 'metallb-speaker-image')
        self.framework.observe(self.on.install, self._on_start)
        self.framework.observe(self.on.start, self._on_start)
        self.framework.observe(self.on.leader_elected, self._on_start)
        self.framework.observe(self.on.upgrade_charm, self._on_upgrade)
        self.framework.observe(self.on.remove, self._on_remove)
        # -- initialize states --
        self._stored.set_default(k8s_objects_created=False)
        self._stored.set_default(started=False)
        self._stored.set_default(secret=b64encode(
            utils._random_secret(128).encode('utf-8')).decode('utf-8'))
        # -- base values --
        self._stored.set_default(namespace=os.environ["JUJU_MODEL_NAME"])

    def _on_start(self, event):
        """Occurs upon install, start, or upgrade of the charm."""
        if self._stored.started:
            return
        self.unit.status = MaintenanceStatus("Fetching image info")
        try:
            image_info = self.image.fetch()
        except OCIImageResourceError:
            logging.exception('An error occurred while fetching the image info')
            self.unit.status = BlockedStatus("Error fetching image information")
            return
        if not self._stored.k8s_objects_created:
            self.unit.status = MaintenanceStatus("Creating supplementary "
                                                 "Kubernetes objects")
            utils.create_k8s_objects(self._stored.namespace)
            self._stored.k8s_objects_created = True
        self.unit.status = MaintenanceStatus("Configuring pod")
        self.set_pod_spec(image_info)
        self.unit.status = ActiveStatus()
        self._stored.started = True

    def _on_upgrade(self, event):
        """Occurs when new charm code or image info is available."""
        self._stored.started = False
        self._on_start(event)

    def _on_remove(self, event):
        """Remove artifacts created by the K8s API."""
        self.unit.status = MaintenanceStatus("Removing supplementary "
                                             "Kubernetes objects")
        utils.remove_k8s_objects(self._stored.namespace)
        self.unit.status = MaintenanceStatus("Removing pod")
        self._stored.started = False
        self._stored.k8s_objects_created = False

    def set_pod_spec(self, image_info):
        """Set pod spec."""
        self.model.pod.set_spec(
            {
                'version': 3,
                'serviceAccount': {
                    'roles': [{
                        'global': True,
                        'rules': [
                            {
                                'apiGroups': [''],
                                'resources': ['services', 'endpoints', 'nodes'],
                                'verbs': ['get', 'list', 'watch'],
                            },
                            {
                                'apiGroups': [''],
                                'resources': ['events'],
                                'verbs': ['create', 'patch'],
                            },
                            {
                                'apiGroups': ['policy'],
                                'resourceNames': ['speaker'],
                                'resources': ['podsecuritypolicies'],
                                'verbs': ['use'],
                            },
                        ],
                    }],
                },
                'containers': [{
                    'name': 'speaker',
                    'imageDetails': image_info,
                    'imagePullPolicy': 'Always',
                    'ports': [{
                        'containerPort': 7472,
                        'protocol': 'TCP',
                        'name': 'monitoring',
                    }],
                    'envConfig': {
                        'METALLB_NODE_NAME': {
                            'field': {'path': 'spec.nodeName', 'api-version': 'v1'}
                        },
                        'METALLB_HOST': {
                            'field': {'path': 'status.hostIP', 'api-version': 'v1'}
                        },
                        'METALLB_ML_BIND_ADDR': {
                            'field': {'path': 'status.podIP', 'api-version': 'v1'}
                        },
                        'METALLB_ML_LABELS': "app=metallb,component=speaker",
                        'METALLB_ML_NAMESPACE': {
                            'field': {'path': 'metadata.namespace', 'api-version': 'v1'}
                        },
                        'METALLB_ML_SECRET_KEY': {
                            'secret': {'name': 'memberlist', 'key': 'secretkey'}
                        },
                    },
                    # TODO: add constraint fields once it exists in pod_spec
                    # bug : https://bugs.launchpad.net/juju/+bug/1893123
                    # 'resources': {
                    #     'limits': {
                    #         'cpu': '100m',
                    #         'memory': '100Mi',
                    #     }
                    # },
                    'kubernetes': {
                        'securityContext': {
                            'allowPrivilegeEscalation': False,
                            'readOnlyRootFilesystem': True,
                            'capabilities': {
                                'add': ['NET_ADMIN', 'NET_RAW', 'SYS_ADMIN'],
                                'drop': ['ALL'],
                            },
                        },
                        # fields do not exist in pod_spec
                        # 'TerminationGracePeriodSeconds': 2,
                    },
                }],
                'kubernetesResources': {
                    'pod': {'hostNetwork': True},
                    'secrets': [{
                        'name': 'memberlist',
                        'type': 'Opaque',
                        'data': {'secretkey': self._stored.secret},
                    }],
                },
                'service': {
                    'annotations': {
                        'prometheus.io/port': '7472',
                        'prometheus.io/scrape': 'true',
                    }
                },
            },
        )
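# utils._random_secret is called above but defined outside this section; a
# plausible implementation (an assumption, not the verified original) draws
# printable characters from the system CSPRNG, which the charm then
# base64-encodes for the Kubernetes secret:
import secrets
import string


def _random_secret(length: int) -> str:
    # hypothetical helper matching the call site utils._random_secret(128)
    alphabet = string.ascii_letters + string.digits
    return ''.join(secrets.choice(alphabet) for _ in range(length))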
class Operator(CharmBase):
    def __init__(self, *args):
        super().__init__(*args)

        self.log = logging.getLogger(__name__)
        self.image = OCIImageResource(self, "oci-image")

        for event in [
            self.on.install,
            self.on.leader_elected,
            self.on.upgrade_charm,
            self.on.config_changed,
            self.on["kubeflow-profiles"].relation_changed,
            self.on["ingress"].relation_changed,
        ]:
            self.framework.observe(event, self.main)

    def main(self, event):
        try:
            self._check_model_name()
            self._check_leader()
            interfaces = self._get_interfaces()
            image_details = self._check_image_details()
            kf_profiles = self._check_kf_profiles(interfaces)
        except CheckFailed as check_failed:
            self.model.unit.status = check_failed.status
            return

        self._configure_mesh(interfaces)

        kf_profiles = list(kf_profiles.get_data().values())[0]
        profiles_service = kf_profiles["service-name"]

        model = self.model.name
        config = self.model.config

        self.model.unit.status = MaintenanceStatus("Setting pod spec")
        self.model.pod.set_spec(
            {
                "version": 3,
                "serviceAccount": {
                    "roles": [{
                        "global": True,
                        "rules": [
                            {
                                "apiGroups": [""],
                                "resources": ["events", "namespaces", "nodes"],
                                "verbs": ["get", "list", "watch"],
                            },
                            {
                                "apiGroups": ["", "app.k8s.io"],
                                "resources": [
                                    "applications",
                                    "pods",
                                    "pods/exec",
                                    "pods/log",
                                ],
                                "verbs": ["get", "list", "watch"],
                            },
                            {
                                "apiGroups": [""],
                                "resources": ["secrets", "configmaps"],
                                "verbs": ["get"],
                            },
                        ],
                    }]
                },
                "containers": [{
                    "name": "kubeflow-dashboard",
                    "imageDetails": image_details,
                    "envConfig": {
                        "USERID_HEADER": "kubeflow-userid",
                        "USERID_PREFIX": "",
                        "PROFILES_KFAM_SERVICE_HOST": f"{profiles_service}.{model}",
                        "REGISTRATION_FLOW": config["registration-flow"],
                        "DASHBOARD_LINKS_CONFIGMAP": config["dashboard-configmap"],
                    },
                    "ports": [{"name": "ui", "containerPort": config["port"]}],
                    "kubernetes": {
                        "livenessProbe": {
                            "httpGet": {"path": "/healthz", "port": config["port"]},
                            "initialDelaySeconds": 30,
                            "periodSeconds": 30,
                        }
                    },
                }],
            },
            {
                "configMaps": {
                    config["dashboard-configmap"]: {
                        "settings": json.dumps({
                            "DASHBOARD_FORCE_IFRAME": True,
                        }),
                        "links": Path("src/config.json").read_text(),
                    },
                },
                "kubernetesResources": {
                    "customResources": {
                        "profiles.kubeflow.org": [{
                            "apiVersion": "kubeflow.org/v1beta1",
                            "kind": "Profile",
                            "metadata": {"name": config["profile"]},
                            "spec": {
                                "owner": {"kind": "User", "name": config["profile"]}
                            },
                        }]
                    },
                },
            },
        )
        self.model.unit.status = ActiveStatus()

    def _configure_mesh(self, interfaces):
        if interfaces["ingress"]:
            interfaces["ingress"].send_data({
                "prefix": "/",
                "rewrite": "/",
                "service": self.model.app.name,
                "port": self.model.config["port"],
            })

    def _check_model_name(self):
        if self.model.name != "kubeflow":
            # Remove when this bug is resolved:
            # https://github.com/kubeflow/kubeflow/issues/6136
            raise CheckFailed(
                "kubeflow-dashboard must be deployed to model named `kubeflow`:"
                " https://git.io/J6d35",
                BlockedStatus,
            )

    def _check_leader(self):
        if not self.unit.is_leader():
            # We can't do anything useful when not the leader, so do nothing.
            raise CheckFailed("Waiting for leadership", WaitingStatus)

    def _get_interfaces(self):
        try:
            interfaces = get_interfaces(self)
        except NoVersionsListed as err:
            raise CheckFailed(err, WaitingStatus)
        except NoCompatibleVersions as err:
            raise CheckFailed(err, BlockedStatus)
        return interfaces

    def _check_image_details(self):
        try:
            image_details = self.image.fetch()
        except OCIImageResourceError as e:
            raise CheckFailed(f"{e.status_message}: oci-image", e.status_type)
        return image_details

    def _check_kf_profiles(self, interfaces):
        if not ((kf_profiles := interfaces["kubeflow-profiles"])
                and kf_profiles.get_data()):
            raise CheckFailed("Waiting for kubeflow-profiles relation data",
                              WaitingStatus)
        return kf_profiles
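# CheckFailed is raised with a message and a status class, then read back via
# .status (and, in _check_image_details, via .status_type). A minimal sketch
# consistent with both uses; the definition lives outside this section, so
# this is an assumption rather than the verified original:
class CheckFailed(Exception):
    """Raise to short-circuit main() with a unit status to set."""

    def __init__(self, msg, status_type):
        super().__init__(msg)
        self.msg = str(msg)
        self.status_type = status_type
        # Pre-built status object the handler assigns to self.model.unit.status
        self.status = status_type(self.msg)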
class Operator(CharmBase):
    def __init__(self, *args):
        super().__init__(*args)

        if not self.model.unit.is_leader():
            log.info("Not a leader, skipping set_pod_spec")
            self.model.unit.status = ActiveStatus()
            return

        self.image = OCIImageResource(self, "oci-image")

        self.framework.observe(self.on.install, self.set_pod_spec)
        self.framework.observe(self.on.upgrade_charm, self.set_pod_spec)
        self.framework.observe(self.on.config_changed, self.set_pod_spec)
        for rel in self.model.relations.keys():
            self.framework.observe(
                self.on[rel].relation_changed,
                self.set_pod_spec,
            )

        self.prometheus_provider = MetricsEndpointProvider(
            charm=self,
            relation_name="metrics-endpoint",
            jobs=[{
                "metrics_path": self.config["executor-server-metrics-port-name"],
                "static_configs": [{
                    "targets": ["*:{}".format(self.config["metrics-port"])]
                }],
            }],
        )

        self.dashboard_provider = GrafanaDashboardProvider(
            charm=self,
            relation_name="grafana-dashboard",
        )

    def set_pod_spec(self, event):
        if not self.model.unit.is_leader():
            log.info("Not a leader, skipping set_pod_spec")
            self.model.unit.status = ActiveStatus()
            return

        try:
            image_details = self.image.fetch()
        except OCIImageResourceError as e:
            self.model.unit.status = e.status
            log.info(e)
            return

        config = self.model.config
        tconfig = {k.replace("-", "_"): v for k, v in config.items()}
        tconfig["service"] = self.model.app.name
        tconfig["namespace"] = self.model.name
        env = Environment(loader=FileSystemLoader("src/templates/"))

        envs = {
            "AMBASSADOR_ENABLED": str(bool(self.model.relations["ambassador"])).lower(),
            "AMBASSADOR_SINGLE_NAMESPACE": str(config["ambassador-single-namespace"]).lower(),
            "CONTROLLER_ID": config["controller-id"],
            "DEFAULT_USER_ID": config["default-user-id"],
            "ENGINE_CONTAINER_IMAGE_AND_VERSION": config["engine-container-image-and-version"],
            "ENGINE_CONTAINER_IMAGE_PULL_POLICY": config["engine-container-image-pull-policy"],
            "ENGINE_CONTAINER_SERVICE_ACCOUNT_NAME": config["engine-container-service-account-name"],
            "ENGINE_CONTAINER_USER": config["engine-container-user"],
            "ENGINE_DEFAULT_CPU_LIMIT": config["engine-default-cpu-limit"],
            "ENGINE_DEFAULT_CPU_REQUEST": config["engine-default-cpu-request"],
            "ENGINE_DEFAULT_MEMORY_LIMIT": config["engine-default-memory-limit"],
            "ENGINE_DEFAULT_MEMORY_REQUEST": config["engine-default-memory-request"],
            "ENGINE_LOG_MESSAGES_EXTERNALLY": str(config["engine-log-messages-externally"]).lower(),
            "ENGINE_PROMETHEUS_PATH": config["engine-prometheus-path"],
            "ENGINE_SERVER_GRPC_PORT": config["engine-server-grpc-port"],
            "ENGINE_SERVER_PORT": config["engine-server-port"],
            "EXECUTOR_CONTAINER_IMAGE_AND_VERSION": config["executor-container-image-and-version"],
            "EXECUTOR_CONTAINER_IMAGE_PULL_POLICY": config["executor-container-image-pull-policy"],
            "EXECUTOR_CONTAINER_SERVICE_ACCOUNT_NAME": config["executor-container-service-account-name"],
            "EXECUTOR_CONTAINER_USER": config["executor-container-user"],
            "EXECUTOR_DEFAULT_CPU_LIMIT": config["executor-default-cpu-limit"],
            "EXECUTOR_DEFAULT_CPU_REQUEST": config["executor-default-cpu-request"],
            "EXECUTOR_DEFAULT_MEMORY_LIMIT": config["executor-default-memory-limit"],
            "EXECUTOR_DEFAULT_MEMORY_REQUEST": config["executor-default-memory-request"],
            "EXECUTOR_PROMETHEUS_PATH": config["executor-prometheus-path"],
            "EXECUTOR_REQUEST_LOGGER_DEFAULT_ENDPOINT": config["executor-request-logger-default-endpoint"],
            "EXECUTOR_SERVER_METRICS_PORT_NAME": config["executor-server-metrics-port-name"],
            "EXECUTOR_SERVER_PORT": config["executor-server-port"],
            "ISTIO_ENABLED": str(bool(self.model.relations["istio"])).lower(),
            "ISTIO_GATEWAY": config["istio-gateway"],
            "ISTIO_TLS_MODE": config["istio-tls-mode"],
            "KEDA_ENABLED": str(bool(self.model.relations["keda"])).lower(),
            "MANAGER_CREATE_RESOURCES": "true",
            "POD_NAMESPACE": self.model.name,
            "PREDICTIVE_UNIT_DEFAULT_ENV_SECRET_REF_NAME": config["predictive-unit-default-env-secret-ref-name"],
            "PREDICTIVE_UNIT_METRICS_PORT_NAME": config["predictive-unit-metrics-port-name"],
            "PREDICTIVE_UNIT_SERVICE_PORT": config["predictive-unit-service-port"],
            "RELATED_IMAGE_ENGINE": config["related-image-engine"],
            "RELATED_IMAGE_EXECUTOR": config["related-image-executor"],
            "RELATED_IMAGE_EXPLAINER": config["related-image-explainer"],
            "RELATED_IMAGE_MLFLOWSERVER": config["related-image-mlflowserver"],
            "RELATED_IMAGE_MOCK_CLASSIFIER": config["related-image-mock-classifier"],
            "RELATED_IMAGE_SKLEARNSERVER": config["related-image-sklearnserver"],
            "RELATED_IMAGE_STORAGE_INITIALIZER": config["related-image-storage-initializer"],
            "RELATED_IMAGE_TENSORFLOW": config["related-image-tensorflow"],
            "RELATED_IMAGE_TFPROXY": config["related-image-tfproxy"],
            "RELATED_IMAGE_XGBOOSTSERVER": config["related-image-xgboostserver"],
            "USE_EXECUTOR": str(config["use-executor"]).lower(),
            "WATCH_NAMESPACE": config["watch-namespace"],
        }

        self.model.unit.status = MaintenanceStatus("Setting pod spec")
        self.model.pod.set_spec(
            {
                "version": 3,
                "serviceAccount": {
                    "roles": [{
                        "global": True,
                        "rules": [
                            {
                                "apiGroups": [""],
                                "resources": ["events"],
                                "verbs": ["create", "patch"],
                            },
                            {
                                "apiGroups": [""],
                                "resources": ["namespaces"],
                                "verbs": ["get", "list", "watch"],
                            },
                            {
                                "apiGroups": [""],
                                "resources": ["services"],
                                "verbs": ["create", "delete", "get", "list",
                                          "patch", "update", "watch"],
                            },
                            {
                                "apiGroups": ["apps"],
                                "resources": ["deployments"],
                                "verbs": ["create", "delete", "get", "list",
                                          "patch", "update", "watch"],
                            },
                            {
                                "apiGroups": ["apps"],
                                "resources": ["deployments/status"],
                                "verbs": ["get", "patch", "update"],
                            },
                            {
                                "apiGroups": ["autoscaling"],
                                "resources": ["horizontalpodautoscalers"],
                                "verbs": ["create", "delete", "get", "list",
                                          "patch", "update", "watch"],
                            },
                            {
                                "apiGroups": ["autoscaling"],
                                "resources": ["horizontalpodautoscalers/status"],
                                "verbs": ["get", "patch", "update"],
                            },
                            {
                                "apiGroups": ["keda.sh"],
                                "resources": ["scaledobjects"],
                                "verbs": ["create", "delete", "get", "list",
                                          "patch", "update", "watch"],
                            },
                            {
                                "apiGroups": ["keda.sh"],
                                "resources": ["scaledobjects/finalizers"],
                                "verbs": ["create", "delete", "get", "list",
                                          "patch", "update", "watch"],
                            },
                            {
                                "apiGroups": ["keda.sh"],
                                "resources": ["scaledobjects/status"],
                                "verbs": ["get", "patch", "update"],
                            },
                            {
                                "apiGroups": ["machinelearning.seldon.io"],
                                "resources": ["seldondeployments"],
                                "verbs": ["create", "delete", "get", "list",
                                          "patch", "update", "watch"],
                            },
                            {
                                "apiGroups": ["machinelearning.seldon.io"],
                                "resources": ["seldondeployments/finalizers"],
                                "verbs": ["get", "patch", "update"],
                            },
                            {
                                "apiGroups": ["machinelearning.seldon.io"],
                                "resources": ["seldondeployments/status"],
                                "verbs": ["get", "patch", "update"],
                            },
                            {
                                "apiGroups": ["networking.istio.io"],
                                "resources": ["destinationrules"],
                                "verbs": ["create", "delete", "get", "list",
                                          "patch", "update", "watch"],
                            },
                            {
                                "apiGroups": ["networking.istio.io"],
                                "resources": ["destinationrules/status"],
                                "verbs": ["get", "patch", "update"],
                            },
                            {
                                "apiGroups": ["networking.istio.io"],
                                "resources": ["virtualservices"],
                                "verbs": ["create", "delete", "get", "list",
                                          "patch", "update", "watch"],
                            },
                            {
                                "apiGroups": ["networking.istio.io"],
                                "resources": ["virtualservices/status"],
                                "verbs": ["get", "patch", "update"],
                            },
                            {
                                "apiGroups": ["policy"],
                                "resources": ["poddisruptionbudgets"],
                                "verbs": ["create", "delete", "get", "list",
                                          "patch", "update", "watch"],
                            },
                            {
                                "apiGroups": ["policy"],
                                "resources": ["poddisruptionbudgets/status"],
                                "verbs": ["get", "patch", "update"],
                            },
                            {
                                "apiGroups": ["v1"],
                                "resources": ["namespaces"],
                                "verbs": ["get", "list", "watch"],
                            },
                            {
                                "apiGroups": ["v1"],
                                "resources": ["services"],
                                "verbs": ["create", "delete", "get", "list",
                                          "patch", "update", "watch"],
                            },
                            {
                                "apiGroups": ["v1"],
                                "resources": ["services/status"],
                                "verbs": ["get", "patch", "update"],
                            },
                            {
                                "apiGroups": ["apiextensions.k8s.io"],
                                "resources": ["customresourcedefinitions"],
                                "verbs": ["create", "get", "list"],
                            },
                            {
                                "apiGroups": ["admissionregistration.k8s.io"],
                                "resources": ["validatingwebhookconfigurations"],
                                "verbs": ["get", "list", "create", "delete", "update"],
                            },
                            {
                                "apiGroups": [""],
                                "resources": ["configmaps"],
                                "verbs": ["get", "list", "watch", "create",
                                          "update", "patch", "delete"],
                            },
                        ],
                    }]
                },
                "containers": [{
                    "name": "seldon-core",
                    "command": ["/manager"],
                    "args": [
                        "--enable-leader-election",
                        "--webhook-port",
                        config["webhook-port"],
                        "--create-resources",
                        "true",
                    ],
                    "imageDetails": image_details,
                    "ports": [
                        {
                            "name": "metrics",
                            "containerPort": int(config["metrics-port"]),
                        },
                        {
                            "name": "webhook",
                            "containerPort": int(config["webhook-port"]),
                        },
                    ],
                    "envConfig": envs,
                    "volumeConfig": [{
                        "name": "operator-resources",
                        "mountPath": "/tmp/operator-resources",
                        "files": [{
                            "path": f"{name}.yaml",
                            "content": env.get_template(f"{name}.yaml").render(tconfig),
                        } for name in (
                            "configmap",
                            "crd",
                            "service",
                            "validate",
                        )],
                    }],
                }],
            },
        )
        self.model.unit.status = ActiveStatus()
class KfpApiOperator(CharmBase):
    """Charm the Kubeflow Pipelines API.

    https://github.com/canonical/kfp-operators/
    """

    def __init__(self, *args):
        super().__init__(*args)

        self.log = logging.getLogger()
        self.image = OCIImageResource(self, "oci-image")

        self.prometheus_provider = MetricsEndpointProvider(
            charm=self,
            relation_name="metrics-endpoint",
            jobs=[
                {
                    "metrics_path": METRICS_PATH,
                    "static_configs": [{"targets": ["*:{}".format(self.config["http-port"])]}],
                }
            ],
        )

        self.dashboard_provider = GrafanaDashboardProvider(self)

        change_events = [
            self.on.install,
            self.on.upgrade_charm,
            self.on.config_changed,
            self.on["mysql"].relation_changed,
            self.on["object-storage"].relation_changed,
            self.on["kfp-viz"].relation_changed,
            self.on["kfp-api"].relation_changed,
        ]
        for event in change_events:
            self.framework.observe(event, self._main)

    def _send_info(self, interfaces):
        if interfaces["kfp-api"]:
            interfaces["kfp-api"].send_data(
                {
                    "service-name": f"{self.model.app.name}.{self.model.name}",
                    "service-port": self.model.config["http-port"],
                }
            )

    def _main(self, event):
        # Set up all relations/fetch required data
        try:
            self._check_leader()
            mysql = self._get_mysql()
            interfaces = self._get_interfaces()
            image_details = self.image.fetch()
            os = self._get_object_storage(interfaces)
            viz = self._get_viz(interfaces)
        except (CheckFailedError, OCIImageResourceError) as check_failed:
            self.model.unit.status = check_failed.status
            self.log.info(str(check_failed.status))
            return

        self._send_info(interfaces)

        config, config_json = self._generate_config(mysql, os, viz)
        healthz = f"http://localhost:{config['http-port']}/apis/v1beta1/healthz"

        self.model.unit.status = MaintenanceStatus("Setting pod spec")
        self.model.pod.set_spec(
            {
                "version": 3,
                "serviceAccount": {
                    "roles": [
                        {
                            "global": True,
                            "rules": [
                                {
                                    "apiGroups": [""],
                                    "resources": ["pods", "pods/log"],
                                    "verbs": ["get", "list", "delete"],
                                },
                                {
                                    "apiGroups": ["argoproj.io"],
                                    "resources": ["workflows"],
                                    "verbs": [
                                        "create", "get", "list", "watch",
                                        "update", "patch", "delete",
                                    ],
                                },
                                {
                                    "apiGroups": ["kubeflow.org"],
                                    "resources": ["scheduledworkflows"],
                                    "verbs": [
                                        "create", "get", "list",
                                        "update", "patch", "delete",
                                    ],
                                },
                                {
                                    "apiGroups": ["authorization.k8s.io"],
                                    "resources": ["subjectaccessreviews"],
                                    "verbs": ["create"],
                                },
                                {
                                    "apiGroups": ["authentication.k8s.io"],
                                    "resources": ["tokenreviews"],
                                    "verbs": ["create"],
                                },
                            ],
                        }
                    ]
                },
                "containers": [
                    {
                        "name": "ml-pipeline-api-server",
                        "imageDetails": image_details,
                        "ports": [
                            {"name": "http", "containerPort": int(config["http-port"])},
                            {"name": "grpc", "containerPort": int(config["grpc-port"])},
                        ],
                        "envConfig": {
                            "POD_NAMESPACE": self.model.name,
                        },
                        "volumeConfig": [
                            {
                                "name": "config",
                                "mountPath": "/config",
                                "files": [
                                    {
                                        "path": "config.json",
                                        "content": json.dumps(config_json),
                                    },
                                    {
                                        "path": "sample_config.json",
                                        "content": Path("src/sample_config.json").read_text(),
                                    },
                                ],
                            }
                        ],
                        "kubernetes": {
                            "readinessProbe": {
                                "exec": {"command": ["wget", "-q", "-S", "-O", "-", healthz]},
                                "initialDelaySeconds": 3,
                                "periodSeconds": 5,
                                "timeoutSeconds": 2,
                            },
                            "livenessProbe": {
                                "exec": {"command": ["wget", "-q", "-S", "-O", "-", healthz]},
                                "initialDelaySeconds": 3,
                                "periodSeconds": 5,
                                "timeoutSeconds": 2,
                            },
                        },
                    }
                ],
            },
            k8s_resources={
                "kubernetesResources": {
                    "services": [
                        {
                            "name": config["k8s-service-name"],
                            "spec": {
                                "selector": {"app.kubernetes.io/name": self.model.app.name},
                                "ports": [
                                    {
                                        "name": "grpc",
                                        "port": int(config["grpc-port"]),
                                        "protocol": "TCP",
                                        "targetPort": int(config["grpc-port"]),
                                    },
                                    {
                                        "name": "http",
                                        "port": int(config["http-port"]),
                                        "protocol": "TCP",
                                        "targetPort": int(config["http-port"]),
                                    },
                                ],
                            },
                        },
                    ],
                }
            },
        )
        self.model.unit.status = ActiveStatus()

    def _generate_config(self, mysql, os, viz):
        config = self.model.config
        config_json = {
            "DBConfig": {
                "ConMaxLifeTimeSec": "120",
                "DBName": mysql["database"],
                "DriverName": "mysql",
                "GroupConcatMaxLen": "4194304",
                "Host": mysql["host"],
                "Password": mysql["root_password"],
                "Port": mysql["port"],
                "User": "******",
            },
            "ObjectStoreConfig": {
                "AccessKey": os["access-key"],
                "BucketName": "mlpipeline",
                "Host": f"{os['service']}.{os['namespace']}",
                "Multipart": {"Disable": "true"},
                "PipelinePath": "pipelines",
                "Port": os["port"],
                "Region": "",
                "SecretAccessKey": os["secret-key"],
                "Secure": str(os["secure"]).lower(),
            },
            "ARCHIVE_CONFIG_LOG_FILE_NAME": config["log-archive-filename"],
            "ARCHIVE_CONFIG_LOG_PATH_PREFIX": config["log-archive-prefix"],
            "AUTO_UPDATE_PIPELINE_DEFAULT_VERSION": config["auto-update-default-version"],
            "CACHE_IMAGE": config["cache-image"],
            "CACHE_NODE_RESTRICTIONS": "false",
            "CacheEnabled": str(config["cache-enabled"]).lower(),
            "DefaultPipelineRunnerServiceAccount": config["runner-sa"],
            "InitConnectionTimeout": config["init-connection-timeout"],
            "KUBEFLOW_USERID_HEADER": "kubeflow-userid",
            "KUBEFLOW_USERID_PREFIX": "",
            "MULTIUSER": "******",
            "ML_PIPELINE_VISUALIZATIONSERVER_SERVICE_HOST": viz["service-name"],
            "ML_PIPELINE_VISUALIZATIONSERVER_SERVICE_PORT": viz["service-port"],
        }
        return config, config_json

    def _check_leader(self):
        if not self.unit.is_leader():
            # We can't do anything useful when not the leader, so do nothing.
            raise CheckFailedError("Waiting for leadership", WaitingStatus)

    def _get_interfaces(self):
        # Remove this abstraction when SDI adds .status attribute to NoVersionsListed,
        # NoCompatibleVersionsListed:
        # https://github.com/canonical/serialized-data-interface/issues/26
        try:
            interfaces = get_interfaces(self)
        except NoVersionsListed as err:
            raise CheckFailedError(str(err), WaitingStatus)
        except NoCompatibleVersions as err:
            raise CheckFailedError(str(err), BlockedStatus)
        return interfaces

    def _get_mysql(self):
        mysql = self.model.relations["mysql"]
        if len(mysql) == 0:
            raise CheckFailedError("Missing required relation for mysql", BlockedStatus)
        elif len(mysql) > 1:
            raise CheckFailedError("Too many mysql relations", BlockedStatus)

        try:
            mysql = mysql[0]
            unit = list(mysql.units)[0]
            mysql = mysql.data[unit]
        except Exception as e:
            self.log.error(
                f"Encountered the following exception when parsing mysql relation: {str(e)}"
            )
            raise CheckFailedError(
                "Unexpected error when parsing mysql relation. See logs", BlockedStatus
            )

        expected_attributes = ["database", "host", "root_password", "port"]
        missing_attributes = [
            attribute for attribute in expected_attributes if attribute not in mysql
        ]
        if len(missing_attributes) == len(expected_attributes):
            raise CheckFailedError("Waiting for mysql relation data", WaitingStatus)
        elif len(missing_attributes) > 0:
            self.log.error(
                f"mysql relation data missing expected attributes '{missing_attributes}'"
            )
            raise CheckFailedError(
                "Received incomplete data from mysql relation. See logs", BlockedStatus
            )
        return mysql

    def _get_object_storage(self, interfaces):
        relation_name = "object-storage"
        return self._validate_sdi_interface(interfaces, relation_name)

    def _get_viz(self, interfaces):
        relation_name = "kfp-viz"
        default_viz_data = {"service-name": "unset", "service-port": "1234"}
        return self._validate_sdi_interface(
            interfaces, relation_name, default_return=default_viz_data
        )

    def _validate_sdi_interface(self, interfaces: dict, relation_name: str, default_return=None):
        """Validates data received from SerializedDataInterface, returning the data if valid.

        Optionally can return a default_return value when no relation is established.

        Raises:
            CheckFailed(..., Blocked) when no relation established (unless default_return set)
            CheckFailed(..., Blocked) if interface is not using SDI
            CheckFailed(..., Blocked) if data in interface fails schema check
            CheckFailed(..., Waiting) if we have a relation established but no data passed

        Params:
            interfaces:

        Returns:
            (dict) interface data
        """
        # If nothing is related to this relation, return a default value or raise an error
        if relation_name not in interfaces or interfaces[relation_name] is None:
            if default_return is not None:
                return default_return
            else:
                raise CheckFailedError(
                    f"Missing required relation for {relation_name}", BlockedStatus
                )

        relations = interfaces[relation_name]
        if not isinstance(relations, SerializedDataInterface):
            raise CheckFailedError(
                f"Unexpected error with {relation_name} relation data - data not as expected",
                BlockedStatus,
            )

        # Get and validate data from the relation
        try:
            # relations is a dict of {(ops.model.Relation, ops.model.Application): data}
            unpacked_relation_data = relations.get_data()
        except ValidationError as val_error:
            # Validation in .get_data() ensures if data is populated, it matches the schema
            # and is not incomplete
            self.log.error(val_error)
            raise CheckFailedError(
                f"Found incomplete/incorrect relation data for {relation_name}. See logs",
                BlockedStatus,
            )

        # Check if we have an established relation with no data exchanged
        if len(unpacked_relation_data) == 0:
            raise CheckFailedError(f"Waiting for {relation_name} relation data", WaitingStatus)

        # Unpack data (we care only about the first element)
        data_dict = list(unpacked_relation_data.values())[0]

        # Catch if empty data dict is received (JSONSchema ValidationError above does not raise
        # when this happens)
        # Remove once addressed in:
        # https://github.com/canonical/serialized-data-interface/issues/28
        if len(data_dict) == 0:
            raise CheckFailedError(
                f"Found incomplete/incorrect relation data for {relation_name}.",
                BlockedStatus,
            )

        return data_dict
class OPAAuditCharm(CharmBase):
    """A Juju Charm for OPA."""

    _stored = StoredState()

    def __init__(self, *args):
        super().__init__(*args)
        self.framework.observe(self.on.config_changed, self._on_config_changed)
        self.framework.observe(self.on.stop, self._on_stop)
        self.framework.observe(self.on.install, self._on_install)
        self._stored.set_default(things=[])
        self.image = OCIImageResource(self, "gatekeeper-image")

    def _on_config_changed(self, _):
        """Set a new Juju pod specification."""
        self._configure_pod()

    def _on_stop(self, _):
        """Mark unit as inactive."""
        self.unit.status = MaintenanceStatus("Pod is terminating.")
        logger.info("Pod is terminating.")

    def _on_install(self, event):
        logger.info("Congratulations, the charm was properly installed!")

    def _build_pod_spec(self):
        """Construct a Juju pod specification for OPA."""
        logger.debug("Building Pod Spec")
        crds = []
        try:
            # safe_load avoids the deprecated Loader-less yaml.load
            crds = [
                yaml.safe_load(Path(f).read_text())
                for f in [
                    "files/configs.config.gatekeeper.sh.yaml",
                    "files/constrainttemplates.templates.gatekeeper.sh.yaml",
                    "files/constraintpodstatuses.status.gatekeeper.sh.yaml",
                    "files/constrainttemplatepodstatuses.status.gatekeeper.sh.yaml",
                ]
            ]
        except yaml.YAMLError as exc:
            # Pass the exception as a formatting argument rather than a second
            # positional argument to logger.error.
            logger.error("Error in configuration file: %s", exc)

        crd_objects = [
            CustomResourceDefintion(crd["metadata"]["name"], crd["spec"])
            for crd in crds
        ]

        config = self.model.config
        with open("files/pod-spec.yaml.jinja2") as fh:
            spec_template = Template(fh.read())

        try:
            image_details = self.image.fetch()
        except OCIImageResourceError as e:
            self.model.unit.status = e.status
            return

        template_args = {
            "crds": crd_objects,
            "image_details": image_details,
            "imagePullPolicy": config["imagePullPolicy"],
            "app_name": self.app.name,
            "audit_cli_args": self._audit_cli_args(),
            "namespace": os.environ["JUJU_MODEL_NAME"],
        }

        spec = yaml.safe_load(spec_template.render(**template_args))
        logger.debug("Pod spec: %s", spec)
        return spec

    def _audit_cli_args(self):
        """Construct command line arguments for OPA Audit."""
        args = [
            "--operation=audit",
            "--operation=status",
            "--logtostderr",
        ]
        return args

    def _configure_pod(self):
        """Set up a new OPA pod specification."""
        logger.debug("Configuring Pod")
        if not self.unit.is_leader():
            self.unit.status = ActiveStatus()
            return

        self.unit.status = MaintenanceStatus("Setting pod spec.")
        pod_spec = self._build_pod_spec()
        if pod_spec is None:
            # _build_pod_spec already set an error status after a failed
            # image fetch; don't apply an empty spec.
            return
        self.model.pod.set_spec(pod_spec)
        self.unit.status = ActiveStatus()
class Operator(CharmBase):
    """Deploys the katib-db-manager service."""

    def __init__(self, framework):
        super().__init__(framework)

        if not self.model.unit.is_leader():
            logger.info("Not a leader, skipping any work")
            self.model.unit.status = ActiveStatus()
            return

        self.image = OCIImageResource(self, "oci-image")

        self.framework.observe(self.on.install, self.set_pod_spec)
        self.framework.observe(self.on.config_changed, self.set_pod_spec)
        self.framework.observe(self.on.upgrade_charm, self.set_pod_spec)
        self.framework.observe(self.on["mysql"].relation_joined, self.set_pod_spec)
        self.framework.observe(self.on["mysql"].relation_changed, self.set_pod_spec)

    def set_pod_spec(self, event):
        self.model.unit.status = MaintenanceStatus("Setting pod spec")

        try:
            image_details = self.image.fetch()
        except OCIImageResourceError as e:
            self.model.unit.status = e.status
            return

        try:
            relation = self.model.relations["mysql"][0]
            unit = next(iter(relation.units))
            mysql_data = relation.data[unit]
            # Ensure we've got some data sent over the relation
            mysql_data["root_password"]
        except (IndexError, StopIteration, KeyError):
            self.model.unit.status = WaitingStatus(
                "Waiting for mysql connection information")
            return

        self.model.pod.set_spec(
            {
                "version": 3,
                "serviceAccount": {
                    "roles": [{
                        "global": True,
                        "rules": [
                            {
                                "apiGroups": [""],
                                "resources": ["configmaps", "namespaces"],
                                "verbs": ["*"],
                            },
                            {
                                "apiGroups": ["kubeflow.org"],
                                "resources": [
                                    "experiments",
                                    "trials",
                                    "suggestions",
                                ],
                                "verbs": ["*"],
                            },
                        ],
                    }]
                },
                "containers": [{
                    "name": "katib-db-manager",
                    "command": ["./katib-db-manager"],
                    "imageDetails": image_details,
                    "ports": [{
                        "name": "api",
                        "containerPort": self.model.config["port"],
                    }],
                    "envConfig": {
                        "DB_NAME": "mysql",
                        "DB_USER": "******",
                        "DB_PASSWORD": mysql_data["root_password"],
                        "KATIB_MYSQL_DB_HOST": mysql_data["host"],
                        "KATIB_MYSQL_DB_PORT": mysql_data["port"],
                        "KATIB_MYSQL_DB_DATABASE": mysql_data["database"],
                    },
                    "kubernetes": {
                        "readinessProbe": {
                            "exec": {
                                "command": [
                                    "/bin/grpc_health_probe",
                                    f"-addr=:{self.model.config['port']}",
                                ]
                            },
                            "initialDelaySeconds": 5,
                        },
                        "livenessProbe": {
                            "exec": {
                                "command": [
                                    "/bin/grpc_health_probe",
                                    f"-addr=:{self.model.config['port']}",
                                ]
                            },
                            "initialDelaySeconds": 10,
                            "periodSeconds": 60,
                            "failureThreshold": 5,
                        },
                    },
                }],
            },
        )
        self.model.unit.status = ActiveStatus()
class MultusCharm(CharmBase):
    def __init__(self, framework, key):
        super().__init__(framework, key)
        self.multus_image = OCIImageResource(self, 'multus-image')
        self.nadm_image = OCIImageResource(self, 'net-attach-def-manager-image')
        self.framework.observe(self.on.install, self.set_pod_spec)
        self.framework.observe(self.on.upgrade_charm, self.set_pod_spec)
        self.framework.observe(self.on.config_changed, self.set_pod_spec)

    def set_pod_spec(self, event):
        if not self.model.unit.is_leader():
            log.info('Not a leader, skipping set_pod_spec')
            self.model.unit.status = ActiveStatus()
            return

        try:
            multus_image_details = self.multus_image.fetch()
            nadm_image_details = self.nadm_image.fetch()
        except OCIImageResourceError as e:
            self.model.unit.status = e.status
            return

        net_attach_defs_str = self.model.config.get(
            'network-attachment-definitions', '')
        invalid_net_attach_def_status = BlockedStatus(
            'network-attachment-definitions config is invalid, see debug-log')
        try:
            net_attach_defs = list(yaml.safe_load_all(net_attach_defs_str))
        except yaml.YAMLError:
            log.error(traceback.format_exc())
            self.model.unit.status = invalid_net_attach_def_status
            return

        for net_attach_def in net_attach_defs:
            if net_attach_def.get('apiVersion') != 'k8s.cni.cncf.io/v1':
                log.error('network-attachment-definitions config is invalid:'
                          ' apiVersion must be k8s.cni.cncf.io/v1')
                self.model.unit.status = invalid_net_attach_def_status
                return
            if net_attach_def.get('kind') != 'NetworkAttachmentDefinition':
                log.error('network-attachment-definitions config is invalid:'
                          ' kind must be NetworkAttachmentDefinition')
                self.model.unit.status = invalid_net_attach_def_status
                return
            if not net_attach_def.get('metadata', {}).get('name'):
                log.error('network-attachment-definitions config is invalid:'
                          ' metadata.name is required')
                self.model.unit.status = invalid_net_attach_def_status
                return
            if not net_attach_def.get('spec', {}).get('config'):
                log.error('network-attachment-definitions config is invalid:'
                          ' spec.config is required')
                self.model.unit.status = invalid_net_attach_def_status
                return

        for net_attach_def in net_attach_defs:
            net_attach_def['metadata'].setdefault('namespace', self.model.name)

        self.model.unit.status = MaintenanceStatus('Setting pod spec')
        pod_spec = {
            'version': 3,
            'containers': [{
                'name': 'kube-multus',
                'imageDetails': multus_image_details,
                'command': ['/entrypoint.sh'],
                'args': ['--multus-conf-file=auto', '--cni-version=0.3.1'],
                'volumeConfig': [{
                    'name': 'cni',
                    'mountPath': '/host/etc/cni/net.d',
                    'hostPath': {'path': '/etc/cni/net.d'}
                }, {
                    'name': 'cnibin',
                    'mountPath': '/host/opt/cni/bin',
                    'hostPath': {'path': '/opt/cni/bin'}
                }],
                'kubernetes': {
                    'securityContext': {'privileged': True}
                }
            }, {
                'name': 'net-attach-def-manager',
                'imageDetails': nadm_image_details,
                'volumeConfig': [{
                    'name': 'config',
                    'mountPath': '/config',
                    'files': [{
                        'path': 'manifest.yaml',
                        'content': yaml.safe_dump_all(net_attach_defs) or '# empty'
                    }]
                }]
            }],
            'serviceAccount': {
                'roles': [{
                    'global': True,
                    'rules': [{
                        'apiGroups': ['k8s.cni.cncf.io'],
                        'resources': ['*'],
                        'verbs': ['*']
                    }, {
                        'apiGroups': [''],
                        'resources': ['pods', 'pods/status'],
                        'verbs': ['get', 'update']
                    }]
                }]
            },
            'kubernetesResources': {
                'pod': {'hostNetwork': True},
                'customResourceDefinitions': [{
                    'name': 'network-attachment-definitions.k8s.cni.cncf.io',
                    'spec': {
                        'group': 'k8s.cni.cncf.io',
                        'scope': 'Namespaced',
                        'names': {
                            'plural': 'network-attachment-definitions',
                            'singular': 'network-attachment-definition',
                            'kind': 'NetworkAttachmentDefinition',
                            'shortNames': ['net-attach-def']
                        },
                        'versions': [{
                            'name': 'v1',
                            'served': True,
                            'storage': True
                        }],
                        'validation': {
                            'openAPIV3Schema': {
                                'type': 'object',
                                'properties': {
                                    'spec': {
                                        'type': 'object',
                                        'properties': {
                                            'config': {'type': 'string'}
                                        }
                                    }
                                }
                            }
                        }
                    }
                }]
            }
        }
        self.model.pod.set_spec(pod_spec)
        self.model.unit.status = ActiveStatus()
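# A config value that passes the validation in set_pod_spec above must be a
# YAML stream of NetworkAttachmentDefinition documents with apiVersion
# k8s.cni.cncf.io/v1, metadata.name, and spec.config. A sketch of setting it
# (the macvlan/interface values are illustrative, not from this charm):
#
#   juju config multus network-attachment-definitions='
#   apiVersion: k8s.cni.cncf.io/v1
#   kind: NetworkAttachmentDefinition
#   metadata:
#     name: macvlan-conf
#   spec:
#     config: |
#       {"cniVersion": "0.3.1", "type": "macvlan", "master": "eth0",
#        "ipam": {"type": "host-local", "subnet": "10.10.0.0/16"}}
#   '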
class KfpUiOperator(CharmBase): """Charm for the Kubeflow Pipelines UI. https://github.com/canonical/kfp-operators """ def __init__(self, *args): super().__init__(*args) self.log = logging.getLogger() self.image = OCIImageResource(self, "oci-image") self.framework.observe(self.on.install, self._main) self.framework.observe(self.on.upgrade_charm, self._main) self.framework.observe(self.on.config_changed, self._main) self.framework.observe(self.on["object-storage"].relation_changed, self._main) self.framework.observe(self.on["kfp-api"].relation_changed, self._main) self.framework.observe(self.on["ingress"].relation_changed, self._main) self.framework.observe(self.on["kfp-ui"].relation_changed, self._main) self.framework.observe(self.on.leader_elected, self._main) def _main(self, event): try: self._check_leader() interfaces = self._get_interfaces() image_details = self.image.fetch() os = self._validate_sdi_interface(interfaces, "object-storage") kfp_api = self._validate_sdi_interface(interfaces, "kfp-api") except (CheckFailedError, OCIImageResourceError) as check_failed: self.model.unit.status = check_failed.status self.log.info(str(check_failed.status)) return self._send_ui_info(interfaces) self._send_ingress_info(interfaces) config = self.model.config healthz = f"http://localhost:{config['http-port']}/apis/v1beta1/healthz" charm_name = self.model.app.name env = { "ALLOW_CUSTOM_VISUALIZATIONS": str(config["allow-custom-visualizations"]).lower(), "ARGO_ARCHIVE_ARTIFACTORY": "minio", "ARGO_ARCHIVE_BUCKETNAME": "mlpipeline", "ARGO_ARCHIVE_LOGS": "false", "ARGO_ARCHIVE_PREFIX": "logs", # TODO: This should come from relation to kfp-profile-controller. It is the name/port # of the user-specific artifact accessor "ARTIFACTS_SERVICE_PROXY_NAME": "ml-pipeline-ui-artifact", "ARTIFACTS_SERVICE_PROXY_PORT": "80", "ARTIFACTS_SERVICE_PROXY_ENABLED": "true", "AWS_ACCESS_KEY_ID": "", "AWS_SECRET_ACCESS_KEY": "", "DISABLE_GKE_METADATA": "false", "ENABLE_AUTHZ": "true", "DEPLOYMENT": "KUBEFLOW", "HIDE_SIDENAV": str(config["hide-sidenav"]).lower(), "HTTP_AUTHORIZATION_DEFAULT_VALUE": "", "HTTP_AUTHORIZATION_KEY": "", "HTTP_BASE_URL": "", "KUBEFLOW_USERID_HEADER": "kubeflow-userid", "KUBEFLOW_USERID_PREFIX": "", "METADATA_ENVOY_SERVICE_SERVICE_HOST": "localhost", "METADATA_ENVOY_SERVICE_SERVICE_PORT": "9090", "minio-secret": { "secret": { "name": f"{charm_name}-minio-secret" } }, "MINIO_HOST": os["service"], "MINIO_NAMESPACE": os["namespace"], "MINIO_PORT": os["port"], "MINIO_SSL": os["secure"], "ML_PIPELINE_SERVICE_HOST": kfp_api["service-name"], "ML_PIPELINE_SERVICE_PORT": kfp_api["service-port"], "STREAM_LOGS_FROM_SERVER_API": "false", # TODO: Think there's a file here we should copy in. Workload's logs show an error on # start for this "VIEWER_TENSORBOARD_POD_TEMPLATE_SPEC_PATH": "/etc/config/viewer-pod-template.json", "VIEWER_TENSORBOARD_TF_IMAGE_NAME": "tensorflow/tensorflow", } # TODO: Not sure if this gets used. 
I don't see it in regular pipeline manifests config_json = json.dumps( {"spec": { "serviceAccountName": "kubeflow-pipelines-viewer" }}) viewer_pod_template = json.dumps( {"spec": { "serviceAccountName": "default-editor" }}) self.model.unit.status = MaintenanceStatus("Setting pod spec") self.model.pod.set_spec( { "version": 3, "serviceAccount": { "roles": [{ "global": True, "rules": [ { "apiGroups": [""], "resources": ["pods", "pods/log"], "verbs": ["get"], }, { "apiGroups": [""], "resources": ["events"], "verbs": ["list"], }, { "apiGroups": [""], "resources": ["secrets"], "verbs": ["get", "list"], }, { "apiGroups": ["kubeflow.org"], "resources": ["viewers"], "verbs": [ "create", "get", "list", "watch", "delete", ], }, { "apiGroups": ["argoproj.io"], "resources": ["workflows"], "verbs": ["get", "list"], }, ], }] }, "containers": [{ "name": "ml-pipeline-ui", "imageDetails": image_details, "ports": [ { "name": "http", "containerPort": int(config["http-port"]), }, ], "envConfig": env, "volumeConfig": [ { "name": "config", "mountPath": "/config", "files": [ { "path": "config.json", "content": config_json, }, ], }, { "name": "viewer-pod-template", "mountPath": "/etc/config", "files": [ { "path": "viewer-pod-template.json", "content": viewer_pod_template, }, ], }, ], "kubernetes": { "readinessProbe": { "exec": { "command": ["wget", "-q", "-S", "-O", "-", healthz] }, "initialDelaySeconds": 3, "periodSeconds": 5, "timeoutSeconds": 2, }, "livenessProbe": { "exec": { "command": ["wget", "-q", "-S", "-O", "-", healthz] }, "initialDelaySeconds": 3, "periodSeconds": 5, "timeoutSeconds": 2, }, }, }], "kubernetesResources": { "secrets": [{ "name": f"{charm_name}-minio-secret", "type": "Opaque", "data": { k: b64encode(v.encode("utf-8")).decode("utf-8") for k, v in { "MINIO_ACCESS_KEY": os["access-key"], "MINIO_SECRET_KEY": os["secret-key"], }.items() }, }] }, }, ) self.model.unit.status = ActiveStatus() def _send_ui_info(self, interfaces): if interfaces["kfp-ui"]: interfaces["kfp-ui"].send_data({ "service-name": f"{self.model.app.name}.{self.model.name}", "service-port": self.model.config["http-port"], }) def _send_ingress_info(self, interfaces): if interfaces["ingress"]: interfaces["ingress"].send_data({ "prefix": "/pipeline", "rewrite": "/pipeline", "service": self.model.app.name, # TODO: Should this be name.namespace? "port": int(self.model.config["http-port"]), }) def _check_leader(self): if not self.unit.is_leader(): # We can't do anything useful when not the leader, so do nothing. raise CheckFailedError("Waiting for leadership", WaitingStatus) def _get_interfaces(self): # Remove this abstraction when SDI adds .status attribute to NoVersionsListed, # NoCompatibleVersionsListed: # https://github.com/canonical/serialized-data-interface/issues/26 try: interfaces = get_interfaces(self) except NoVersionsListed as err: raise CheckFailedError(str(err), WaitingStatus) except NoCompatibleVersions as err: raise CheckFailedError(str(err), BlockedStatus) return interfaces def _validate_sdi_interface(self, interfaces: dict, relation_name: str, default_return=None): """Validates data received from SerializedDataInterface, returning the data if valid. 
        Optionally returns default_return when no relation is established.

        Raises:
            CheckFailedError(..., BlockedStatus): when no relation is established
                (unless default_return is set)
            CheckFailedError(..., BlockedStatus): if the interface is not using SDI
            CheckFailedError(..., BlockedStatus): if data in the interface fails the
                schema check
            CheckFailedError(..., WaitingStatus): if a relation is established but no
                data has been passed yet

        Params:
            interfaces: dict of relation name to interface, as returned by
                get_interfaces()
            relation_name: name of the relation to validate
            default_return: value to return when the relation is not established

        Returns:
            (dict) interface data
        """
        # If nothing is related to this relation, return a default value or raise an error
        if relation_name not in interfaces or interfaces[relation_name] is None:
            if default_return is not None:
                return default_return
            else:
                raise CheckFailedError(
                    f"Missing required relation for {relation_name}", BlockedStatus)

        relations = interfaces[relation_name]
        if not isinstance(relations, SerializedDataInterface):
            raise CheckFailedError(
                f"Unexpected error with {relation_name} relation data - data not as expected",
                BlockedStatus,
            )

        # Get and validate data from the relation
        try:
            # relations is a dict of {(ops.model.Relation, ops.model.Application): data}
            unpacked_relation_data = relations.get_data()
        except ValidationError as val_error:
            # Validation in .get_data() ensures if data is populated, it matches the schema and is
            # not incomplete
            self.log.error(val_error)
            raise CheckFailedError(
                f"Found incomplete/incorrect relation data for {relation_name}. See logs",
                BlockedStatus,
            )

        # Check if we have an established relation with no data exchanged
        if len(unpacked_relation_data) == 0:
            raise CheckFailedError(
                f"Waiting for {relation_name} relation data", WaitingStatus)

        # Unpack data (we care only about the first element)
        data_dict = list(unpacked_relation_data.values())[0]

        # Catch if empty data dict is received (JSONSchema ValidationError above does not raise
        # when this happens)
        # Remove once addressed in:
        # https://github.com/canonical/serialized-data-interface/issues/28
        if len(data_dict) == 0:
            raise CheckFailedError(
                f"Found incomplete/incorrect relation data for {relation_name}.",
                BlockedStatus,
            )

        return data_dict
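The CheckFailedError type raised throughout _main and _validate_sdi_interface is not shown in this excerpt. A minimal sketch that is consistent with how the handler consumes it (reading a ready-made .status) might look like this; the attribute names are assumptions, not confirmed by the source:

class CheckFailedError(Exception):
    """Raise when a precondition check fails, carrying a ready-to-apply status."""

    def __init__(self, msg: str, status_type):
        super().__init__(msg)
        self.msg = msg
        self.status_type = status_type
        # Instantiate the status (e.g. BlockedStatus, WaitingStatus) up front so
        # callers can assign it directly: self.model.unit.status = err.status
        self.status = status_type(msg)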
class Operator(CharmBase): """Deploys the katib-controller service.""" _stored = StoredState() def __init__(self, framework): super().__init__(framework) self._stored.set_default(**self.gen_certs()) self.image = OCIImageResource(self, "oci-image") self.prometheus_provider = MetricsEndpointProvider( charm=self, jobs=[{ "job_name": "katib_controller_metrics", "static_configs": [{ "targets": [f"*:{self.config['metrics-port']}"] }], }], ) self.dashboard_provider = GrafanaDashboardProvider(self) for event in [ self.on.config_changed, self.on.install, self.on.leader_elected, self.on.upgrade_charm, ]: self.framework.observe(event, self.set_pod_spec) def set_pod_spec(self, event): self.model.unit.status = MaintenanceStatus("Setting pod spec") try: self._check_leader() image_details = self._check_image_details() except CheckFailed as check_failed: self.model.unit.status = check_failed.status return validating, mutating = yaml.safe_load_all( Path("src/webhooks.yaml").read_text()) self.model.pod.set_spec( { "version": 3, "serviceAccount": { "roles": [{ "global": True, "rules": [ { "apiGroups": [""], "resources": [ "configmaps", "serviceaccounts", "services", "events", "namespaces", "persistentvolumes", "persistentvolumeclaims", "pods", "pods/log", "pods/status", ], "verbs": ["*"], }, { "apiGroups": ["apps"], "resources": ["deployments"], "verbs": ["*"], }, { "apiGroups": ["rbac.authorization.k8s.io"], "resources": [ "roles", "rolebindings", ], "verbs": ["*"], }, { "apiGroups": ["batch"], "resources": ["jobs", "cronjobs"], "verbs": ["*"], }, { "apiGroups": ["kubeflow.org"], "resources": [ "experiments", "experiments/status", "experiments/finalizers", "trials", "trials/status", "trials/finalizers", "suggestions", "suggestions/status", "suggestions/finalizers", "tfjobs", "pytorchjobs", "mpijobs", "xgboostjobs", "mxjobs", ], "verbs": ["*"], }, ], }], }, "containers": [{ "name": "katib-controller", "imageDetails": image_details, "command": ["./katib-controller"], "args": [ f"--webhook-port={self.model.config['webhook-port']}", "--trial-resources=Job.v1.batch", "--trial-resources=TFJob.v1.kubeflow.org", "--trial-resources=PyTorchJob.v1.kubeflow.org", "--trial-resources=MPIJob.v1.kubeflow.org", "--trial-resources=PipelineRun.v1beta1.tekton.dev", ], "ports": [ { "name": "webhook", "containerPort": self.model.config["webhook-port"], }, { "name": "metrics", "containerPort": self.model.config["metrics-port"], }, ], "envConfig": { "KATIB_CORE_NAMESPACE": self.model.name, }, "volumeConfig": [{ "name": "certs", "mountPath": "/tmp/cert", "files": [ { "path": "tls.crt", "content": self._stored.cert, }, { "path": "tls.key", "content": self._stored.key, }, ], }], }], }, k8s_resources={ "kubernetesResources": { "customResourceDefinitions": [{ "name": crd["metadata"]["name"], "spec": crd["spec"] } for crd in yaml.safe_load_all( Path("src/crds.yaml").read_text())], "mutatingWebhookConfigurations": [{ "name": mutating["metadata"]["name"], "webhooks": mutating["webhooks"], }], "validatingWebhookConfigurations": [{ "name": validating["metadata"]["name"], "webhooks": validating["webhooks"], }], }, "configMaps": { "katib-config": { f: Path(f"src/{f}.json").read_text() for f in ( "metrics-collector-sidecar", "suggestion", "early-stopping", ) }, "trial-template": { f + suffix: Path(f"src/{f}.yaml").read_text() for f, suffix in ( ("defaultTrialTemplate", ".yaml"), ("enasCPUTemplate", ""), ("pytorchJobTemplate", ""), ) }, }, }, ) self.model.unit.status = ActiveStatus() def gen_certs(self): model = self.model.name app = 
self.model.app.name

        Path("/run/ssl.conf").write_text(
            f"""[ req ]
default_bits = 2048
prompt = no
default_md = sha256
req_extensions = req_ext
distinguished_name = dn
[ dn ]
C = GB
ST = Canonical
L = Canonical
O = Canonical
OU = Canonical
CN = 127.0.0.1
[ req_ext ]
subjectAltName = @alt_names
[ alt_names ]
DNS.1 = {app}
DNS.2 = {app}.{model}
DNS.3 = {app}.{model}.svc
DNS.4 = {app}.{model}.svc.cluster
DNS.5 = {app}.{model}.svc.cluster.local
IP.1 = 127.0.0.1
[ v3_ext ]
authorityKeyIdentifier=keyid,issuer:always
basicConstraints=CA:FALSE
keyUsage=keyEncipherment,dataEncipherment,digitalSignature
extendedKeyUsage=serverAuth,clientAuth
subjectAltName=@alt_names""")

        check_call(["openssl", "genrsa", "-out", "/run/ca.key", "2048"])
        check_call(["openssl", "genrsa", "-out", "/run/server.key", "2048"])
        check_call([
            "openssl", "req", "-x509", "-new", "-sha256", "-nodes",
            "-days", "3650",
            "-key", "/run/ca.key",
            "-subj", "/CN=127.0.0.1",
            "-out", "/run/ca.crt",
        ])
        check_call([
            "openssl", "req", "-new", "-sha256",
            "-key", "/run/server.key",
            "-out", "/run/server.csr",
            "-config", "/run/ssl.conf",
        ])
        check_call([
            "openssl", "x509", "-req", "-sha256",
            "-in", "/run/server.csr",
            "-CA", "/run/ca.crt",
            "-CAkey", "/run/ca.key",
            "-CAcreateserial",
            "-out", "/run/cert.pem",
            "-days", "365",
            "-extensions", "v3_ext",
            "-extfile", "/run/ssl.conf",
        ])

        return {
            "cert": Path("/run/cert.pem").read_text(),
            "key": Path("/run/server.key").read_text(),
            "ca": Path("/run/ca.crt").read_text(),
        }

    def _check_leader(self):
        if not self.unit.is_leader():
            # We can't do anything useful when not the leader, so do nothing.
            raise CheckFailed("Waiting for leadership", WaitingStatus)

    def _check_image_details(self):
        try:
            image_details = self.image.fetch()
        except OCIImageResourceError as e:
            raise CheckFailed(f"{e.status.message}", e.status_type)
        return image_details
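gen_certs above shells out to openssl and writes intermediate files under /run. The same kind of certificate could be produced in-process with the cryptography package; the sketch below is an illustrative alternative (not what the charm does) and only covers a single self-signed server certificate rather than the CA-signed chain the charm builds:

from datetime import datetime, timedelta

from cryptography import x509
from cryptography.hazmat.primitives import hashes, serialization
from cryptography.hazmat.primitives.asymmetric import rsa
from cryptography.x509.oid import NameOID


def gen_self_signed_cert(app: str, model: str) -> dict:
    # 2048-bit RSA key, matching the openssl genrsa call above.
    key = rsa.generate_private_key(public_exponent=65537, key_size=2048)
    name = x509.Name([x509.NameAttribute(NameOID.COMMON_NAME, "127.0.0.1")])
    san = x509.SubjectAlternativeName([
        x509.DNSName(app),
        x509.DNSName(f"{app}.{model}.svc.cluster.local"),
    ])
    cert = (
        x509.CertificateBuilder()
        .subject_name(name)
        .issuer_name(name)  # self-signed: issuer == subject
        .public_key(key.public_key())
        .serial_number(x509.random_serial_number())
        .not_valid_before(datetime.utcnow())
        .not_valid_after(datetime.utcnow() + timedelta(days=365))
        .add_extension(san, critical=False)
        .sign(key, hashes.SHA256())
    )
    return {
        "cert": cert.public_bytes(serialization.Encoding.PEM).decode(),
        "key": key.private_bytes(
            serialization.Encoding.PEM,
            serialization.PrivateFormat.TraditionalOpenSSL,
            serialization.NoEncryption(),
        ).decode(),
    }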
class CharmedOsmBase(CharmBase): """CharmedOsmBase Charm.""" state = StoredState() def __init__( self, *args, oci_image="image", debug_mode_config_key=None, debug_pubkey_config_key=None, vscode_workspace: Dict = {}, mysql_uri: bool = False, ) -> NoReturn: """ CharmedOsmBase Charm constructor :params: oci_image: Resource name for main OCI image :params: debug_mode_config_key: Key in charm config for enabling debugging mode :params: debug_pubkey_config_key: Key in charm config for setting debugging public ssh key :params: vscode_workspace: VSCode workspace """ super().__init__(*args) # Internal state initialization self.state.set_default(pod_spec=None) self.image = OCIImageResource(self, oci_image) self.debugging_supported = debug_mode_config_key and debug_pubkey_config_key self.debug_mode_config_key = debug_mode_config_key self.debug_pubkey_config_key = debug_pubkey_config_key self.vscode_workspace = vscode_workspace self.mysql_uri = mysql_uri # Registering regular events self.framework.observe(self.on.config_changed, self.configure_pod) self.framework.observe(self.on.leader_elected, self.configure_pod) def build_pod_spec(self, image_info: Dict, **kwargs): """ Method to be implemented by the charm to build the pod spec :params: image_info: Image info details :params: kwargs: mysql_config (opslib.osm.config.mysql.MysqlModel): Mysql config object. Will be included if the charm has been initialized with mysql_uri=True. """ raise NotImplementedError("build_pod_spec is not implemented") def _debug(self, pod_spec: Dict) -> NoReturn: """ Activate debugging mode in the charm :params: pod_spec: Pod Spec to be debugged. Note: The first container is the one that will be debugged. """ container = pod_spec["containers"][0] if "readinessProbe" in container["kubernetes"]: container["kubernetes"].pop("readinessProbe") if "livenessProbe" in container["kubernetes"]: container["kubernetes"].pop("livenessProbe") container["ports"].append({ "name": "ssh", "containerPort": 22, "protocol": "TCP", }) container["volumeConfig"].append({ "name": "scripts", "mountPath": "/osm-debug-scripts", "files": [{ "path": "debug.sh", "content": Template(DEBUG_SCRIPT).substitute( pubkey=self.config[self.debug_pubkey_config_key], vscode_workspace=json.dumps( self.vscode_workspace, sort_keys=True, indent=4, separators=(",", ": "), ), ), "mode": 0o777, }], }) container["command"] = ["/osm-debug-scripts/debug.sh"] def _debug_if_needed(self, pod_spec): """ Debug the pod_spec if needed :params: pod_spec: Pod Spec to be debugged. 
""" if self.debugging_supported and self.config[ self.debug_mode_config_key]: if self.debug_pubkey_config_key not in self.config: raise Exception("debug_pubkey config is not set") self._debug(pod_spec) def _get_build_pod_spec_kwargs(self): """Get kwargs for the build_pod_spec function""" kwargs = {} if self.mysql_uri: kwargs["mysql_config"] = MysqlModel(**self.config) return kwargs def configure_pod(self, _=None) -> NoReturn: """Assemble the pod spec and apply it, if possible.""" try: if self.unit.is_leader(): self.unit.status = MaintenanceStatus("Assembling pod spec") image_info = self.image.fetch() kwargs = self._get_build_pod_spec_kwargs() pod_spec = self.build_pod_spec(image_info, **kwargs) self._debug_if_needed(pod_spec) self._set_pod_spec(pod_spec) self.unit.status = ActiveStatus("ready") except OCIImageResourceError: self.unit.status = BlockedStatus( "Error fetching image information") except ValidationError as e: logger.error(f"Config data validation error: {e}") logger.debug(traceback.format_exc()) self.unit.status = BlockedStatus(str(e)) except RelationsMissing as e: logger.error(f"Relation missing error: {e.message}") logger.debug(traceback.format_exc()) self.unit.status = BlockedStatus(e.message) except ModelError as e: self.unit.status = BlockedStatus(str(e)) except Exception as e: error_message = f"Unknown exception: {e}" logger.error(error_message) logger.debug(traceback.format_exc()) self.unit.status = BlockedStatus(error_message) def _set_pod_spec(self, pod_spec: Dict[str, Any]) -> NoReturn: pod_spec_hash = hash_from_dict(pod_spec) if self.state.pod_spec != pod_spec_hash: self.model.pod.set_spec(pod_spec) self.state.pod_spec = pod_spec_hash logger.debug(f"applying pod spec with hash {pod_spec_hash}")
class NrfCharm(CharmBase):
    """NRF charm events class definition."""

    state = StoredState()

    def __init__(self, *args):
        """NRF charm constructor."""
        super().__init__(*args)

        # Internal state initialization
        self.state.set_default(pod_spec=None)

        self.image = OCIImageResource(self, "image")

        # Registering regular events
        self.framework.observe(self.on.config_changed, self.configure_pod)

        # Registering required relation changed events
        self.framework.observe(self.on.mongodb_relation_changed,
                               self._on_mongodb_relation_changed)

        # Registering required relation broken events
        self.framework.observe(self.on.mongodb_relation_broken,
                               self._on_mongodb_relation_broken)

        # -- initialize states --
        self.state.set_default(mongodb_host=None, mongodb_uri=None)

    def publish_nrf_info(self, _=None) -> NoReturn:
        """Publishes NRF information on the nrf relation."""
        if self.unit.is_leader():
            for relation in self.model.relations["nrf"]:
                relation.data[self.model.app]["hostname"] = self.model.app.name

    def _on_mongodb_relation_changed(self, event: EventBase) -> NoReturn:
        """Reads information about the MongoDB relation.

        Args:
            event (EventBase): MongoDB relation event.
        """
        if event.app not in event.relation.data:
            return

        mongodb_host = event.relation.data[event.app].get("hostname")
        mongodb_uri = event.relation.data[event.app].get("mongodb_uri")

        relation_data_present = mongodb_host and mongodb_uri
        host_changed = self.state.mongodb_host != mongodb_host
        uri_changed = self.state.mongodb_uri != mongodb_uri

        if relation_data_present and (host_changed or uri_changed):
            self.state.mongodb_host = mongodb_host
            self.state.mongodb_uri = mongodb_uri
            self.configure_pod()

    def _on_mongodb_relation_broken(self, _=None) -> NoReturn:
        """Clears data from the MongoDB relation."""
        self.state.mongodb_host = None
        self.state.mongodb_uri = None
        self.configure_pod()

    def _missing_relations(self) -> str:
        """Checks whether any required relations are missing.

        Returns:
            str: comma-separated list of missing relations
        """
        data_status = {"mongodb": self.state.mongodb_uri}
        missing_relations = [k for k, v in data_status.items() if not v]
        return ", ".join(missing_relations)

    @property
    def relation_state(self) -> Dict[str, Any]:
        """Collects relation state configuration for pod spec assembly.

        Returns:
            Dict[str, Any]: relation state information.
        """
        relation_state = {
            "mongodb_host": self.state.mongodb_host,
            "mongodb_uri": self.state.mongodb_uri,
        }
        return relation_state

    def configure_pod(self, _=None) -> NoReturn:
        """Assemble the pod spec and apply it, if possible."""
        missing = self._missing_relations()
        if missing:
            status = "Waiting for {0} relation{1}"
            self.unit.status = BlockedStatus(
                status.format(missing, "s" if "," in missing else ""))
            return

        if not self.unit.is_leader():
            self.unit.status = ActiveStatus("ready")
            return

        self.unit.status = MaintenanceStatus("Assembling pod spec")

        # Fetch image information
        try:
            self.unit.status = MaintenanceStatus("Fetching image information")
            image_info = self.image.fetch()
        except OCIImageResourceError:
            self.unit.status = BlockedStatus(
                "Error fetching image information")
            return

        try:
            pod_spec = make_pod_spec(
                image_info,
                self.model.config,
                self.relation_state,
                self.model.app.name,
            )
        except ValueError as exc:
            logger.exception("Config/Relation data validation error")
            self.unit.status = BlockedStatus(str(exc))
            return

        if self.state.pod_spec != pod_spec:
            self.model.pod.set_spec(pod_spec)
            self.state.pod_spec = pod_spec

        self.unit.status = ActiveStatus("ready")
        self.publish_nrf_info()
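A relation-driven charm like NrfCharm is straightforward to exercise with ops.testing.Harness. The sketch below assumes a plausible metadata.yaml shape (the relation and resource names mirror what the charm code references, but the interface names are illustrative) and drives _on_mongodb_relation_changed by updating the remote application databag:

from ops.testing import Harness

METADATA = """
name: nrf
provides:
  nrf:
    interface: nrf
requires:
  mongodb:
    interface: mongodb
resources:
  image:
    type: oci-image
"""

harness = Harness(NrfCharm, meta=METADATA)
harness.set_leader(True)
harness.add_oci_resource("image")
harness.begin()

# Simulate the remote application publishing its connection details; this
# fires mongodb-relation-changed, which in turn calls configure_pod().
rel_id = harness.add_relation("mongodb", "mongodb")
harness.add_relation_unit(rel_id, "mongodb/0")
harness.update_relation_data(rel_id, "mongodb", {
    "hostname": "mongodb",
    "mongodb_uri": "mongodb://mongodb:27017",
})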
class RancherCharm(CharmBase):
    def __init__(self, *args):
        super().__init__(*args)

        self.log = logging.getLogger(__name__)
        self.rancher_image = OCIImageResource(self, 'rancher-image')
        for event in [
                self.on.install,
                self.on.leader_elected,
                self.on.upgrade_charm,
                self.on.config_changed
        ]:
            self.framework.observe(event, self.main)

    def main(self, event):
        if not self.unit.is_leader():
            # We can't do anything useful when not the leader, so do nothing.
            self.model.unit.status = WaitingStatus('Waiting for leadership')
            return

        try:
            rancher_image_details = self.rancher_image.fetch()
        except OCIImageResourceError as e:
            self.model.unit.status = e.status
            return

        self.model.unit.status = MaintenanceStatus('Setting pod spec')
        self.model.pod.set_spec({
            'version': 3,
            'service': {
                'updateStrategy': {
                    'type': 'RollingUpdate',
                    'rollingUpdate': {
                        'maxUnavailable': 1
                    },
                },
            },
            'configMaps': {
                'kubernetes-dashboard-settings': {},
            },
            'containers': [
                {
                    'name': self.model.app.name,
                    'imageDetails': rancher_image_details,
                    'imagePullPolicy': 'Always',
                    'ports': [
                        {
                            'name': 'http',
                            'containerPort': 80,
                            'protocol': 'TCP',
                        },
                        {
                            'name': 'https',
                            'containerPort': 443,
                            'protocol': 'TCP',
                        },
                    ],
                    'args': [
                        "--http-listen-port=80",
                        "--https-listen-port=443",
                        "--add-local=true",
                        "--debug",
                    ],
                    'envConfig': {
                        'CATTLE_NAMESPACE': self.model.name,
                        'CATTLE_PEER_SERVICE': self.model.app.name,
                    },
                    'kubernetes': {
                        # Probe the plain-HTTP listener on port 80; TLS is
                        # served separately on 443.
                        'livenessProbe': {
                            'httpGet': {
                                'scheme': 'HTTP',
                                'path': '/healthz',
                                'port': 80,
                            },
                            'initialDelaySeconds': 60,
                            'periodSeconds': 30,
                        },
                        'readinessProbe': {
                            'httpGet': {
                                'scheme': 'HTTP',
                                'path': '/healthz',
                                'port': 80,
                            },
                            'initialDelaySeconds': 5,
                            'periodSeconds': 30,
                        },
                    },
                },
            ],
            'serviceAccount': {
                'roles': [{
                    'global': True,
                    'rules': [
                        {
                            'apiGroups': ["*"],
                            'resources': ["*"],
                            'verbs': ["*"],
                        },
                        {
                            'nonResourceURLs': ["*"],
                            'verbs': ["*"],
                        },
                    ],
                }],
            },
        })
        self.model.unit.status = ActiveStatus()
class AdmissionWebhookCharm(CharmBase): """Deploys the admission-webhook service. Handles injecting common data such as secrets and environment variables into Kubeflow pods. """ def __init__(self, framework): super().__init__(framework) self.image = OCIImageResource(self, "oci-image") self.framework.observe(self.on.install, self.set_pod_spec) self.framework.observe(self.on.upgrade_charm, self.set_pod_spec) self.framework.observe( self.on.pod_defaults_relation_changed, self.set_pod_spec, ) def set_pod_spec(self, event): if not self.model.unit.is_leader(): logger.info("Not a leader, skipping set_pod_spec") self.model.unit.status = ActiveStatus() return self.model.unit.status = MaintenanceStatus("Setting pod spec") pod_defaults = { key.name: dict(value)["pod-defaults"] for relation in self.model.relations["pod-defaults"] for key, value in relation.data.items() if isinstance(key, Application) and not key._is_our_app } custom_resources = { "poddefaults.kubeflow.org": [{ "apiVersion": "kubeflow.org/v1alpha1", "kind": "PodDefault", "metadata": { "name": f"{charm}-{name}", }, "spec": { "selector": { "matchLabels": { f"{charm}-{name}": "true" }, }, "env": [{ "name": k, "value": v } for k, v in value["env"].items()], }, } for charm, defaults in pod_defaults.items() for name, value in json.loads(defaults).items()], } try: image_details = self.image.fetch() except OCIImageResourceError as e: self.model.unit.status = e.status return model = os.environ["JUJU_MODEL_NAME"] gen_certs(model, hookenv.service_name()) ca_bundle = b64encode( Path("/run/cert.pem").read_bytes()).decode("utf-8") self.model.pod.set_spec( { "version": 3, "serviceAccount": { "roles": [{ "global": True, "rules": [ { "apiGroups": ["kubeflow.org"], "resources": ["poddefaults"], "verbs": [ "get", "list", "watch", "update", "create", "patch", "delete", ], }, ], }], }, "containers": [{ "name": "admission-webhook", "imageDetails": image_details, "ports": [{ "name": "webhook", "containerPort": 443 }], "volumeConfig": [{ "name": "certs", "mountPath": "/etc/webhook/certs", "files": [ { "path": "cert.pem", "content": Path("/run/cert.pem").read_text(), }, { "path": "key.pem", "content": Path("/run/server.key").read_text(), }, ], }], }], }, k8s_resources={ "kubernetesResources": { "customResourceDefinitions": [{ "name": crd["metadata"]["name"], "spec": crd["spec"] } for crd in yaml.safe_load_all( Path("src/crds.yaml").read_text())], "customResources": custom_resources, "mutatingWebhookConfigurations": [{ "name": "admission-webhook", "webhooks": [ { "name": "admission-webhook.kubeflow.org", "failurePolicy": "Fail", "clientConfig": { "caBundle": ca_bundle, "service": { "name": hookenv.service_name(), "namespace": model, "path": "/apply-poddefault", }, }, "objectSelector": { "matchExpressions": [ { "key": "juju-app", "operator": "NotIn", "values": ["admission-webhook"], }, { "key": "app.kubernetes.io/name", "operator": "NotIn", "values": ["admission-webhook"], }, { "key": "juju-operator", "operator": "NotIn", "values": ["admission-webhook"], }, { "key": "operator.juju.is/name", "operator": "NotIn", "values": ["admission-webhook"], }, ] }, "rules": [{ "apiGroups": [""], "apiVersions": ["v1"], "operations": ["CREATE"], "resources": ["pods"], }], }, ], }], } }, ) self.model.unit.status = ActiveStatus()
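The nested comprehension that builds custom_resources above is dense. Run standalone with illustrative relation data (the charm name and defaults below are made up), it expands as follows:

import json

# Shape of pod_defaults after reading the relation databags: one JSON blob
# of {poddefault-name: {"env": {...}}} per related charm.
pod_defaults = {
    "mlflow-server": json.dumps({
        "minio": {"env": {"MLFLOW_S3_ENDPOINT_URL": "http://minio:9000"}},
    }),
}

custom_resources = {
    "poddefaults.kubeflow.org": [{
        "apiVersion": "kubeflow.org/v1alpha1",
        "kind": "PodDefault",
        "metadata": {"name": f"{charm}-{name}"},
        "spec": {
            "selector": {"matchLabels": {f"{charm}-{name}": "true"}},
            "env": [{"name": k, "value": v} for k, v in value["env"].items()],
        },
    } for charm, defaults in pod_defaults.items()
      for name, value in json.loads(defaults).items()],
}

# Produces one PodDefault named "mlflow-server-minio" that injects
# MLFLOW_S3_ENDPOINT_URL into pods labelled "mlflow-server-minio": "true".
print(json.dumps(custom_resources, indent=2))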
class RedisCharm(CharmBase):
    state = StoredState()

    def __init__(self, *args):
        super().__init__(*args)
        logger.debug('Initializing charm')

        self.redis = RedisClient(host=self.model.app.name, port=DEFAULT_PORT)
        self.image = OCIImageResource(self, "redis-image")
        self.framework.observe(self.on.start, self.on_start)
        self.framework.observe(self.on.stop, self.on_stop)
        self.framework.observe(self.on.config_changed, self.configure_pod)
        self.framework.observe(self.on.upgrade_charm, self.configure_pod)
        self.framework.observe(self.on.update_status, self.update_status)
        self.framework.observe(self.on["datastore"].relation_changed,
                               self.relation_changed)

    @log_event_handler
    def on_start(self, event):
        """Initialize Redis.

        This event handler is deferred if initialization of Redis fails.
        """
        if not self.unit.is_leader():
            self.unit.status = ActiveStatus()
            return

        if not self.redis.is_ready():
            self.unit.status = WaitingStatus(WAITING_FOR_REDIS_MSG)
            logger.debug(
                "{}: deferring on_start".format(WAITING_FOR_REDIS_MSG))
            event.defer()
            return

        self.set_ready_status()

    @log_event_handler
    def on_stop(self, _):
        """Mark the terminating unit as inactive."""
        self.redis.close()
        self.unit.status = MaintenanceStatus('Pod is terminating.')

    @log_event_handler
    def configure_pod(self, event):
        """Applies the pod configuration."""
        if not self.unit.is_leader():
            logger.debug("Spec changes ignored by non-leader")
            self.unit.status = ActiveStatus()
            return

        self.unit.status = WaitingStatus("Fetching image information ...")
        try:
            image_info = self.image.fetch()
        except OCIImageResourceError:
            self.unit.status = BlockedStatus(
                "Error fetching image information.")
            return

        # Build Pod spec
        builder = PodSpecBuilder(
            name=self.model.app.name,
            port=DEFAULT_PORT,
            image_info=image_info,
        )
        spec = builder.build_pod_spec()
        logger.debug("Pod spec: \n{}".format(yaml.dump(spec)))

        # Applying pod spec. If the spec hasn't changed, this has no effect.
        logger.debug("Applying pod spec.")
        self.model.pod.set_spec(spec)

        if not self.redis.is_ready():
            self.unit.status = WaitingStatus(WAITING_FOR_REDIS_MSG)
            logger.debug(
                "{}: deferring configure_pod".format(WAITING_FOR_REDIS_MSG))
            event.defer()
            return

        self.set_ready_status()

    @log_event_handler
    def update_status(self, _):
        """Set status for all units.

        Status may be
        - Redis API server not reachable (service is not ready), or
        - Ready
        """
        if not self.unit.is_leader():
            self.unit.status = ActiveStatus()
            return

        if not self.redis.is_ready():
            self.unit.status = WaitingStatus(WAITING_FOR_REDIS_MSG)
            return

        self.set_ready_status()

    @log_event_handler
    def relation_changed(self, event):
        """Passes the host and port to the remote unit.

        Any Redis client is provided with the following information:
        - Redis host
        - Redis port

        Using this information a client can establish a connection with
        Redis, for instance using the redis Python library.
        """
        if not self.unit.is_leader():
            logger.debug("Relation changes ignored by non-leader")
            return

        event.relation.data[self.unit]['hostname'] = str(
            self.bind_address(event))
        event.relation.data[self.unit]['port'] = str(DEFAULT_PORT)
        # The reactive Redis charm exposes also 'password'. When tackling
        # https://github.com/canonical/redis-operator/issues/7 add 'password'
        # field so that it matches the exposed interface information from it.
        # event.relation.data[self.unit]['password'] = ''

    def bind_address(self, event):
        relation = self.model.get_relation(event.relation.name,
                                           event.relation.id)
        if address := self.model.get_binding(relation).network.bind_address:
            return address
        return self.app.name
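The log_event_handler decorator applied to the handlers above is not defined in this excerpt. A plausible minimal implementation (an assumption, not the charm's confirmed code) simply logs entry and exit of each handler:

import functools
import logging

logger = logging.getLogger(__name__)


def log_event_handler(method):
    """Log the start and end of a charm event handler."""

    @functools.wraps(method)
    def wrapper(self, event):
        logger.debug("Running %s for %s", method.__name__, event)
        result = method(self, event)
        logger.debug("Finished %s", method.__name__)
        return result

    return wrapper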
class NssfCharm(CharmBase):
    """NSSF charm events class definition."""

    state = StoredState()

    def __init__(self, *args):
        """NSSF charm constructor."""
        super().__init__(*args)

        self.state.set_default(pod_spec=None)
        self.image = OCIImageResource(self, "image")

        # Registering regular events
        self.framework.observe(self.on.config_changed, self.configure_pod)

        # Registering required relation changed events
        self.framework.observe(self.on.nrf_relation_changed,
                               self._on_nrf_relation_changed)

        # Registering required relation broken events
        self.framework.observe(self.on.nrf_relation_broken,
                               self._on_nrf_relation_broken)

        # -- initialize states --
        self.state.set_default(nrf_host=None)

    def _on_nrf_relation_changed(self, event: EventBase) -> NoReturn:
        """Reads information about the NRF relation.

        Args:
            event (EventBase): NRF relation event.
        """
        if event.app not in event.relation.data:
            return

        nrf_host = event.relation.data[event.app].get("hostname")

        if nrf_host and self.state.nrf_host != nrf_host:
            self.state.nrf_host = nrf_host
            self.configure_pod()

    def _on_nrf_relation_broken(self, _=None) -> NoReturn:
        """Clears data from the NRF relation."""
        self.state.nrf_host = None
        self.configure_pod()

    def _missing_relations(self) -> str:
        """Checks whether any required relations are missing.

        Returns:
            str: comma-separated list of missing relations
        """
        data_status = {"nrf": self.state.nrf_host}
        missing_relations = [k for k, v in data_status.items() if not v]
        return ", ".join(missing_relations)

    @property
    def relation_state(self) -> Dict[str, Any]:
        """Collects relation state configuration for pod spec assembly.

        Returns:
            Dict[str, Any]: relation state information.
        """
        relation_state = {"nrf_host": self.state.nrf_host}
        return relation_state

    def configure_pod(self, _=None) -> NoReturn:
        """Assemble the pod spec and apply it, if possible."""
        missing = self._missing_relations()
        if missing:
            self.unit.status = BlockedStatus(
                "Waiting for {0} relation{1}".format(
                    missing, "s" if "," in missing else ""))
            return

        if not self.unit.is_leader():
            self.unit.status = ActiveStatus("ready")
            return

        self.unit.status = MaintenanceStatus("Assembling pod spec")

        # Fetch image information
        try:
            self.unit.status = MaintenanceStatus("Fetching image information")
            image_info = self.image.fetch()
        except OCIImageResourceError:
            self.unit.status = BlockedStatus(
                "Error fetching image information")
            return

        try:
            pod_spec = make_pod_spec(
                image_info,
                self.model.config,
                self.model.app.name,
                self.relation_state,
            )
        except ValueError as exc:
            logger.exception("Config data validation error")
            self.unit.status = BlockedStatus(str(exc))
            return

        if self.state.pod_spec != pod_spec:
            self.model.pod.set_spec(pod_spec)
            self.state.pod_spec = pod_spec

        self.unit.status = ActiveStatus("ready")
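make_pod_spec is imported from the charm's pod_spec module and not shown here. A minimal sketch of the shape NssfCharm appears to expect (field names and validation are assumptions; note that NrfCharm above passes the last two arguments in the opposite order, so each charm presumably ships its own variant) would be:

from typing import Any, Dict


def make_pod_spec(
    image_info: Dict[str, str],
    config: Dict[str, Any],
    app_name: str,
    relation_state: Dict[str, Any],
) -> Dict[str, Any]:
    # The charm catches ValueError from this function and goes to
    # BlockedStatus, so validation failures are raised rather than ignored.
    if not relation_state.get("nrf_host"):
        raise ValueError("Invalid relation data: missing nrf_host")
    return {
        "version": 3,
        "containers": [{
            "name": app_name,
            "imageDetails": image_info,
            "envConfig": {"NRF_HOST": relation_state["nrf_host"]},
        }],
    }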
class Operator(CharmBase):
    """Deploys the katib-ui service."""

    def __init__(self, framework):
        super().__init__(framework)

        self.image = OCIImageResource(self, "oci-image")
        self.framework.observe(self.on.install, self.set_pod_spec)
        self.framework.observe(self.on.upgrade_charm, self.set_pod_spec)
        self.framework.observe(
            self.on["ingress"].relation_changed,
            self.set_pod_spec,
        )

    def set_pod_spec(self, event):
        try:
            self._check_leader()
            interfaces = self._get_interfaces()
            image_details = self._check_image_details()
        except CheckFailed as check_failed:
            self.model.unit.status = check_failed.status
            return

        self._configure_ingress(interfaces)

        self.model.unit.status = MaintenanceStatus("Setting pod spec")
        self.model.pod.set_spec(
            {
                "version": 3,
                "serviceAccount": {
                    "roles": [
                        {
                            "global": True,
                            "rules": [
                                {
                                    "apiGroups": [""],
                                    "resources": [
                                        "configmaps",
                                        "namespaces",
                                    ],
                                    "verbs": ["*"],
                                },
                                {
                                    "apiGroups": ["kubeflow.org"],
                                    "resources": [
                                        "experiments",
                                        "trials",
                                        "suggestions",
                                    ],
                                    "verbs": ["*"],
                                },
                            ],
                        }
                    ]
                },
                "containers": [
                    {
                        "name": "katib-ui",
                        "command": ["./katib-ui"],
                        "args": [f"--port={self.model.config['port']}"],
                        "imageDetails": image_details,
                        "ports": [
                            {
                                "name": "http",
                                "containerPort": self.model.config["port"],
                            }
                        ],
                        "envConfig": {
                            "KATIB_CORE_NAMESPACE": self.model.name,
                        },
                    }
                ],
            },
        )
        self.model.unit.status = ActiveStatus()

    def _configure_ingress(self, interfaces):
        if interfaces["ingress"]:
            interfaces["ingress"].send_data(
                {
                    "prefix": "/katib/",
                    "service": self.model.app.name,
                    "port": self.model.config["port"],
                }
            )

    def _check_leader(self):
        if not self.unit.is_leader():
            # We can't do anything useful when not the leader, so do nothing.
            raise CheckFailed("Waiting for leadership", WaitingStatus)

    def _get_interfaces(self):
        try:
            interfaces = get_interfaces(self)
        except NoVersionsListed as err:
            raise CheckFailed(str(err), WaitingStatus)
        except NoCompatibleVersions as err:
            raise CheckFailed(str(err), BlockedStatus)
        return interfaces

    def _check_image_details(self):
        try:
            image_details = self.image.fetch()
        except OCIImageResourceError as e:
            raise CheckFailed(f"{e.status.message}", e.status_type)
        return image_details
class RedisCharm(CharmBase): state = StoredState() def __init__(self, *args): super().__init__(*args) logger.debug('Initializing charm') self.state.set_default(pod_spec=None) self.redis = RedisClient(host=self.model.app.name, port=DEFAULT_PORT) self.image = OCIImageResource(self, "redis-image") self.framework.observe(self.on.start, self.on_start) self.framework.observe(self.on.stop, self.on_stop) self.framework.observe(self.on.config_changed, self.configure_pod) self.framework.observe(self.on.upgrade_charm, self.configure_pod) self.framework.observe(self.on.update_status, self.update_status) @log_event_handler def on_start(self, event): """Initialize Redis. This event handler is deferred if initialization of Redis fails. """ if not self.unit.is_leader(): self.unit.status = ActiveStatus() return if not self.redis.is_ready(): self.unit.status = WaitingStatus(WAITING_FOR_REDIS_MSG) logger.debug( "{}: deferring on_start".format(WAITING_FOR_REDIS_MSG)) event.defer() return self.set_ready_status() @log_event_handler def on_stop(self, _): """Mark terminating unit as inactive. """ self.redis.close() self.unit.status = MaintenanceStatus('Pod is terminating.') @log_event_handler def configure_pod(self, event): """Applies the pod configuration. """ if not self.unit.is_leader(): logger.debug("Spec changes ignored by non-leader") self.unit.status = ActiveStatus() return self.unit.status = WaitingStatus("Fetching image information ...") try: image_info = self.image.fetch() except OCIImageResourceError: self.unit.status = BlockedStatus( "Error fetching image information.") return # Build Pod spec builder = PodSpecBuilder( name=self.model.app.name, port=DEFAULT_PORT, image_info=image_info, ) spec = builder.build_pod_spec() logger.debug("Pod spec: \n{}".format(yaml.dump(spec))) # Update pod spec if the generated one is different # from the one previously applied. if self.state.pod_spec == spec: logger.debug("Discarding pod spec because it has not changed.") else: logger.debug("Applying new pod spec.") self.model.pod.set_spec(spec) self.state.pod_spec = spec if not self.redis.is_ready(): self.unit.status = WaitingStatus(WAITING_FOR_REDIS_MSG) logger.debug( "{}: deferring configure_pod".format(WAITING_FOR_REDIS_MSG)) event.defer() return self.set_ready_status() @log_event_handler def update_status(self, _): """Set status for all units. Status may be - Redis API server not reachable (service is not ready), - Ready """ if not self.unit.is_leader(): self.unit.status = ActiveStatus() return if not self.redis.is_ready(): self.unit.status = WaitingStatus(WAITING_FOR_REDIS_MSG) return self.set_ready_status() def set_ready_status(self): logger.debug('Pod is ready.') self.unit.status = ActiveStatus() self.app.status = ActiveStatus()
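RedisClient is also external to this excerpt; judging by the calls made above (is_ready and close), a thin wrapper over redis-py would suffice. A sketch under that assumption:

import redis


class RedisClient:
    """Minimal client exposing only what the charm uses."""

    def __init__(self, host: str, port: int):
        self._client = redis.Redis(host=host, port=port)

    def is_ready(self) -> bool:
        # PING succeeds only once the server is accepting commands.
        try:
            return bool(self._client.ping())
        except redis.RedisError:
            return False

    def close(self) -> None:
        self._client.close()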