async def upgrade_cluster(namespace: str, name: str, body: kopf.Body):
    """
    Update the Docker image in all StatefulSets for the cluster.

    For the changes to take effect, the cluster needs to be restarted.

    :param namespace: The Kubernetes namespace for the CrateDB cluster.
    :param name: The name for the ``CrateDB`` custom resource.
    :param body: The full body of the ``CrateDB`` custom resource per
        :class:`kopf.Body`.
    """
    apps = AppsV1Api()
    cluster_spec = body.spec["cluster"]
    target_image = f"{cluster_spec['imageRegistry']}:{cluster_spec['version']}"
    # One update coroutine per data-node StatefulSet ...
    pending = [
        update_statefulset(
            apps,
            namespace,
            f"crate-data-{node_spec['name']}-{name}",
            target_image,
        )
        for node_spec in body.spec["nodes"]["data"]
    ]
    # ... plus the master StatefulSet, if the cluster has dedicated masters.
    if "master" in body.spec["nodes"]:
        pending.insert(
            0,
            update_statefulset(
                apps, namespace, f"crate-master-{name}", target_image
            ),
        )
    await asyncio.gather(*pending)
async def create_backups(
    owner_references: Optional[List[V1OwnerReference]],
    namespace: str,
    name: str,
    labels: LabelType,
    http_port: int,
    prometheus_port: int,
    backups: Dict[str, Any],
    image_pull_secrets: Optional[List[V1LocalObjectReference]],
    has_ssl: bool,
    logger: logging.Logger,
) -> None:
    """
    Create backup-related Kubernetes resources for a CrateDB cluster.

    When an AWS backup configuration is present in ``backups``, a CronJob
    that creates snapshots and a Deployment exporting backup metrics are
    created. Without an ``"aws"`` key, nothing is created.
    """
    aws_backup_config = backups.get("aws")
    async with ApiClient() as api_client:
        apps_api = AppsV1Api(api_client)
        cronjob_api = BatchV1beta1Api(api_client)
        if not aws_backup_config:
            # No AWS backup configuration — nothing to create.
            return
        cronjob_body = get_backup_cronjob(
            owner_references,
            name,
            labels,
            http_port,
            aws_backup_config,
            image_pull_secrets,
            has_ssl,
        )
        await call_kubeapi(
            cronjob_api.create_namespaced_cron_job,
            logger,
            continue_on_conflict=True,
            namespace=namespace,
            body=cronjob_body,
        )
        exporter_body = get_backup_metrics_exporter(
            owner_references,
            name,
            labels,
            http_port,
            prometheus_port,
            aws_backup_config,
            image_pull_secrets,
            has_ssl,
        )
        await call_kubeapi(
            apps_api.create_namespaced_deployment,
            logger,
            continue_on_conflict=True,
            namespace=namespace,
            body=exporter_body,
        )
async def test_not_enabled(self, faker, namespace, api_client):
    """With no backup config at all, neither CronJob nor Deployment is created."""
    cluster_name = faker.domain_word()
    apps = AppsV1Api(api_client)
    batch_api = BatchV1beta1Api(api_client)
    await create_backups(
        None,
        namespace.metadata.name,
        cluster_name,
        {},
        12345,
        23456,
        {},
        None,
        True,
        logging.getLogger(__name__),
    )
    cronjob_exists = await self.does_cronjob_exist(
        batch_api, namespace.metadata.name, f"create-snapshot-{cluster_name}"
    )
    assert cronjob_exists is False
    deployment_exists = await self.does_deployment_exist(
        apps, namespace.metadata.name, f"backup-metrics-{cluster_name}"
    )
    assert deployment_exists is False
async def cluster_update(
    namespace: str,
    name: str,
    body: kopf.Body,
    spec: kopf.Spec,
    diff: kopf.Diff,
    old: kopf.Body,
    logger: logging.Logger,
    **kwargs,
):
    """
    Implement any updates to a cluster.

    The handler will sort out the logic of what to update, in which order,
    and when to trigger a restart of a cluster.
    """
    apps = AppsV1Api()
    # Flags derived from the diff; they decide which operations run below.
    do_scale_master = False
    do_scale_data = False
    do_upgrade = False
    requires_restart = False
    scale_master_diff_item: Optional[kopf.DiffItem] = None
    scale_data_diff_items: Optional[List[kopf.DiffItem]] = None
    # Walk the diff kopf computed between the old and new resource bodies and
    # classify each change.
    for operation, field_path, old_value, new_value in diff:
        if field_path in {
            ("spec", "cluster", "imageRegistry"),
            (
                "spec",
                "cluster",
                "version",
            ),
        }:
            # Image registry or version changed: requires an upgrade and,
            # further below, a rolling restart (the new image only takes
            # effect after a restart — see ``upgrade_cluster``).
            do_upgrade = True
        elif field_path == ("spec", "nodes", "master", "replicas"):
            do_scale_master = True
            scale_master_diff_item = kopf.DiffItem(operation, field_path,
                                                   old_value, new_value)
        elif field_path == ("spec", "nodes", "data"):
            # TODO: check for data node order, added or removed types, ...
            if len(old_value) != len(new_value):
                raise kopf.PermanentError(
                    "Cannot handle changes to the number of node specs.")
            # Diff each data node spec individually and pick out replica
            # changes; all other changes inside a node spec are ignored.
            scale_data_diff_items = []
            for node_spec_idx in range(len(old_value)):
                old_spec = old_value[node_spec_idx]
                new_spec = new_value[node_spec_idx]
                inner_diff = calc_diff(old_spec, new_spec)
                for (
                        inner_operation,
                        inner_field_path,
                        inner_old_value,
                        inner_new_value,
                ) in inner_diff:
                    if inner_field_path == ("replicas", ):
                        do_scale_data = True
                        # Prefix the inner path with the node spec index so
                        # the scaling logic can map the change back to the
                        # node spec it belongs to.
                        scale_data_diff_items.append(
                            kopf.DiffItem(
                                inner_operation,
                                (str(node_spec_idx), ) + inner_field_path,
                                inner_old_value,
                                inner_new_value,
                            ))
                    else:
                        logger.info(
                            "Ignoring operation %s on field %s",
                            operation,
                            field_path + (str(node_spec_idx), ) +
                            inner_field_path,
                        )
        else:
            logger.info("Ignoring operation %s on field %s", operation,
                        field_path)

    if do_upgrade:
        # Capture old/new registry and version for webhook notifications
        # (sent on both the success and failure paths below).
        webhook_upgrade_payload = WebhookUpgradePayload(
            old_registry=old["spec"]["cluster"]["imageRegistry"],
            new_registry=body.spec["cluster"]["imageRegistry"],
            old_version=old["spec"]["cluster"]["version"],
            new_version=body.spec["cluster"]["version"],
        )
        await upgrade_cluster(namespace, name, body)
        requires_restart = True

    if requires_restart:
        try:
            # We need to derive the desired number of nodes from the old spec,
            # since the new could have a different total number of nodes if a
            # scaling operation is in progress as well.
            expected_nodes = get_total_nodes_count(old["spec"]["nodes"])
            await with_timeout(
                restart_cluster(namespace, name, expected_nodes, logger),
                config.ROLLING_RESTART_TIMEOUT,
                (f"Failed to restart cluster {namespace}/{name} after "
                 f"{config.ROLLING_RESTART_TIMEOUT} seconds."),
            )
        except Exception:
            # Notify subscribers of the failed upgrade, then re-raise so
            # kopf sees the failure.
            logger.exception("Failed to restart cluster")
            await webhook_client.send_upgrade_notification(
                WebhookStatus.FAILURE, namespace, name,
                webhook_upgrade_payload, logger)
            raise
        else:
            logger.info("Cluster restarted")
            if do_upgrade:
                await webhook_client.send_upgrade_notification(
                    WebhookStatus.SUCCESS,
                    namespace,
                    name,
                    webhook_upgrade_payload,
                    logger,
                )

    if do_scale_master or do_scale_data:
        # Old and new replica counts for all node types, for notifications.
        webhook_scale_payload = WebhookScalePayload(
            old_data_replicas=[
                WebhookScaleNodePayload(name=item["name"],
                                        replicas=item["replicas"])
                for item in old["spec"]["nodes"]["data"]
            ],
            new_data_replicas=[
                WebhookScaleNodePayload(name=item["name"],
                                        replicas=item["replicas"])
                for item in body.spec["nodes"]["data"]
            ],
            old_master_replicas=old["spec"]["nodes"].get("master",
                                                         {}).get("replicas"),
            new_master_replicas=body.spec["nodes"].get("master",
                                                       {}).get("replicas"),
        )
        try:
            await with_timeout(
                scale_cluster(
                    apps,
                    namespace,
                    name,
                    do_scale_data,
                    do_scale_master,
                    get_total_nodes_count(old["spec"]["nodes"]),
                    spec,
                    scale_master_diff_item,
                    kopf.Diff(scale_data_diff_items)
                    if scale_data_diff_items else None,
                    logger,
                ),
                config.SCALING_TIMEOUT,
                (f"Failed to scale cluster {namespace}/{name} after "
                 f"{config.SCALING_TIMEOUT} seconds."),
            )
        except Exception:
            logger.exception("Failed to scale cluster")
            await webhook_client.send_scale_notification(
                WebhookStatus.FAILURE, namespace, name, webhook_scale_payload,
                logger)
            raise
        else:
            logger.info("Cluster scaled")
            await webhook_client.send_scale_notification(
                WebhookStatus.SUCCESS, namespace, name, webhook_scale_payload,
                logger)
async def cluster_create(namespace: str, meta: kopf.Meta, spec: kopf.Spec,
                         logger: logging.Logger, **kwargs):
    """
    Create all Kubernetes resources for a new CrateDB cluster.

    This creates the StatefulSets (dedicated master nodes, if configured,
    and all data node types), the SQL exporter config, a debug volume, the
    system user, and the cluster services; it then bootstraps the cluster
    and — when a ``backups`` key is present in the spec — creates the
    backup resources as well.
    """
    name = meta["name"]
    # Labels shared by all resources of this cluster; component-specific
    # labels are layered on top per resource kind.
    base_labels = {
        LABEL_MANAGED_BY: "crate-operator",
        LABEL_NAME: name,
        LABEL_PART_OF: "cratedb",
    }
    cratedb_labels = base_labels.copy()
    cratedb_labels[LABEL_COMPONENT] = "cratedb"
    cratedb_labels.update(meta.get("labels", {}))
    apps = AppsV1Api()
    batchv1_beta1 = BatchV1beta1Api()
    core = CoreV1Api()
    # Owner references make Kubernetes garbage-collect all created
    # resources when the CrateDB custom resource is deleted.
    owner_references = [
        V1OwnerReference(
            api_version=f"{API_GROUP}/v1",
            block_owner_deletion=True,
            controller=True,
            kind="CrateDB",
            name=name,
            uid=meta["uid"],
        )
    ]
    image_pull_secrets = ([
        V1LocalObjectReference(name=secret)
        for secret in config.IMAGE_PULL_SECRETS
    ] if config.IMAGE_PULL_SECRETS else None)
    # Ports may be overridden in the spec; otherwise fall back to the
    # operator defaults from the ``Port`` enum.
    ports_spec = spec.get("ports", {})
    http_port = ports_spec.get("http", Port.HTTP.value)
    jmx_port = ports_spec.get("jmx", Port.JMX.value)
    postgres_port = ports_spec.get("postgres", Port.POSTGRES.value)
    prometheus_port = ports_spec.get("prometheus", Port.PROMETHEUS.value)
    transport_port = ports_spec.get("transport", Port.TRANSPORT.value)
    master_nodes = get_master_nodes_names(spec["nodes"])
    total_nodes_count = get_total_nodes_count(spec["nodes"])
    crate_image = spec["cluster"]["imageRegistry"] + ":" + spec["cluster"][
        "version"]
    has_master_nodes = "master" in spec["nodes"]
    # The first StatefulSet we create references a set of master nodes. These
    # can either be explicit CrateDB master nodes, or implicit ones, which
    # would be the first set of nodes from the data nodes list.
    #
    # After the first StatefulSet was created, we set `treat_as_master` to
    # `False` to indicate that all remaining StatefulSets are neither explicit
    # nor implicit master nodes.
    treat_as_master = True
    sts = []
    cluster_name = spec["cluster"]["name"]
    if has_master_nodes:
        sts.append(
            create_statefulset(
                apps,
                owner_references,
                namespace,
                name,
                cratedb_labels,
                treat_as_master,
                False,
                cluster_name,
                "master",
                "master-",
                spec["nodes"]["master"],
                master_nodes,
                total_nodes_count,
                http_port,
                jmx_port,
                postgres_port,
                prometheus_port,
                transport_port,
                crate_image,
                spec["cluster"].get("ssl"),
                spec["cluster"].get("settings"),
                image_pull_secrets,
                logger,
            ))
        treat_as_master = False
    for node_spec in spec["nodes"]["data"]:
        node_name = node_spec["name"]
        sts.append(
            create_statefulset(
                apps,
                owner_references,
                namespace,
                name,
                cratedb_labels,
                treat_as_master,
                True,
                cluster_name,
                node_name,
                f"data-{node_name}-",
                node_spec,
                master_nodes,
                total_nodes_count,
                http_port,
                jmx_port,
                postgres_port,
                prometheus_port,
                transport_port,
                crate_image,
                spec["cluster"].get("ssl"),
                spec["cluster"].get("settings"),
                image_pull_secrets,
                logger,
            ))
        treat_as_master = False
    # Create all supporting resources and the StatefulSets concurrently.
    await asyncio.gather(
        create_sql_exporter_config(core, owner_references, namespace, name,
                                   cratedb_labels, logger),
        *create_debug_volume(core, owner_references, namespace, name,
                             cratedb_labels, logger),
        create_system_user(core, owner_references, namespace, name,
                           cratedb_labels, logger),
        *sts,
        *create_services(
            core,
            owner_references,
            namespace,
            name,
            cratedb_labels,
            http_port,
            postgres_port,
            transport_port,
            spec.get("cluster", {}).get("externalDNS"),
            logger,
        ),
    )
    # Pick the pod used to run the bootstrap commands: the first explicit
    # master pod if there is one, otherwise the first pod of the first data
    # node type (the implicit master).
    if has_master_nodes:
        master_node_pod = f"crate-master-{name}-0"
    else:
        node_name = spec["nodes"]["data"][0]["name"]
        master_node_pod = f"crate-data-{node_name}-{name}-0"
    await with_timeout(
        bootstrap_cluster(
            core,
            namespace,
            name,
            master_node_pod,
            spec["cluster"].get("license"),
            "ssl" in spec["cluster"],
            spec.get("users"),
            logger,
        ),
        config.BOOTSTRAP_TIMEOUT,
        (f"Failed to bootstrap cluster {namespace}/{name} after "
         f"{config.BOOTSTRAP_TIMEOUT} seconds."),
    )
    if "backups" in spec:
        backup_metrics_labels = base_labels.copy()
        backup_metrics_labels[LABEL_COMPONENT] = "backup"
        backup_metrics_labels.update(meta.get("labels", {}))
        await asyncio.gather(*create_backups(
            apps,
            batchv1_beta1,
            owner_references,
            namespace,
            name,
            backup_metrics_labels,
            http_port,
            prometheus_port,
            spec["backups"],
            image_pull_secrets,
            "ssl" in spec["cluster"],
            logger,
        ))
async def test_create(self, faker, namespace, api_client):
    """With an AWS backup config, both the CronJob and Deployment appear."""
    apps = AppsV1Api(api_client)
    batch_api = BatchV1beta1Api(api_client)
    cluster_name = faker.domain_word()

    def random_secret_ref():
        # A secretKeyRef with randomized key and secret name.
        return {
            "secretKeyRef": {
                "key": faker.domain_word(),
                "name": faker.domain_word(),
            },
        }

    backups_spec = {
        "aws": {
            "accessKeyId": random_secret_ref(),
            "basePath": faker.uri_path() + "/",
            "cron": "1 2 3 4 5",
            "region": random_secret_ref(),
            "bucket": random_secret_ref(),
            "secretAccessKey": random_secret_ref(),
        },
    }
    await create_backups(
        None,
        namespace.metadata.name,
        cluster_name,
        {LABEL_COMPONENT: "backup", LABEL_NAME: cluster_name},
        12345,
        23456,
        backups_spec,
        None,
        True,
        logging.getLogger(__name__),
    )
    await assert_wait_for(
        True,
        self.does_cronjob_exist,
        batch_api,
        namespace.metadata.name,
        f"create-snapshot-{cluster_name}",
    )
    await assert_wait_for(
        True,
        self.does_deployment_exist,
        apps,
        namespace.metadata.name,
        f"backup-metrics-{cluster_name}",
    )
async def handle(  # type: ignore
    self,
    namespace: str,
    name: str,
    spec: kopf.Spec,
    old: kopf.Body,
    diff: kopf.Diff,
    logger: logging.Logger,
    **kwargs: Any,
):
    """
    Scale a CrateDB cluster based on the replica changes in ``diff``.

    Extracts master and data-node replica changes from the diff, performs
    the scaling, and schedules a SCALE webhook notification on success.
    """
    scale_master_diff_item: Optional[kopf.DiffItem] = None
    scale_data_diff_items: Optional[List[kopf.DiffItem]] = None
    # Classify each change kopf detected between old and new resource bodies.
    for operation, field_path, old_value, new_value in diff:
        if field_path == ("spec", "nodes", "master", "replicas"):
            scale_master_diff_item = kopf.DiffItem(
                operation, field_path, old_value, new_value
            )
        elif field_path == ("spec", "nodes", "data"):
            # TODO: check for data node order, added or removed types, ...
            if len(old_value) != len(new_value):
                raise kopf.PermanentError(
                    "Adding and removing node specs is not supported at this time."
                )
            # Diff each data node spec individually and keep only replica
            # changes; all other changes inside a node spec are ignored.
            scale_data_diff_items = []
            for node_spec_idx in range(len(old_value)):
                old_spec = old_value[node_spec_idx]
                new_spec = new_value[node_spec_idx]
                inner_diff = calc_diff(old_spec, new_spec)
                for (
                    inner_operation,
                    inner_field_path,
                    inner_old_value,
                    inner_new_value,
                ) in inner_diff:
                    if inner_field_path == ("replicas",):
                        # Prefix the inner path with the node spec index so
                        # the scaling logic can map the change back to the
                        # node spec it belongs to.
                        scale_data_diff_items.append(
                            kopf.DiffItem(
                                inner_operation,
                                (str(node_spec_idx),) + inner_field_path,
                                inner_old_value,
                                inner_new_value,
                            )
                        )
                    else:
                        logger.info(
                            "Ignoring operation %s on field %s",
                            operation,
                            field_path + (str(node_spec_idx),) + inner_field_path,
                        )
        else:
            logger.info("Ignoring operation %s on field %s", operation, field_path)

    async with ApiClient() as api_client:
        apps = AppsV1Api(api_client)
        core = CoreV1Api(api_client)
        await scale_cluster(
            apps,
            core,
            namespace,
            name,
            old,
            scale_master_diff_item,
            (kopf.Diff(scale_data_diff_items) if scale_data_diff_items else None),
            logger,
        )
    # Only reached when scale_cluster did not raise: queue a success
    # notification with old/new replica counts and flush it.
    self.schedule_notification(
        WebhookEvent.SCALE,
        WebhookScalePayload(
            old_data_replicas=[
                WebhookScaleNodePayload(
                    name=item["name"], replicas=item["replicas"]
                )
                for item in old["spec"]["nodes"]["data"]
            ],
            new_data_replicas=[
                WebhookScaleNodePayload(
                    name=item["name"], replicas=item["replicas"]
                )
                for item in spec["nodes"]["data"]
            ],
            old_master_replicas=old["spec"]["nodes"]
            .get("master", {})
            .get("replicas"),
            new_master_replicas=spec["nodes"].get("master", {}).get("replicas"),
        ),
        WebhookStatus.SUCCESS,
    )
    await self.send_notifications(logger)
async def create_statefulset(
    owner_references: Optional[List[V1OwnerReference]],
    namespace: str,
    name: str,
    labels: LabelType,
    treat_as_master: bool,
    treat_as_data: bool,
    cluster_name: str,
    node_name: str,
    node_name_prefix: str,
    node_spec: Dict[str, Any],
    master_nodes: List[str],
    total_nodes_count: int,
    http_port: int,
    jmx_port: int,
    postgres_port: int,
    prometheus_port: int,
    transport_port: int,
    crate_image: str,
    ssl: Optional[Dict[str, Any]],
    cluster_settings: Optional[Dict[str, str]],
    image_pull_secrets: Optional[List[V1LocalObjectReference]],
    logger: logging.Logger,
) -> None:
    """
    Create the StatefulSet for one CrateDB node type, plus its
    PodDisruptionBudget.
    """
    async with ApiClient() as api_client:
        apps = AppsV1Api(api_client)
        sts_body = get_statefulset(
            owner_references,
            namespace,
            name,
            labels,
            treat_as_master,
            treat_as_data,
            cluster_name,
            node_name,
            node_name_prefix,
            node_spec,
            master_nodes,
            total_nodes_count,
            http_port,
            jmx_port,
            postgres_port,
            prometheus_port,
            transport_port,
            crate_image,
            ssl,
            cluster_settings,
            image_pull_secrets,
            logger,
        )
        await call_kubeapi(
            apps.create_namespaced_stateful_set,
            logger,
            continue_on_conflict=True,
            namespace=namespace,
            body=sts_body,
        )
        # A Pod Disruption Budget ensures that when performing Kubernetes
        # cluster maintenance (i.e. upgrades), we make sure to not disrupt
        # more than 1 pod in a StatefulSet at a time.
        pdb_body = V1beta1PodDisruptionBudget(
            metadata=V1ObjectMeta(
                name=f"crate-{name}",
                owner_references=owner_references,
            ),
            spec=V1beta1PodDisruptionBudgetSpec(
                max_unavailable=1,
                selector=V1LabelSelector(
                    match_labels={
                        LABEL_COMPONENT: "cratedb",
                        LABEL_NAME: name,
                        LABEL_NODE_NAME: node_name,
                    }
                ),
            ),
        )
        policy_api = PolicyV1beta1Api(api_client)
        await call_kubeapi(
            policy_api.create_namespaced_pod_disruption_budget,
            logger,
            continue_on_conflict=True,
            namespace=namespace,
            body=pdb_body,
        )
async def test_create_minimal(self, faker, namespace, cleanup_handler,
                              kopf_runner):
    """A minimal CrateDB resource yields a data StatefulSet with 3 pods."""
    apps = AppsV1Api()
    coapi = CustomObjectsApi()
    core = CoreV1Api()
    cluster_id = faker.domain_word()
    # Make sure the persistent volume is removed once the test finished.
    cleanup_handler.append(
        core.delete_persistent_volume(
            name=f"temp-pv-{namespace.metadata.name}-{cluster_id}"))
    data_node_spec = {
        "name": "data",
        "replicas": 3,
        "resources": {
            "cpus": 0.5,
            "memory": "1Gi",
            "heapRatio": 0.25,
            "disk": {
                "storageClass": "default",
                "size": "16GiB",
                "count": 1,
            },
        },
    }
    await coapi.create_namespaced_custom_object(
        group=API_GROUP,
        version="v1",
        plural=RESOURCE_CRATEDB,
        namespace=namespace.metadata.name,
        body={
            "apiVersion": "cloud.crate.io/v1",
            "kind": "CrateDB",
            "metadata": {"name": cluster_id},
            "spec": {
                "cluster": {
                    "imageRegistry": "crate",
                    "name": "my-crate-cluster",
                    "version": "4.1.5",
                },
                "nodes": {"data": [data_node_spec]},
            },
        },
    )
    await assert_wait_for(
        True,
        self.does_statefulset_exist,
        apps,
        namespace.metadata.name,
        f"crate-data-data-{cluster_id}",
    )
    await assert_wait_for(
        True,
        self.do_pods_exist,
        core,
        namespace.metadata.name,
        {f"crate-data-data-{cluster_id}-{i}" for i in range(3)},
    )
async def test_create(self, faker, namespace):
    """create_statefulset produces the StatefulSet and its 3 pods."""
    apps = AppsV1Api()
    core = CoreV1Api()
    sts_name = faker.domain_word()
    cluster_name = faker.domain_word()
    node_name = faker.domain_word()

    def ssl_secret_ref(key):
        # A secretKeyRef into the "sslcert" secret.
        return {"secretKeyRef": {"key": key, "name": "sslcert"}}

    labels = {
        LABEL_MANAGED_BY: "crate-operator",
        LABEL_NAME: sts_name,
        LABEL_PART_OF: "cratedb",
        LABEL_COMPONENT: "cratedb",
    }
    node_spec = {
        "replicas": 3,
        "resources": {
            "cpus": 0.5,
            "memory": "1Gi",
            "heapRatio": 0.4,
            "disk": {
                "count": 1,
                "size": "16Gi",
                "storageClass": "default",
            },
        },
    }
    ssl_spec = {
        "keystore": ssl_secret_ref("keystore"),
        "keystoreKeyPassword": ssl_secret_ref("keystore-key-password"),
        "keystorePassword": ssl_secret_ref("keystore-password"),
    }
    await create_statefulset(
        apps,
        None,
        namespace.metadata.name,
        sts_name,
        labels,
        True,
        True,
        cluster_name,
        node_name,
        f"data-{node_name}-",
        node_spec,
        ["master-1", "master-2", "master-3"],
        3,
        10000,
        20000,
        30000,
        40000,
        50000,
        "crate:4.1.5",
        ssl_spec,
        {},
        [],
        logging.getLogger(__name__),
    )
    await assert_wait_for(
        True,
        self.does_statefulset_exist,
        apps,
        namespace.metadata.name,
        f"crate-data-{node_name}-{sts_name}",
    )
    await assert_wait_for(
        True,
        self.do_pods_exist,
        core,
        namespace.metadata.name,
        {f"crate-data-{node_name}-{sts_name}-{i}" for i in range(3)},
    )