def kubernetes_instance_status(
    instance_status: Mapping[str, Any],
    service: str,
    instance: str,
    verbose: bool,
) -> Mapping[str, Any]:
    """Collect Kubernetes runtime status for one service instance.

    Returns a dict with app_count / desired_state / bounce_method (plus
    whatever kubernetes_job_status fills in), or an empty dict when no
    Kubernetes client is configured.
    """
    status: Dict[str, Any] = {}
    job_config = kubernetes_tools.load_kubernetes_service_config(
        service, instance, settings.cluster, soa_dir=settings.soa_dir,
    )
    kube_client = settings.kubernetes_client
    if kube_client is None:
        return status

    # bouncing status can be inferred from app_count, ref get_bouncing_status
    pods = kubernetes_tools.pods_for_service_instance(
        job_config.service, job_config.instance, kube_client
    )
    shas = kubernetes_tools.get_active_shas_for_service(pods)
    status["app_count"] = max(len(shas["config_sha"]), len(shas["git_sha"]))
    status["desired_state"] = job_config.get_desired_state()
    # NOTE(review): "STATEGY" is the constant's actual (misspelled) name in
    # kubernetes_tools — do not "fix" it here.
    status["bounce_method"] = kubernetes_tools.KUBE_DEPLOY_STATEGY_REVMAP[
        job_config.get_bounce_method()
    ]
    kubernetes_job_status(
        kstatus=status,
        client=kube_client,
        job_config=job_config,
        verbose=verbose,
        pod_list=pods,
    )
    return status
def kubernetes_instance_status(
    instance_status: Mapping[str, Any],
    service: str,
    instance: str,
    verbose: int,
    include_smartstack: bool,
) -> Mapping[str, Any]:
    """Gather Kubernetes status for a service instance.

    Fills in app_count / desired_state / bounce_method, delegates per-job
    details to kubernetes_job_status, and optionally attaches a smartstack
    section. Returns an empty dict when no Kubernetes client is configured.
    """
    status: Dict[str, Any] = {}
    job_config = kubernetes_tools.load_kubernetes_service_config(
        service, instance, settings.cluster, soa_dir=settings.soa_dir
    )
    kube_client = settings.kubernetes_client
    if kube_client is None:
        return status

    # bouncing status can be inferred from app_count, ref get_bouncing_status
    pods = kubernetes_tools.pods_for_service_instance(
        job_config.service, job_config.instance, kube_client
    )
    replicasets = kubernetes_tools.replicasets_for_service_instance(
        job_config.service, job_config.instance, kube_client
    )
    shas = kubernetes_tools.get_active_shas_for_service(pods)
    status["app_count"] = max(len(shas["config_sha"]), len(shas["git_sha"]))
    status["desired_state"] = job_config.get_desired_state()
    # NOTE(review): "STATEGY" is the constant's actual (misspelled) name in
    # kubernetes_tools — do not "fix" it here.
    status["bounce_method"] = kubernetes_tools.KUBE_DEPLOY_STATEGY_REVMAP[
        job_config.get_bounce_method()
    ]
    kubernetes_job_status(
        kstatus=status,
        client=kube_client,
        job_config=job_config,
        verbose=verbose,
        pod_list=pods,
        replicaset_list=replicasets,
    )

    if include_smartstack:
        namespace_config = kubernetes_tools.load_service_namespace_config(
            service=service,
            namespace=job_config.get_nerve_namespace(),
            soa_dir=settings.soa_dir,
        )
        # Only mesh-registered instances (those with a proxy_port) have
        # smartstack state worth reporting.
        if "proxy_port" in namespace_config:
            status["smartstack"] = kubernetes_smartstack_status(
                service,
                instance,
                job_config,
                namespace_config,
                pods,
                should_return_individual_backends=verbose > 0,
            )
    return status
def kubernetes_status(
    service: str,
    instance: str,
    verbose: int,
    include_smartstack: bool,
    instance_type: str,
    settings: Any,
) -> Mapping[str, Any]:
    """Build the status payload for a long-running Kubernetes instance.

    Args:
        service: service name.
        instance: instance name.
        verbose: verbosity level; > 0 includes individual smartstack backends.
        include_smartstack: whether to attach a "smartstack" section.
        instance_type: key into LONG_RUNNING_INSTANCE_TYPE_HANDLERS.
        settings: API settings providing cluster, soa_dir and the k8s client.

    Returns:
        Dict with app_count / desired_state / bounce_method / evicted_count,
        plus optional autoscaling_status, error_message (list of strings) and
        smartstack keys; empty when no Kubernetes client is configured.
    """
    kstatus: Dict[str, Any] = {}
    config_loader = LONG_RUNNING_INSTANCE_TYPE_HANDLERS[instance_type].loader
    job_config = config_loader(
        service=service,
        instance=instance,
        cluster=settings.cluster,
        soa_dir=settings.soa_dir,
        load_deployments=True,
    )
    kube_client = settings.kubernetes_client
    if kube_client is None:
        return kstatus
    app = kubernetes_tools.get_kubernetes_app_by_name(
        name=job_config.get_sanitised_deployment_name(),
        kube_client=kube_client,
        namespace=job_config.get_kubernetes_namespace(),
    )
    # bouncing status can be inferred from app_count, ref get_bouncing_status
    pod_list = kubernetes_tools.pods_for_service_instance(
        service=job_config.service,
        instance=job_config.instance,
        kube_client=kube_client,
        namespace=job_config.get_kubernetes_namespace(),
    )
    replicaset_list = kubernetes_tools.replicasets_for_service_instance(
        service=job_config.service,
        instance=job_config.instance,
        kube_client=kube_client,
        namespace=job_config.get_kubernetes_namespace(),
    )
    active_shas = kubernetes_tools.get_active_shas_for_service(
        [app, *pod_list, *replicaset_list]
    )
    kstatus["app_count"] = max(
        len(active_shas["config_sha"]), len(active_shas["git_sha"])
    )
    kstatus["desired_state"] = job_config.get_desired_state()
    kstatus["bounce_method"] = job_config.get_bounce_method()
    job_status(
        kstatus=kstatus,
        client=kube_client,
        namespace=job_config.get_kubernetes_namespace(),
        job_config=job_config,
        verbose=verbose,
        pod_list=pod_list,
        replicaset_list=replicaset_list,
    )
    if job_config.is_autoscaling_enabled() is True:
        try:
            kstatus["autoscaling_status"] = autoscaling_status(
                kube_client, job_config, job_config.get_kubernetes_namespace()
            )
        except ApiException as e:
            error_message = (
                f"Error while reading autoscaling information: {e.getResponseBody()}"
            )
            # BUG FIX: kstatus["error_message"] was never initialized, so
            # .append() raised KeyError on the first autoscaling error;
            # create the list lazily instead.
            kstatus.setdefault("error_message", []).append(error_message)
    # Count pods the kubelet evicted (pod.status.reason == "Evicted").
    evicted_count = 0
    for pod in pod_list:
        if pod.status.reason == "Evicted":
            evicted_count += 1
    kstatus["evicted_count"] = evicted_count
    if include_smartstack:
        service_namespace_config = kubernetes_tools.load_service_namespace_config(
            service=job_config.get_service_name_smartstack(),
            namespace=job_config.get_nerve_namespace(),
            soa_dir=settings.soa_dir,
        )
        # Only mesh-registered instances (with a proxy_port) have
        # smartstack state to report.
        if "proxy_port" in service_namespace_config:
            kstatus["smartstack"] = smartstack_status(
                service=job_config.get_service_name_smartstack(),
                instance=job_config.get_nerve_namespace(),
                job_config=job_config,
                service_namespace_config=service_namespace_config,
                pods=pod_list,
                should_return_individual_backends=verbose > 0,
                settings=settings,
            )
    return kstatus
def kubernetes_status(
    service: str,
    instance: str,
    verbose: int,
    include_smartstack: bool,
    include_envoy: bool,
    instance_type: str,
    settings: Any,
) -> Mapping[str, Any]:
    """Assemble the status dict for a long-running Kubernetes instance,
    optionally including smartstack and/or envoy mesh sections.

    Returns an empty dict when no Kubernetes client is configured.
    """
    kstatus: Dict[str, Any] = {}
    loader = LONG_RUNNING_INSTANCE_TYPE_HANDLERS[instance_type].loader
    job_config = loader(
        service=service,
        instance=instance,
        cluster=settings.cluster,
        soa_dir=settings.soa_dir,
        load_deployments=True,
    )
    kube_client = settings.kubernetes_client
    if kube_client is None:
        return kstatus

    deployment = kubernetes_tools.get_kubernetes_app_by_name(
        name=job_config.get_sanitised_deployment_name(),
        kube_client=kube_client,
        namespace=job_config.get_kubernetes_namespace(),
    )
    # bouncing status can be inferred from app_count, ref get_bouncing_status
    pods = kubernetes_tools.pods_for_service_instance(
        service=job_config.service,
        instance=job_config.instance,
        kube_client=kube_client,
        namespace=job_config.get_kubernetes_namespace(),
    )
    replicasets = kubernetes_tools.replicasets_for_service_instance(
        service=job_config.service,
        instance=job_config.instance,
        kube_client=kube_client,
        namespace=job_config.get_kubernetes_namespace(),
    )
    # For the purpose of active_shas/app_count, don't count replicasets
    # that are at 0/0.
    live_replicasets = filter_actually_running_replicasets(replicasets)
    active_shas = kubernetes_tools.get_active_shas_for_service(
        [deployment, *pods, *live_replicasets]
    )
    kstatus["app_count"] = len(active_shas)
    kstatus["desired_state"] = job_config.get_desired_state()
    kstatus["bounce_method"] = job_config.get_bounce_method()
    kstatus["active_shas"] = list(active_shas)
    job_status(
        kstatus=kstatus,
        client=kube_client,
        namespace=job_config.get_kubernetes_namespace(),
        job_config=job_config,
        verbose=verbose,
        pod_list=pods,
        replicaset_list=replicasets,
    )

    # Bespoke decision policies manage their own scaling, so there is no
    # HPA-style status to fetch for them.
    if (
        job_config.is_autoscaling_enabled() is True
        and job_config.get_autoscaling_params().get("decision_policy", "")
        != "bespoke"  # type: ignore
    ):
        try:
            kstatus["autoscaling_status"] = autoscaling_status(
                kube_client, job_config, job_config.get_kubernetes_namespace()
            )
        except Exception as e:
            kstatus[
                "error_message"
            ] = f"Unknown error occurred while fetching autoscaling status. Please contact #compute-infra for help: {e}"

    kstatus["evicted_count"] = sum(
        1 for pod in pods if pod.status.reason == "Evicted"
    )

    if include_smartstack or include_envoy:
        namespace_config = kubernetes_tools.load_service_namespace_config(
            service=service,
            namespace=job_config.get_nerve_namespace(),
            soa_dir=settings.soa_dir,
        )
        # Only mesh-registered instances (with a proxy_port) have mesh state.
        if "proxy_port" in namespace_config:
            mesh_kwargs = dict(
                service=service,
                instance=job_config.get_nerve_namespace(),
                job_config=job_config,
                service_namespace_config=namespace_config,
                pods=pods,
                should_return_individual_backends=verbose > 0,
                settings=settings,
            )
            if include_smartstack:
                kstatus["smartstack"] = mesh_status(
                    service_mesh=ServiceMesh.SMARTSTACK, **mesh_kwargs
                )
            if include_envoy:
                kstatus["envoy"] = mesh_status(
                    service_mesh=ServiceMesh.ENVOY, **mesh_kwargs
                )
    return kstatus
def kubernetes_status_v2(
    service: str,
    instance: str,
    verbose: int,
    include_smartstack: bool,
    include_envoy: bool,
    instance_type: str,
    settings: Any,
):
    """Build the v2 status payload: basic deployment facts, optional envoy
    mesh info, and per-replicaset status.

    Returns an empty dict when no Kubernetes client is configured.
    """
    status: Dict[str, Any] = {}
    loader = LONG_RUNNING_INSTANCE_TYPE_HANDLERS[instance_type].loader
    job_config = loader(
        service=service,
        instance=instance,
        cluster=settings.cluster,
        soa_dir=settings.soa_dir,
        load_deployments=True,
    )
    kube_client = settings.kubernetes_client
    if kube_client is None:
        return status

    app_name = job_config.get_sanitised_deployment_name()
    pods = kubernetes_tools.pods_for_service_instance(
        service=job_config.service,
        instance=job_config.instance,
        kube_client=kube_client,
        namespace=job_config.get_kubernetes_namespace(),
    )
    # TODO(PAASTA-17315): support statefulsets, too
    pods_by_replicaset = get_pods_by_replicaset(pods)
    replicasets = kubernetes_tools.replicasets_for_service_instance(
        service=job_config.service,
        instance=job_config.instance,
        kube_client=kube_client,
        namespace=job_config.get_kubernetes_namespace(),
    )
    # For the purpose of active_shas/app_count, don't count replicasets
    # that are at 0/0.
    live_replicasets = filter_actually_running_replicasets(replicasets)

    desired_state = job_config.get_desired_state()
    status["app_name"] = app_name
    status["desired_state"] = desired_state
    if desired_state != "stop":
        status["desired_instances"] = job_config.get_instances()
    else:
        status["desired_instances"] = 0
    status["bounce_method"] = job_config.get_bounce_method()

    namespace_config = kubernetes_tools.load_service_namespace_config(
        service=service,
        namespace=job_config.get_nerve_namespace(),
        soa_dir=settings.soa_dir,
    )
    backends = None
    if "proxy_port" in namespace_config:
        envoy_status = mesh_status(
            service=service,
            service_mesh=ServiceMesh.ENVOY,
            instance=job_config.get_nerve_namespace(),
            job_config=job_config,
            service_namespace_config=namespace_config,
            pods=pods,
            should_return_individual_backends=True,
            settings=settings,
        )
        locations = envoy_status.get("locations")
        if locations:
            backends = {be["address"] for be in locations[0].get("backends", [])}
        else:
            backends = set()
        if include_envoy:
            # Note we always include backends here now
            status["envoy"] = envoy_status

    status["replicasets"] = [
        get_replicaset_status(
            rs, pods_by_replicaset.get(rs.metadata.name), backends
        )
        for rs in live_replicasets
    ]
    return status
def kubernetes_mesh_status(
    service: str,
    instance: str,
    instance_type: str,
    settings: Any,
    include_smartstack: bool = True,
    include_envoy: bool = True,
) -> Mapping[str, Any]:
    """Return smartstack and/or envoy mesh status for a service instance.

    Raises:
        RuntimeError: if no mesh type is requested, the instance type is
            not a long-running one, or the instance has no proxy_port.
    """
    if not include_smartstack and not include_envoy:
        raise RuntimeError(
            "No mesh types specified when requesting mesh status")
    if instance_type not in LONG_RUNNING_INSTANCE_TYPE_HANDLERS:
        raise RuntimeError(
            f"Getting mesh status for {instance_type} instances is not supported"
        )

    job_config = LONG_RUNNING_INSTANCE_TYPE_HANDLERS[instance_type].loader(
        service=service,
        instance=instance,
        cluster=settings.cluster,
        soa_dir=settings.soa_dir,
        load_deployments=True,
    )
    namespace_config = kubernetes_tools.load_service_namespace_config(
        service=service,
        namespace=job_config.get_nerve_namespace(),
        soa_dir=settings.soa_dir,
    )
    # An instance without a proxy_port is not registered in the mesh at all.
    if "proxy_port" not in namespace_config:
        raise RuntimeError(
            f"Instance '{service}.{instance}' is not configured for the mesh")

    kube_client = settings.kubernetes_client
    pods = kubernetes_tools.pods_for_service_instance(
        service=job_config.service,
        instance=job_config.instance,
        kube_client=kube_client,
        namespace=job_config.get_kubernetes_namespace(),
    )

    requested = []
    if include_smartstack:
        requested.append(("smartstack", ServiceMesh.SMARTSTACK))
    if include_envoy:
        requested.append(("envoy", ServiceMesh.ENVOY))

    kmesh: Dict[str, Any] = {}
    for key, mesh in requested:
        kmesh[key] = mesh_status(
            service_mesh=mesh,
            service=service,
            instance=job_config.get_nerve_namespace(),
            job_config=job_config,
            service_namespace_config=namespace_config,
            pods=pods,
            should_return_individual_backends=True,
            settings=settings,
        )
    return kmesh
def kubernetes_status_v2(
    service: str,
    instance: str,
    verbose: int,
    include_smartstack: bool,
    include_envoy: bool,
    instance_type: str,
    settings: Any,
):
    """Build the v2 status payload, grouping runtime state by version.

    Stateful instances (those with persistent volumes) are versioned by
    controller revision; stateless ones by replicaset. Returns an empty
    dict when no Kubernetes client is configured.
    """
    status: Dict[str, Any] = {}
    loader = LONG_RUNNING_INSTANCE_TYPE_HANDLERS[instance_type].loader
    job_config = loader(
        service=service,
        instance=instance,
        cluster=settings.cluster,
        soa_dir=settings.soa_dir,
        load_deployments=True,
    )
    kube_client = settings.kubernetes_client
    if kube_client is None:
        return status

    desired_state = job_config.get_desired_state()
    status["app_name"] = job_config.get_sanitised_deployment_name()
    status["desired_state"] = desired_state
    status["desired_instances"] = (
        0 if desired_state == "stop" else job_config.get_instances()
    )
    status["bounce_method"] = job_config.get_bounce_method()

    pods = kubernetes_tools.pods_for_service_instance(
        service=job_config.service,
        instance=job_config.instance,
        kube_client=kube_client,
        namespace=job_config.get_kubernetes_namespace(),
    )
    namespace_config = kubernetes_tools.load_service_namespace_config(
        service=service,
        namespace=job_config.get_nerve_namespace(),
        soa_dir=settings.soa_dir,
    )
    backends = None
    if "proxy_port" in namespace_config:
        envoy_status = mesh_status(
            service=service,
            service_mesh=ServiceMesh.ENVOY,
            instance=job_config.get_nerve_namespace(),
            job_config=job_config,
            service_namespace_config=namespace_config,
            pods=pods,
            should_return_individual_backends=True,
            settings=settings,
        )
        locations = envoy_status.get("locations")
        backends = (
            {be["address"] for be in locations[0].get("backends", [])}
            if locations
            else set()
        )
        if include_envoy:
            # Note we always include backends here now
            status["envoy"] = envoy_status

    if job_config.get_persistent_volumes():
        # Stateful instance: versions keyed off controller revisions.
        revisions = kubernetes_tools.controller_revisions_for_service_instance(
            service=job_config.service,
            instance=job_config.instance,
            kube_client=kube_client,
            namespace=job_config.get_kubernetes_namespace(),
        )
        status["versions"] = get_versions_for_controller_revisions(
            revisions, pods, backends,
        )
    else:
        # Stateless instance: versions keyed off replicasets.
        replicasets = kubernetes_tools.replicasets_for_service_instance(
            service=job_config.service,
            instance=job_config.instance,
            kube_client=kube_client,
            namespace=job_config.get_kubernetes_namespace(),
        )
        status["versions"] = get_versions_for_replicasets(
            replicasets, pods, backends,
        )
    return status
async def kubernetes_status_v2(
    service: str,
    instance: str,
    verbose: int,
    include_smartstack: bool,
    include_envoy: bool,
    instance_type: str,
    settings: Any,
):
    """Concurrently assemble the v2 status payload for a Kubernetes instance.

    Fans out pod, mesh, autoscaling and version lookups as asyncio tasks,
    awaits them all in one gather, then folds the results into a single
    dict. Returns an empty dict when no Kubernetes client is configured.
    """
    status: Dict[str, Any] = {}
    config_loader = LONG_RUNNING_INSTANCE_TYPE_HANDLERS[instance_type].loader
    job_config = config_loader(
        service=service,
        instance=instance,
        cluster=settings.cluster,
        soa_dir=settings.soa_dir,
        load_deployments=True,
    )
    kube_client = settings.kubernetes_client
    if kube_client is None:
        return status
    # Every concurrently-running lookup is collected here and awaited in
    # the single asyncio.gather below.
    tasks: List["asyncio.Future[Dict[str, Any]]"] = []
    # Autoscaling status is fetched only at higher verbosity, and only when
    # scaling is not driven by a "bespoke" decision policy.
    if (
        verbose > 1
        and job_config.is_autoscaling_enabled()
        and job_config.get_autoscaling_params().get("decision_policy", "")
        != "bespoke"  # type: ignore
    ):
        autoscaling_task = asyncio.create_task(
            autoscaling_status(
                kube_client, job_config, job_config.get_kubernetes_namespace()
            )
        )
        tasks.append(autoscaling_task)
    else:
        autoscaling_task = None
    pods_task = asyncio.create_task(
        kubernetes_tools.pods_for_service_instance(
            service=service,
            instance=instance,
            kube_client=kube_client,
            namespace=job_config.get_kubernetes_namespace(),
        )
    )
    tasks.append(pods_task)
    service_namespace_config = kubernetes_tools.load_service_namespace_config(
        service=service,
        namespace=job_config.get_nerve_namespace(),
        soa_dir=settings.soa_dir,
    )
    # Mesh/backends lookups apply only to instances registered in the mesh
    # (i.e. with a proxy_port); backends_task is derived from the mesh task.
    if "proxy_port" in service_namespace_config:
        mesh_status_task = asyncio.create_task(
            mesh_status(
                service=service,
                service_mesh=ServiceMesh.ENVOY,
                instance=job_config.get_nerve_namespace(),
                job_config=job_config,
                service_namespace_config=service_namespace_config,
                pods_task=pods_task,
                should_return_individual_backends=True,
                settings=settings,
            )
        )
        backends_task = asyncio.create_task(
            get_backends_from_mesh_status(mesh_status_task)
        )
        tasks.extend([mesh_status_task, backends_task])
    else:
        mesh_status_task = None
        backends_task = None
    # Stateful instances (persistent volumes) are versioned by controller
    # revision; stateless ones by replicaset.
    if job_config.get_persistent_volumes():
        pod_status_by_sha_and_readiness_task = asyncio.create_task(
            get_pod_status_tasks_by_sha_and_readiness(
                pods_task,
                backends_task,
                kube_client,
                verbose,
            )
        )
        versions_task = asyncio.create_task(
            get_versions_for_controller_revisions(
                kube_client=kube_client,
                service=service,
                instance=instance,
                namespace=job_config.get_kubernetes_namespace(),
                pod_status_by_sha_and_readiness_task=pod_status_by_sha_and_readiness_task,
            )
        )
        tasks.extend([pod_status_by_sha_and_readiness_task, versions_task])
    else:
        pod_status_by_replicaset_task = asyncio.create_task(
            get_pod_status_tasks_by_replicaset(
                pods_task,
                backends_task,
                kube_client,
                verbose,
            )
        )
        versions_task = asyncio.create_task(
            get_versions_for_replicasets(
                kube_client=kube_client,
                service=service,
                instance=instance,
                namespace=job_config.get_kubernetes_namespace(),
                pod_status_by_replicaset_task=pod_status_by_replicaset_task,
            )
        )
        tasks.extend([pod_status_by_replicaset_task, versions_task])
    # return_exceptions=True: failures are surfaced below via each task's
    # .result() call instead of aborting the whole gather.
    await asyncio.gather(*tasks, return_exceptions=True)
    desired_state = job_config.get_desired_state()
    status["app_name"] = job_config.get_sanitised_deployment_name()
    status["desired_state"] = desired_state
    status["desired_instances"] = (
        job_config.get_instances() if desired_state != "stop" else 0
    )
    status["bounce_method"] = job_config.get_bounce_method()
    try:
        pods_task.result()  # just verifies we have a valid result
        # These tasks also depend on pods_task, so we cannot populate them without pods
        status["versions"] = versions_task.result()
        if mesh_status_task is not None:
            status["envoy"] = mesh_status_task.result()
    except asyncio.TimeoutError:
        status["versions"] = []
        status["error_message"] = (
            "Could not fetch instance data. "
            "This is usually a temporary problem. Please try again or contact #compute-infra for help if you continue to see this message\n"
        )
    if autoscaling_task is not None:
        try:
            status["autoscaling_status"] = autoscaling_task.result()
        except Exception as e:
            # Preserve any earlier error message; append rather than clobber.
            if "error_message" not in status:
                status["error_message"] = (
                    f"Unknown error occurred while fetching autoscaling status. "
                    f"Please contact #compute-infra for help: {e}"
                )
            else:
                status[
                    "error_message"
                ] += f"Unknown error occurred while fetching autoscaling status: {e}"
    return status
def kubernetes_instance_status(
    instance_status: Mapping[str, Any],
    service: str,
    instance: str,
    verbose: int,
    include_smartstack: bool,
    instance_type: str,
) -> Mapping[str, Any]:
    """Gather Kubernetes status for a long-running service instance.

    Returns app_count / desired_state / bounce_method / evicted_count plus
    whatever kubernetes_job_status adds, with an optional smartstack
    section; empty dict when no Kubernetes client is configured.
    """
    status: Dict[str, Any] = {}
    loader = LONG_RUNNING_INSTANCE_TYPE_HANDLERS[instance_type].loader
    job_config = loader(
        service=service,
        instance=instance,
        cluster=settings.cluster,
        soa_dir=settings.soa_dir,
        load_deployments=True,
    )
    kube_client = settings.kubernetes_client
    if kube_client is None:
        return status

    # bouncing status can be inferred from app_count, ref get_bouncing_status
    pods = kubernetes_tools.pods_for_service_instance(
        service=job_config.service,
        instance=job_config.instance,
        kube_client=kube_client,
        namespace=job_config.get_kubernetes_namespace(),
    )
    replicasets = kubernetes_tools.replicasets_for_service_instance(
        service=job_config.service,
        instance=job_config.instance,
        kube_client=kube_client,
        namespace=job_config.get_kubernetes_namespace(),
    )
    shas = kubernetes_tools.get_active_shas_for_service(pods)
    status["app_count"] = max(len(shas["config_sha"]), len(shas["git_sha"]))
    status["desired_state"] = job_config.get_desired_state()
    status["bounce_method"] = job_config.get_bounce_method()
    kubernetes_job_status(
        kstatus=status,
        client=kube_client,
        namespace=job_config.get_kubernetes_namespace(),
        job_config=job_config,
        verbose=verbose,
        pod_list=pods,
        replicaset_list=replicasets,
    )

    status["evicted_count"] = sum(
        1 for pod in pods if pod.status.reason == "Evicted"
    )

    if include_smartstack:
        namespace_config = kubernetes_tools.load_service_namespace_config(
            service=job_config.get_service_name_smartstack(),
            namespace=job_config.get_nerve_namespace(),
            soa_dir=settings.soa_dir,
        )
        # Only mesh-registered instances (with a proxy_port) have
        # smartstack state to report.
        if "proxy_port" in namespace_config:
            status["smartstack"] = kubernetes_smartstack_status(
                service=job_config.get_service_name_smartstack(),
                instance=job_config.get_nerve_namespace(),
                job_config=job_config,
                service_namespace_config=namespace_config,
                pods=pods,
                should_return_individual_backends=verbose > 0,
            )
    return status