def smartstack_status(
    service: str,
    instance: str,
    job_config: LongRunningServiceConfig,
    service_namespace_config: ServiceNamespaceConfig,
    pods: Sequence[V1Pod],
    settings: Any,
    should_return_individual_backends: bool = False,
) -> Mapping[str, Any]:
    """Summarize the SmartStack backends of a service instance per location.

    For every location returned by the replication checker, the backends of
    the instance's first registration are fetched from one synapse host in
    the instance's pool, matched against ``pods`` and turned into a location
    dict.

    :param service: service name, used to look up the expected instance count
    :param instance: instance name, passed as the namespace for that lookup
    :param job_config: supplies the registrations, the pool and the allowed
        locations for this instance
    :param service_namespace_config: not read by this function; kept so the
        signature stays compatible with existing callers
    :param pods: pods to match against the discovered backends
    :param settings: object providing ``kubernetes_client``,
        ``system_paasta_config`` and ``cluster``
    :param should_return_individual_backends: forwarded unchanged to
        ``smartstack_tools.build_smartstack_location_dict``
    :returns: a mapping with the keys ``registration``,
        ``expected_backends_per_location`` and ``locations`` (one entry per
        location; empty when no location is available)
    """
    registration = job_config.get_registrations()[0]
    instance_pool = job_config.get_pool()
    system_paasta_config = settings.system_paasta_config

    smartstack_replication_checker = KubeSmartstackReplicationChecker(
        nodes=kubernetes_tools.get_all_nodes(settings.kubernetes_client),
        system_paasta_config=system_paasta_config,
    )
    node_hostname_by_location = (
        smartstack_replication_checker.get_allowed_locations_and_hosts(job_config)
    )

    expected_smartstack_count = (
        marathon_tools.get_expected_instance_count_for_namespace(
            service=service,
            namespace=instance,
            cluster=settings.cluster,
            instance_type_class=KubernetesDeploymentConfig,
        )
    )

    # With no locations there is nothing to spread the backends over; report
    # zero instead of failing with ZeroDivisionError.
    location_count = len(node_hostname_by_location)
    if location_count:
        expected_count_per_location = int(
            expected_smartstack_count / location_count
        )
    else:
        expected_count_per_location = 0

    # Named ``status`` so the local does not shadow this function's own name.
    status: MutableMapping[str, Any] = {
        "registration": registration,
        "expected_backends_per_location": expected_count_per_location,
        "locations": [],
    }

    for location, hosts in node_hostname_by_location.items():
        synapse_host = smartstack_replication_checker.get_first_host_in_pool(
            hosts, instance_pool
        )
        sorted_backends = sorted(
            smartstack_tools.get_backends(
                registration,
                synapse_host=synapse_host,
                synapse_port=system_paasta_config.get_synapse_port(),
                synapse_haproxy_url_format=(
                    system_paasta_config.get_synapse_haproxy_url_format()
                ),
            ),
            key=lambda backend: backend["status"],
            reverse=True,  # put 'UP' backends above 'MAINT' backends
        )
        matched_backends_and_pods = match_backends_and_pods(sorted_backends, pods)
        location_dict = smartstack_tools.build_smartstack_location_dict(
            location, matched_backends_and_pods, should_return_individual_backends
        )
        status["locations"].append(location_dict)

    return status
def check_all_kubernetes_services_replication(soa_dir: str) -> None:
    """Run the replication check for every Kubernetes instance in the cluster.

    Pods and nodes are fetched once and shared by all checks. An instance
    whose ``get_docker_image()`` is falsy is treated as not deployed: it is
    skipped and a debug message is logged instead.

    :param soa_dir: configuration directory handed to ``list_services`` and
        ``PaastaServiceConfigLoader``
    """
    client = KubeClient()
    pods = get_all_pods(client)
    nodes = get_all_nodes(client)
    paasta_config = load_system_paasta_config()
    cluster_name = paasta_config.get_cluster()
    checker = KubeSmartstackReplicationChecker(
        nodes=nodes,
        system_paasta_config=paasta_config,
    )

    for service in list_services(soa_dir=soa_dir):
        loader = PaastaServiceConfigLoader(service=service, soa_dir=soa_dir)
        instance_configs = loader.instance_configs(
            cluster=cluster_name,
            instance_type_class=kubernetes_tools.KubernetesDeploymentConfig,
        )
        for instance_config in instance_configs:
            # Nothing to monitor until an image has been set for the instance.
            if not instance_config.get_docker_image():
                log.debug(
                    '%s is not deployed. Skipping replication monitoring.' %
                    instance_config.job_id,
                )
                continue

            check_service_replication(
                instance_config=instance_config,
                all_pods=pods,
                smartstack_replication_checker=checker,
            )
def main(
    instance_type_class: Type[InstanceConfig_T],
    check_service_replication: CheckServiceReplication,
    namespace: str,
    mesos: bool = False,
) -> None:
    """Entry point shared by the replication check scripts.

    Parses the command line, configures logging, builds the replication
    checker for the selected scheduler, runs ``check_services_replication``
    and exits with a status derived from the under-replication percentage.

    :param instance_type_class: instance config class forwarded to
        ``check_services_replication``
    :param check_service_replication: per-instance check callable forwarded
        to ``check_services_replication``
    :param namespace: namespace passed to ``get_kubernetes_pods_and_nodes``;
        only read when ``mesos`` is falsy
    :param mesos: when truthy, gather Mesos tasks/slaves instead of
        Kubernetes pods/nodes
    :returns: never returns normally; calls ``sys.exit`` with 2 when the
        critical threshold is reached, 1 when the warning threshold is
        reached and 0 otherwise
    """
    args = parse_args()
    log_level = logging.DEBUG if args.verbose else logging.WARNING
    logging.basicConfig(level=log_level)

    paasta_config = load_system_paasta_config()
    cluster_name = paasta_config.get_cluster()

    checker: SmartstackReplicationChecker
    if not mesos:
        workloads, nodes = get_kubernetes_pods_and_nodes(namespace)
        checker = KubeSmartstackReplicationChecker(
            nodes=nodes,
            system_paasta_config=paasta_config,
        )
    else:
        workloads, slaves = get_mesos_tasks_and_slaves(paasta_config)
        checker = MesosSmartstackReplicationChecker(
            mesos_slaves=slaves,
            system_paasta_config=paasta_config,
        )

    pct_under_replicated = check_services_replication(
        soa_dir=args.soa_dir,
        cluster=cluster_name,
        service_instances=args.service_instance_list,
        instance_type_class=instance_type_class,
        check_service_replication=check_service_replication,
        replication_checker=checker,
        all_tasks_or_pods=workloads,
    )

    # Metrics are only emitted when the optional yelp_meteorite module exists.
    if yelp_meteorite is not None:
        scheduler_name = "mesos" if mesos else "kubernetes"
        emit_cluster_replication_metrics(
            pct_under_replicated, cluster_name, scheduler=scheduler_name
        )

    if pct_under_replicated >= args.under_replicated_crit_pct:
        log.critical(
            f"{pct_under_replicated}% of instances are under replicated "
            f"(past {args.under_replicated_crit_pct} is critical)!"
        )
        exit_code = 2
    elif pct_under_replicated >= args.under_replicated_warn_pct:
        log.warning(
            f"{pct_under_replicated}% of instances are under replicated "
            f"(past {args.under_replicated_warn_pct} is a warning)!"
        )
        exit_code = 1
    else:
        exit_code = 0

    sys.exit(exit_code)
def check_all_kubernetes_based_services_replication(
    soa_dir: str,
    service_instances: Sequence[str],
    instance_type_class: Type[InstanceConfig_T],
    check_service_replication: CheckServiceReplication,
    namespace: str,
) -> None:
    """Run ``check_service_replication`` on the selected Kubernetes instances.

    Pods (restricted to ``namespace``) and nodes are fetched once and shared
    by all checks. An instance whose ``get_docker_image()`` is falsy is
    treated as not deployed: it is skipped and a debug message is logged.

    :param soa_dir: configuration directory handed to ``list_services`` and
        ``PaastaServiceConfigLoader``
    :param service_instances: ``<service><SPACER><instance>`` names to
        restrict the check to; an empty sequence means no filtering
    :param instance_type_class: instance config class used to enumerate the
        instances of each service
    :param check_service_replication: callable invoked for each selected,
        deployed instance
    :param namespace: namespace passed to ``get_all_pods``
    """
    client = KubeClient()
    pods = get_all_pods(kube_client=client, namespace=namespace)
    nodes = get_all_nodes(client)
    paasta_config = load_system_paasta_config()
    cluster_name = paasta_config.get_cluster()
    checker = KubeSmartstackReplicationChecker(
        nodes=nodes, system_paasta_config=paasta_config
    )
    service_instances_set = set(service_instances)

    for service in list_services(soa_dir=soa_dir):
        loader = PaastaServiceConfigLoader(service=service, soa_dir=soa_dir)
        instance_configs = loader.instance_configs(
            cluster=cluster_name, instance_type_class=instance_type_class
        )
        for instance_config in instance_configs:
            # An empty filter selects everything; otherwise the instance's
            # full name must have been requested explicitly.
            is_selected = (
                not service_instances_set
                or f"{service}{SPACER}{instance_config.instance}"
                in service_instances_set
            )
            if not is_selected:
                continue

            if not instance_config.get_docker_image():
                log.debug(
                    "%s is not deployed. Skipping replication monitoring." %
                    instance_config.job_id
                )
                continue

            check_service_replication(
                instance_config=instance_config,
                all_pods=pods,
                smartstack_replication_checker=checker,
            )