def emit_replication_metrics(
    replication_infos: Mapping[str, Mapping[str, Mapping[str, int]]],
    instance_config: LongRunningServiceConfig,
    expected_count: int,
) -> None:
    """Emit available/critical/expected backend gauges, one set per
    service-discovery provider found in ``replication_infos``."""
    for provider, replication_info in replication_infos.items():
        dims = {
            "paasta_service": instance_config.service,
            "paasta_cluster": instance_config.cluster,
            "paasta_instance": instance_config.instance,
            "paasta_pool": instance_config.get_pool(),
            "service_discovery_provider": provider,
        }

        # Total backends for this job across every location the provider reports.
        num_available = sum(
            backends.get(instance_config.job_id, 0)
            for backends in replication_info.values()
        )
        yelp_meteorite.create_gauge(
            "paasta.service.available_backends", dims
        ).set(num_available)

        # Alerting threshold: crit% of the expected instance count.
        crit_pct = instance_config.get_replication_crit_percentage()
        yelp_meteorite.create_gauge(
            "paasta.service.critical_backends", dims
        ).set(crit_pct * expected_count / 100.0)

        yelp_meteorite.create_gauge(
            "paasta.service.expected_backends", dims
        ).set(expected_count)
def emit_metrics_for_type(instance_type):
    """Emit cpu/mem/disk gauges — plus a fixed instance-count gauge for
    non-autoscaled services — for every instance of *instance_type* on
    this cluster."""
    cluster = load_system_paasta_config().get_cluster()
    for service, instance in get_services_for_cluster(
        cluster=cluster, instance_type=instance_type
    ):
        config = get_instance_config(
            service=service, instance=instance, cluster=cluster
        )
        dimensions = {
            "paasta_service": config.service,
            "paasta_cluster": config.cluster,
            "paasta_instance": config.instance,
            "paasta_pool": config.get_pool(),
        }
        log.info(f"Emitting paasta.service.* with dimensions {dimensions}")

        for metric_name, metric_value in (
            ("paasta.service.cpus", config.get_cpus()),
            ("paasta.service.mem", config.get_mem()),
            ("paasta.service.disk", config.get_disk()),
        ):
            yelp_meteorite.create_gauge(metric_name, dimensions).set(metric_value)

        # Only report a fixed instance count when the service is not autoscaled.
        if hasattr(config, "get_instances") and config.get_max_instances() is None:
            yelp_meteorite.create_gauge("paasta.service.instances", dimensions).set(
                config.get_instances()
            )
def emit_metrics_for_type(instance_type):
    """Emit cpu/mem/disk gauges — and an instance-count gauge for services
    without autoscaling — for each instance of the given type in this cluster."""
    cluster = load_system_paasta_config().get_cluster()
    services = get_services_for_cluster(cluster=cluster, instance_type=instance_type)
    for service, instance in services:
        config = get_instance_config(service=service, instance=instance, cluster=cluster)
        dimensions = {
            "paasta_service": config.service,
            "paasta_cluster": config.cluster,
            "paasta_instance": config.instance,
        }
        log.info(f"Emitting paasta.service.* with dimensions {dimensions}")

        yelp_meteorite.create_gauge("paasta.service.cpus", dimensions).set(config.get_cpus())
        yelp_meteorite.create_gauge("paasta.service.mem", dimensions).set(config.get_mem())
        yelp_meteorite.create_gauge("paasta.service.disk", dimensions).set(config.get_disk())

        # Fixed instance counts only make sense when autoscaling is off.
        if hasattr(config, "get_instances"):
            if config.get_max_instances() is None:
                yelp_meteorite.create_gauge("paasta.service.instances", dimensions).set(
                    config.get_instances()
                )
def _record_autoscaling_decision(
    marathon_service_config: MarathonServiceConfig,
    autoscaling_params: AutoscalingParamsDict,
    utilization: float,
    log_utilization_data: Mapping[str, str],
    error: float,
    current_instances: int,
    num_healthy_instances: int,
    new_instance_count: int,
    safe_downscaling_threshold: int,
    task_data_insufficient: bool,
) -> None:
    """
    Based on the calculations made, perform observability side effects:
    log the full decision as JSON and publish instance-count gauges.
    """
    decision_record = {
        "timestamp": time.time(),
        "paasta_cluster": marathon_service_config.get_cluster(),
        "paasta_service": marathon_service_config.get_service(),
        "paasta_instance": marathon_service_config.get_instance(),
        "autoscaling_params": autoscaling_params,
        "utilization": utilization,
        "error": error,
        "current_instances": current_instances,
        "num_healthy_instances": num_healthy_instances,
        "new_instance_count": new_instance_count,
        "safe_downscaling_threshold": safe_downscaling_threshold,
        "task_data_insufficient": task_data_insufficient,
    }
    write_to_log(
        config=marathon_service_config,
        line=json.dumps(decision_record),
        level="debug",
    )

    meteorite_dims = {
        "paasta_service": marathon_service_config.service,
        "paasta_cluster": marathon_service_config.cluster,
        "paasta_instance": marathon_service_config.instance,
        "paasta_pool": marathon_service_config.get_pool(),
        "decision_policy": autoscaling_params[DECISION_POLICY_KEY],  # type: ignore
    }
    # yelp_meteorite may be unavailable (falsy); skip gauges in that case.
    if yelp_meteorite:
        for gauge_name, gauge_value in (
            ("paasta.service.instances", new_instance_count),
            ("paasta.service.max_instances", marathon_service_config.get_max_instances()),
            ("paasta.service.min_instances", marathon_service_config.get_min_instances()),
        ):
            yelp_meteorite.create_gauge(gauge_name, meteorite_dims).set(gauge_value)
def __init__(self, stat_gauge_name, **kwargs):
    """Create a meteorite gauge named *stat_gauge_name*; all keyword
    arguments become the gauge's dimensions."""
    self.dimensions = kwargs
    self._meteorite_gauge = yelp_meteorite.create_gauge(stat_gauge_name, kwargs)
def emit_cluster_replication_metrics(
    pct_under_replicated: float,
    cluster: str,
    scheduler: str,
) -> None:
    """Publish the percentage of under-replicated services for one cluster/scheduler pair."""
    dims = {"paasta_cluster": cluster, "scheduler": scheduler}
    under_replicated_gauge = yelp_meteorite.create_gauge(
        "paasta.pct_services_under_replicated", dims
    )
    under_replicated_gauge.set(pct_under_replicated)
def emit_cluster_replication_metrics(
    pct_under_replicated: float,
    cluster: str,
    scheduler: str,
    dry_run: bool = False,
) -> None:
    """Publish the percentage of under-replicated services for one
    cluster/scheduler pair; in dry-run mode only print what would be sent."""
    metric_name = "paasta.pct_services_under_replicated"
    if dry_run:
        print(f"Would've sent value {pct_under_replicated} for metric '{metric_name}'")
        return
    gauge = yelp_meteorite.create_gauge(
        metric_name,
        {"paasta_cluster": cluster, "scheduler": scheduler},
    )
    gauge.set(pct_under_replicated)
def report_metric_to_meteorite(backend, metric, value, paasta_cluster):
    """Publish one HAProxy backend metric to meteorite.

    The service and instance are parsed out of the HAProxy backend name;
    backends whose name does not match the expected scheme are skipped
    silently. The metric is sent as a gauge or a counter depending on which
    configured set it belongs to.

    Raises:
        ValueError: if *metric* is in neither GUAGES nor COUNTERS.
    """
    try:
        paasta_service, paasta_instance = parse_haproxy_backend_name(backend)
    except IndexError:
        # Backend name doesn't follow the service/instance naming scheme; skip it.
        return
    meteorite_dims = {
        'paasta_service': paasta_service,
        'paasta_cluster': paasta_cluster,
        'paasta_instance': paasta_instance,
    }
    path = f'paasta.service.requests.{metric}'
    # NOTE: the module-level constant GUAGES is misspelled; kept as-is since it
    # is defined elsewhere in this file.
    if metric in GUAGES:
        gauge = yelp_meteorite.create_gauge(path, meteorite_dims)
        gauge.set(value)
    elif metric in COUNTERS:
        counter = yelp_meteorite.create_counter(path, meteorite_dims)
        counter.count(value)
    else:
        # Fixed typo in the user-facing message ("guage" -> "gauge").
        raise ValueError(
            f"{metric} hasn't been configured as a gauge or counter")
    print(f"Sent {path}: {value} to meteorite")
def create_gauge(self, name: str, **kwargs: Any) -> GaugeProtocol:
    """Create a meteorite gauge namespaced under this object's base name."""
    full_name = ".".join((self.base_name, name))
    return yelp_meteorite.create_gauge(full_name, kwargs)
def autoscale_marathon_instance(marathon_service_config, system_paasta_config, marathon_tasks, mesos_tasks):
    """Compute and apply a new instance count for one marathon service.

    Derives current utilization from the running tasks, turns it into an
    error relative to the autoscaler setpoint, picks a new instance count,
    and then either scales the service or logs why it stayed put. Finally
    emits instance-count gauges to meteorite when it is available.
    """
    current_instances = marathon_service_config.get_instances()
    # When too few healthy tasks are visible, scaling *down* would be unsafe,
    # so that decision is deferred (see the early return below).
    task_data_insufficient = is_task_data_insufficient(marathon_service_config, marathon_tasks, current_instances)
    autoscaling_params = marathon_service_config.get_autoscaling_params()
    # Populated as a side effect by get_utilization, used for debug logging.
    log_utilization_data = {}
    utilization = get_utilization(
        marathon_service_config=marathon_service_config,
        system_paasta_config=system_paasta_config,
        autoscaling_params=autoscaling_params,
        log_utilization_data=log_utilization_data,
        marathon_tasks=marathon_tasks,
        mesos_tasks=mesos_tasks,
    )
    error = get_error_from_utilization(
        utilization=utilization,
        setpoint=autoscaling_params['setpoint'],
        current_instances=current_instances,
    )
    new_instance_count = get_new_instance_count(
        utilization=utilization,
        error=error,
        autoscaling_params=autoscaling_params,
        current_instances=current_instances,
        marathon_service_config=marathon_service_config,
        num_healthy_instances=len(marathon_tasks),
    )
    # 70% of current: if the new count lands exactly here, the downscale was
    # presumably clamped to this floor — TODO confirm against get_new_instance_count.
    safe_downscaling_threshold = int(current_instances * 0.7)
    if new_instance_count != current_instances:
        if new_instance_count < current_instances and task_data_insufficient:
            # Not enough healthy-task data to trust a downscale; wait for the
            # next run rather than shrink on incomplete information.
            write_to_log(
                config=marathon_service_config,
                line='Delaying scaling *down* as we found too few healthy tasks running in marathon. '
                     'This can happen because tasks are delayed/waiting/unhealthy or because we are '
                     'waiting for tasks to be killed. Will wait for sufficient healthy tasks before '
                     'we make a decision to scale down.',
            )
            return
        if new_instance_count == safe_downscaling_threshold:
            write_to_log(
                config=marathon_service_config,
                line='Autoscaler clamped: %s' % str(log_utilization_data),
                level='debug',
            )
        write_to_log(
            config=marathon_service_config,
            line='Scaling from %d to %d instances (%s)' % (
                current_instances,
                new_instance_count,
                humanize_error(error),
            ),
        )
        set_instances_for_marathon_service(
            service=marathon_service_config.service,
            instance=marathon_service_config.instance,
            instance_count=new_instance_count,
        )
    else:
        write_to_log(
            config=marathon_service_config,
            line='Staying at %d instances (%s)' % (current_instances, humanize_error(error)),
            level='debug',
        )
    meteorite_dims = {
        'service_name': marathon_service_config.service,
        'decision_policy': autoscaling_params[DECISION_POLICY_KEY],
        'paasta_cluster': marathon_service_config.cluster,
        'instance_name': marathon_service_config.instance,
    }
    # yelp_meteorite is guarded here, so it is presumably an optional
    # dependency that can be None — confirm at the module's import block.
    if yelp_meteorite:
        gauge = yelp_meteorite.create_gauge('paasta.service.instances', meteorite_dims)
        gauge.set(new_instance_count)
        gauge = yelp_meteorite.create_gauge('paasta.service.max_instances', meteorite_dims)
        gauge.set(marathon_service_config.get_max_instances())
        gauge = yelp_meteorite.create_gauge('paasta.service.min_instances', meteorite_dims)
        gauge.set(marathon_service_config.get_min_instances())
def create_gauge(name: str, *args: Any, **kwargs: Any) -> GaugeProtocol:
    """Thin pass-through wrapper around yelp_meteorite.create_gauge."""
    gauge = yelp_meteorite.create_gauge(name, *args, **kwargs)
    return gauge
def create_gauge(self, name, **kwargs):
    """Create a meteorite gauge under the paasta.deployd namespace."""
    return yelp_meteorite.create_gauge(f'paasta.deployd.{name}', kwargs)
def __init__(self, stat_gauge_name, **kwargs):
    """Remember the gauge dimensions (the keyword arguments) and build the
    backing meteorite gauge."""
    self.dimensions = kwargs
    self._meteorite_gauge = yelp_meteorite.create_gauge(
        stat_gauge_name,
        kwargs,
    )