class Metric:
    _INVALID_RECORDS = prometheus_client.Counter(
        'mjolnir_swift_invalid_records_total',
        "Number of requests that could not be processed",
        ['reason']
    )
    FAIL_VALIDATE = _INVALID_RECORDS.labels(reason="validate")
    FAIL_NO_CONFIG = _INVALID_RECORDS.labels(reason="no_config")

    PROCESS_MESSAGE = prometheus_client.Summary(
        'mjolnir_swift_process_message_seconds',
        'Time taken to process individual kafka messages')
    BULK_IMPORT = prometheus_client.Summary(
        'mjolnir_swift_import_file_seconds',
        'Time taken to import a file into elasticsearch'
    )

    _BULK_ACTION_RESULT = prometheus_client.Counter(
        'mjolnir_swift_action_total',
        'Number of bulk action responses per result type',
        ['result'])
    ACTION_RESULTS = {
        'updated': _BULK_ACTION_RESULT.labels(result='updated'),
        'created': _BULK_ACTION_RESULT.labels(result='created'),
        'noop': _BULK_ACTION_RESULT.labels(result='noop'),
    }
    OK_UNKNOWN = _BULK_ACTION_RESULT.labels(result='ok_unknown')
    MISSING = _BULK_ACTION_RESULT.labels(result='missing')
    FAILED = _BULK_ACTION_RESULT.labels(result='failed')
    TIMEOUT = _BULK_ACTION_RESULT.labels(result='timeout')
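# A minimal usage sketch (not part of the original module) showing how the
# ACTION_RESULTS mapping above might be driven by elasticsearch bulk
# responses; `record_bulk_results`, `response_items`, and the exact shape
# of each item are assumptions here, and lumping unmapped results into
# OK_UNKNOWN is a simplification.
def record_bulk_results(response_items):
    for item in response_items:
        result = item.get('update', {}).get('result')
        counter = Metric.ACTION_RESULTS.get(result)
        if counter is not None:
            counter.inc()
        else:
            Metric.OK_UNKNOWN.inc()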
def __init__(self, blocking_pool, timeout, namespace, k8s_api):
    self.blocking_pool = blocking_pool
    self.timeout = timeout
    self.namespace = namespace
    self._delete_pod = self._wrap_k8s_delete(k8s_api.delete_namespaced_pod)
    self._delete_pvc = self._wrap_k8s_delete(
        k8s_api.delete_namespaced_persistent_volume_claim)
    self._create_pod = self._wrap_k8s(
        k8s_api.create_namespaced_pod,
        pc.Summary('batch_create_pod_seconds',
                   'Batch k8s create pod latency in seconds'))
    self._create_pvc = self._wrap_k8s(
        k8s_api.create_namespaced_persistent_volume_claim,
        pc.Summary('batch_create_pvc_seconds',
                   'Batch k8s create pvc latency in seconds'))
    self._read_pod_log = self._wrap_k8s(k8s_api.read_namespaced_pod_log)
    self._read_pod_status = self._wrap_k8s(
        k8s_api.read_namespaced_pod_status)
    self._list_pods = self._wrap_k8s(k8s_api.list_namespaced_pod)
    self._list_pvcs = self._wrap_k8s(
        k8s_api.list_namespaced_persistent_volume_claim)
    self._get_pod = self._wrap_k8s(k8s_api.read_namespaced_pod)
    self._get_pvc = self._wrap_k8s(
        k8s_api.read_namespaced_persistent_volume_claim)
    self._read_secret = self._wrap_k8s(
        k8s_api.read_namespaced_secret,
        pc.Summary('batch_read_secret_seconds',
                   'Batch k8s read secret latency in seconds'))
def __init__(self, client, **kwargs):
    super().__init__(client, **kwargs)
    self.min_keyword_length = 3
    self.max_query_length = 1024
    self.max_page_size = 100
    self.max_keywords = 5
    self._helper_funcs = {
        "nusers": (
            self._base_query_nusers,
            self._key_nusers,
        ),
        "address": (
            self._base_query_address,
            self._key_address,
        ),
    }

    try:
        import prometheus_client
    except ImportError:
        self._response_duration_metric = None
    else:
        self._response_duration_metric = prometheus_client.Summary(
            "muclumbus_xmpp_search_response_duration_seconds",
            "Time it takes to answer a search response",
            ["phase"])
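# Hedged sketch (the method and argument names are assumptions): because the
# metric is None whenever prometheus_client is unavailable, call sites on the
# same class have to guard every observation.
def _observe_phase(self, phase, duration):
    if self._response_duration_metric is not None:
        self._response_duration_metric.labels(phase).observe(duration)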
class Metric(object): """A Namespace for our metrics""" # Metrics we record in prometheus _INVALID_RECORDS = prometheus_client.Counter( 'mjolnir_bulk_invalid_records_total', "Number of requests that could not be processed", ['reason']) FAIL_VALIDATE = _INVALID_RECORDS.labels(reason='fail_validate') MISSING_INDEX = _INVALID_RECORDS.labels(reason='missing_index') SUBMIT_BATCH = prometheus_client.Summary( 'mjolnir_bulk_submit_batch_seconds', 'Time taken to submit a batch from kafka to elasticsearch') RECORDS_PROCESSED = prometheus_client.Counter( 'mjolnir_bulk_records_total', 'Number of kafka records processed') _BULK_ACTION_RESULT = prometheus_client.Counter( 'mjolnir_bulk_action_total', 'Number of bulk action somethings', ['result']) ACTION_RESULTS = { 'updated': _BULK_ACTION_RESULT.labels(result='updated'), 'created': _BULK_ACTION_RESULT.labels(result='created'), 'noop': _BULK_ACTION_RESULT.labels(result='noop'), } OK_UNKNOWN = _BULK_ACTION_RESULT.labels(result='ok_unknown') MISSING = _BULK_ACTION_RESULT.labels(result='missing') FAILED = _BULK_ACTION_RESULT.labels(result='failed')
def get_metrics(cls):
    if cls.inited:
        return cls
    # Register your metrics here
    cls.REQUEST_TIME = prometheus_client.Summary(
        "some_summary",
        "Time spent in processing request"
    )
    cls.inited = True
    return cls
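# Hedged usage sketch (the owning class is not shown; `Metrics` is a stand-in
# and get_metrics is assumed to be decorated with @classmethod): the `inited`
# guard makes registration idempotent, so repeated calls reuse the same
# collectors instead of raising prometheus_client's "Duplicated timeseries"
# ValueError.
metrics = Metrics.get_metrics()
with metrics.REQUEST_TIME.time():
    handle_request()  # hypothetical request handler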
def __init__(self, client, **kwargs):
    super().__init__(client, **kwargs)
    self._disco_svc = self.dependencies[aioxmpp.DiscoClient]
    self.expire_after = timedelta(days=7)
    self.non_muc_rescan_delay = timedelta(hours=6)
    self.address_blocklist = frozenset()
    self._worker_pool._timeout = timedelta(seconds=45)

    try:
        import prometheus_client
    except ImportError:
        self._domain_scanned_metric = None
        self._disco_info_duration_metric = None
        self._version_duration_metric = None
        self._disco_items_duration_metric = None
        self._pass_duration_metric = None
        self._last_pass_end_metric = None
        self._update_duration_metric = None
    else:
        self._domain_scanned_metric = prometheus_client.Summary(
            "muclumbus_scanner_domain_scan_duration",
            "Duration of domain scan operations",
            ["type"],
        )
        self._disco_info_duration_metric = prometheus_client.Summary(
            "muclumbus_scanner_disco_info_duration_seconds",
            "Duration of info requests",
            ["result"])
        self._update_duration_metric = prometheus_client.Summary(
            "muclumbus_scanner_update_duration_seconds",
            "Duration of database updates",
            ["operation"])
        self._version_duration_metric = prometheus_client.Summary(
            "muclumbus_scanner_version_duration_seconds",
            "Duration of software version requests",
            ["result"])
        self._disco_items_duration_metric = prometheus_client.Summary(
            "muclumbus_scanner_disco_items_duration_seconds",
            "Duration of items requests",
            ["type", "result"],
        )
        self._pass_duration_metric = prometheus_client.Gauge(
            "muclumbus_scanner_pass_duration_seconds",
            "Duration of the last pass in seconds")
        self._last_pass_end_metric = prometheus_client.Gauge(
            "muclumbus_scanner_last_pass_end_seconds",
            "Timestamp of the last pass")
def __init__(self, client, **kwargs):
    super().__init__(client, **kwargs)
    self._disco_svc = self.dependencies[aioxmpp.DiscoClient]
    self._vcard_client = self.dependencies[aioxmpp.vcard.VCardService]
    self.expire_after = timedelta(days=2)
    self.avatar_whitelist = frozenset()
    self.address_blocklist = frozenset()

    try:
        import prometheus_client
    except ImportError:
        self._room_scanned_metric = None
        self._disco_info_duration_metric = None
        self._avatar_fetch_duration_metric = None
        self._avatar_proc_duration_metric = None
        self._pass_duration_metric = None
        self._last_pass_end_metric = None
        self._update_duration_metric = None
    else:
        self._room_scanned_metric = prometheus_client.Summary(
            "muclumbus_watcher_room_scan_duration",
            "Duration of room scan operations",
        )
        self._disco_info_duration_metric = prometheus_client.Summary(
            "muclumbus_watcher_disco_info_duration_seconds",
            "Duration of info requests",
            ["result"])
        self._update_duration_metric = prometheus_client.Summary(
            "muclumbus_watcher_update_duration_seconds",
            "Duration of database updates",
        )
        self._avatar_proc_duration_metric = prometheus_client.Summary(
            "muclumbus_watcher_avatar_proc_duration_seconds",
            "Duration of avatar processing",
        )
        self._avatar_fetch_duration_metric = prometheus_client.Summary(
            "muclumbus_watcher_avatar_fetch_duration_seconds",
            "Duration of avatar requests",
            ["result"])
        self._pass_duration_metric = prometheus_client.Gauge(
            "muclumbus_watcher_pass_duration_seconds",
            "Duration of the last pass in seconds")
        self._last_pass_end_metric = prometheus_client.Gauge(
            "muclumbus_watcher_last_pass_end_seconds",
            "Timestamp of the last pass")
class Metric(object): """A namespace for our runtime metrics""" RECORDS_PROCESSED = prometheus_client.Counter( 'mjolnir_msearch_records_total', 'Number of kafka records processed') INTERVAL_VALUE = prometheus_client.Gauge( 'mjolnir_msearch_interval_sec', 'Seconds between polling elasticsearch for qps stats') EMA = prometheus_client.Gauge('mjolnir_msearch_ema_qps', 'Local estimate of canary index qps') PROCESS_BATCH = prometheus_client.Summary( 'mjolnir_msearch_process_batch_seconds', 'Time taken to process a batch of records from kafka')
def metrics_factory(registry):
    return {
        'plain_gauge': prometheus_client.Gauge(
            'plain_gauge', 'Simple gauge', registry=registry),
        'instance_gauge': prometheus_client.Gauge(
            'instance_gauge', 'Gauge with custom label',
            ['instance'], registry=registry),
        'service_gauge': prometheus_client.Gauge(
            'service_gauge', 'Gauge with "service" label',
            ['service'], registry=registry),
        'counter': prometheus_client.Counter(
            'counter', 'Simple counter', registry=registry),
        'summary': prometheus_client.Summary(
            'summary', 'Simple summary', registry=registry),
        'histogram': prometheus_client.Histogram(
            'histogram', 'Histogram with custom and "service" labels',
            ['instance', 'service'], registry=registry),
    }
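# Hedged usage sketch: passing a fresh CollectorRegistry keeps these
# collectors isolated from the global default registry, which is useful in
# tests that build the metrics more than once.
registry = prometheus_client.CollectorRegistry()
metrics = metrics_factory(registry)
metrics['counter'].inc()
metrics['instance_gauge'].labels(instance='web-1').set(3)
print(prometheus_client.generate_latest(registry).decode())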
def __init__(self, client, **kwargs):
    super().__init__(client, **kwargs)
    self._pubsub = self.dependencies[aioxmpp.PubSubClient]
    self._disco = self.dependencies[aioxmpp.DiscoClient]
    self.publish_target = None
    self._state_future.add_done_callback(self._initialise)
    self._event_buffer = {}
    self._first_enqueued = None
    self._enqueued_callback = None
    self._worker_pool = worker_pool.WorkerPool(
        self.WORKER_POOL_SIZE,
        self._handle_item,
        max_queue_size=self.WORKER_POOL_SIZE * 128,
        delay=(self.MIN_PROCESS_INTERVAL *
               self.WORKER_POOL_SIZE).total_seconds(),
        logger=self.logger,
    )
    self.node_config = aioxmpp.forms.Data(aioxmpp.forms.DataType.SUBMIT)
    self.node_config.fields.append(
        aioxmpp.forms.Field(
            type_=aioxmpp.forms.FieldType.HIDDEN,
            var="FORM_TYPE",
            values=["http://jabber.org/protocol/pubsub#node_config"]))
    self.node_config.fields.append(
        aioxmpp.forms.Field(var="pubsub#access_model", values=["open"]))
    self.node_config.fields.append(
        aioxmpp.forms.Field(var="pubsub#max_items", values=["16777216"]))
    self.node_config.fields.append(
        aioxmpp.forms.Field(var="pubsub#persist_items", values=["1"]))

    try:
        import prometheus_client
    except ImportError:
        self._initial_sync_duration_metric = None
        self._update_duration_metric = None
        self._lost_update_metric = None
    else:
        self._initial_sync_duration_metric = prometheus_client.Gauge(
            "muclumbus_mirror_server_initial_sync_duration_seconds",
            "Duration of various initial sync phases",
            ["phase"],
        )
        self._update_duration_metric = prometheus_client.Summary(
            "muclumbus_mirror_server_update_duration_seconds",
            "Duration of the updates",
            ["operation"],
        )
        self._lost_update_metric = prometheus_client.Counter(
            "muclumbus_mirror_server_lost_update_count",
            "Number of updates lost due to busyness",
        )
def __init__(self, p_logger, p_config):
    self._logger = p_logger
    self._config = p_config

    self._gauge_monitored_users = prometheus_client.Gauge(
        self._config.prefix + "monitored_users",
        "number of monitored users")
    self._gauge_active_users = prometheus_client.Gauge(
        self._config.prefix + "active_users",
        "number of active users", ['username'])
    self._gauge_configured_users = prometheus_client.Gauge(
        self._config.prefix + "configured_users",
        "number of configured users")
    self._gauge_monitored_hosts = prometheus_client.Gauge(
        self._config.prefix + "monitored_hosts",
        "number of monitored hosts", ['hostname'])
    self._gauge_monitored_devices = prometheus_client.Gauge(
        self._config.prefix + "monitored_devices",
        "number of monitored devices")
    self._gauge_active_devices = prometheus_client.Gauge(
        self._config.prefix + "active_devices",
        "number of active devices", ['devicename'])
    self._gauge_device_response_time = prometheus_client.Gauge(
        self._config.prefix + "device_response_time",
        "response time of device [ms]", ['devicename'])
    self._gauge_device_moving_average_response_time = prometheus_client.Gauge(
        self._config.prefix + "device_moving_average_response_time",
        "moving average of response time of device [ms]", ['devicename'])
    self._counter_forced_logouts = prometheus_client.Counter(
        self._config.prefix + "forced_logouts",
        "number of forced logouts", ['username'])
    self._summary_http_requests = prometheus_client.Summary(
        self._config.prefix + "http_requests",
        "request duration [ms] and count", ['service', 'hostname'])
    self._info_system = prometheus_client.Info(
        self._config.prefix + "system", "system information")
    self._info_system.info({
        "version": settings.settings['version'],
        "revision": settings.extended_settings['debian_package_revision'],
    })
    self._gauge_uptime = prometheus_client.Gauge(
        self._config.prefix + "uptime", "uptime in seconds")
    self._start_time = time.time()
    self._gauge_uptime.set_function(lambda: time.time() - self._start_time)
def __init__(self, registry=None):
    super().__init__()
    self.registry = registry or prometheus_client.REGISTRY
    self._response_time_metric = prometheus_client.Summary(
        "muclumbus_http_response_seconds",
        "Monotonic time passed for processing a request",
        ["endpoint", "http_status"])
    self._existence_metric = prometheus_client.Gauge(
        "muclumbus_http_endpoint_flag",
        "Existence of an endpoint in the code",
        ["endpoint"],
    )
    # self.registry.register(self)
    self.handle_metrics = self.observe("metrics", self.handle_metrics)
class InstrumentedHandler(web.RequestHandler):
    duration_metric = prometheus_client.Summary(
        'http_request_duration_microseconds',
        'The HTTP request latencies in microseconds.',
        ['handler'])
    total_metric = prometheus_client.Counter(
        'http_requests_total',
        'Total number of HTTP requests made.',
        ['code', 'handler', 'method'])

    def on_finish(self):
        super(InstrumentedHandler, self).on_finish()
        handler = type(self).__name__
        self.duration_metric.labels(handler).observe(
            self.request.request_time() * 1e6)
        self.total_metric.labels(self.get_status(), handler,
                                 self.request.method.lower()).inc()
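# Hedged wiring sketch (not from the original snippet): handlers pick up the
# instrumentation by subclassing InstrumentedHandler; `MainHandler` and the
# route are assumptions for illustration.
class MainHandler(InstrumentedHandler):
    def get(self):
        self.write("ok")

app = web.Application([(r"/", MainHandler)])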
def __init__(self, config):
    # type: (AirflowFetchingConfiguration) -> None
    super(WebFetcher, self).__init__(config)
    self.env = "Airflow"
    self.base_url = config.base_url
    self.endpoint_url = config.url
    self.api_mode = config.api_mode
    self.rbac_username = config.rbac_username
    self.rbac_password = config.rbac_password
    self.client = requests.session()
    self.is_logged_in = False

    if WebFetcher.prometheus_af_response_time_metrics is None:
        WebFetcher.prometheus_af_response_time_metrics = prometheus_client.Summary(
            "af_monitor_export_response_time",
            "Airflow export plugin response time",
            ["airflow_instance"],
        )
import itertools
import time
from time import perf_counter

import psycopg2
import prometheus_client

import common
import db_config


def main(
    target_db_id: int = 1,
    successful_writes_target: int = 10000,
):
    def cursor_generator():
        # just round robin, disregarding the config
        for db_settings in itertools.cycle(db_config.DATABASES):
            print('switching DB settings to:', db_settings)
            yield common.new_cursor(db_settings.host, db_settings.port,
                                    db_settings.user)

    sql = f"""
    UPDATE {common.TABLE_NAME}
    SET value = value + 1
    WHERE ID={target_db_id};
    """
    print('Starting writes...', flush=True)
    cursor_gen = cursor_generator()
    cursor = next(cursor_gen)
    successes = 0
    success_metric = prometheus_client.Counter(
        'successful_writes', 'How many passed DB writes.')
    write_times = prometheus_client.Summary(
        'write_times', 'How long successful writes took.')
    while successes != successful_writes_target:
        if successes % 200 == 0:
            print(successes, 'writes completed')
        try:
            query_start = perf_counter()
            cursor.execute(sql)
            successes += 1
            success_metric.inc()
            write_times.observe(perf_counter() - query_start)
        except psycopg2.Error:
            print('PG Error!', flush=True)
            time.sleep(3)
            cursor = next(cursor_gen)
    print('Done', flush=True)
def before_worker_boot(self, broker, worker):
    self.logger.debug("Setting up metrics...")
    if self.registry is None:
        self.registry = prom.CollectorRegistry()
    self.worker_busy = prom.Gauge(
        "remoulade_worker_busy",
        "1 if the worker is processing a message, 0 if not",
        registry=self.registry)
    self.total_errored_messages = prom.Counter(
        "remoulade_message_errors_total",
        "The total number of errored messages.",
        ["queue_name", "actor_name"],
        registry=self.registry,
    )
    self.total_retried_messages = prom.Counter(
        "remoulade_message_retries_total",
        "The total number of retried messages.",
        ["queue_name", "actor_name"],
        registry=self.registry,
    )
    self.total_rejected_messages = prom.Counter(
        "remoulade_message_rejects_total",
        "The total number of dead-lettered messages.",
        ["queue_name", "actor_name"],
        registry=self.registry,
    )
    self.message_durations = prom.Summary(
        "remoulade_message_duration_milliseconds",
        "The time spent processing messages.",
        ["queue_name", "actor_name"],
        registry=self.registry,
    )
    for actor in broker.actors.values():
        self._init_labels(actor)

    self.logger.debug("Starting exposition server...")
    prom.start_http_server(addr=self.http_host, port=self.http_port,
                           registry=self.registry)
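# Hedged sketch of what the _init_labels helper might look like (its body is
# not shown above, and the actor attribute names are assumptions): touching
# each label combination up front makes the series show up at zero instead of
# only appearing after the first event.
def _init_labels(self, actor):
    labels = (actor.queue_name, actor.actor_name)
    self.total_errored_messages.labels(*labels)
    self.total_retried_messages.labels(*labels)
    self.total_rejected_messages.labels(*labels)
    self.message_durations.labels(*labels)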
def __init__(self, client, **kwargs):
    super().__init__(client, **kwargs)
    try:
        import prometheus_client
    except ImportError:
        self._last_update_metric = None
        self._initial_sync_duration_metric = None
        self._update_duration_metric = None
    else:
        self._initial_sync_duration_metric = prometheus_client.Gauge(
            "muclumbus_mirror_client_initial_sync_duration_seconds",
            "Duration of various initial sync phases",
            ["phase"],
        )
        self._last_update_metric = prometheus_client.Gauge(
            "muclumbus_mirror_client_last_update_seconds",
            "Timestamp of the last update received")
        self._update_duration_metric = prometheus_client.Summary(
            "muclumbus_mirror_client_update_duration_seconds",
            "Duration of the updates",
            ["operation"],
        )
def main():
    args = parse_args()
    configure_logging(args.verbose)
    logger.debug("Arguments: {args}".format(args=args))

    if not args.prometheus_disable:
        logger.debug(
            "Starting prometheus exporter on {port}".format(port=args.prometheus_port)
        )
        prometheus.start_http_server(args.prometheus_port)

    kclient = KubernetesClient(args.kubeconfig, args.incluster_base_path)
    memguardian = MemGuardian(kclient)

    run_time_summary = prometheus.Summary(
        "memguardian_loop",
        "Loop execution time",
    )
    exceptions_count = prometheus.Counter(
        "memguardian_error",
        "Errors in the main loop",
    )

    while True:
        logger.debug("Running MemGuardian")
        try:
            with exceptions_count.count_exceptions():
                with run_time_summary.time():
                    memguardian.run()
        except Exception:
            logger.exception("Unknown problem in the run loop.")
        if not args.daemon:
            break
        logger.debug(
            "MemGuardian finished. Sleeping for {delay}".format(delay=args.delay)
        )
        time.sleep(args.delay)
VALIDATOR_CONTAINER_NAME = os.environ.get('VALIDATOR_CONTAINER_NAME', 'validator')
# for testnet, https://testnet-api.helium.wtf/v1
API_BASE_URL = os.environ.get('API_BASE_URL', 'https://api.helium.io/v1')
# Gather the ledger penalties for all validators, instead of just "this" one.
# When in this mode all validators from `miner validator ledger` with a
# penalty >0.0 will be included. The >0 constraint is just to keep data and
# traffic smaller.
ALL_PENALTIES = os.environ.get('ALL_PENALTIES', 0)
# Use the RPC calls where available. This means you have your RPC port open.
# Once all of the exec calls are replaced we can enable this by default.
ENABLE_RPC = os.environ.get('ENABLE_RPC', 0)

# prometheus exporter types: Gauge, Counter, Summary, Histogram, Info and Enum
SCRAPE_TIME = prometheus_client.Summary('validator_scrape_time',
                                        'Time spent collecting miner data')
VALIDATOR_DISK_USAGE = prometheus_client.Gauge(
    'validator_disk_usage_bytes',
    'Disk used by validator directory/volume',
    ['validator_name'])
SYSTEM_USAGE = prometheus_client.Gauge('system_usage',
                                       'Hold current system resource usage',
                                       ['resource_type', 'validator_name'])
CHAIN_STATS = prometheus_client.Gauge('chain_stats',
                                      'Stats about the global chain',
                                      ['resource_type'])
VAL = prometheus_client.Gauge('validator_height',
                              "Height of the validator's blockchain",
                              ['resource_type', 'validator_name'])
INCON = prometheus_client.Gauge('validator_inconsensus',
                                'Is validator currently in consensus group',
                                ['validator_name'])
    records = db.select_and_fetchall('''
SELECT batches.id, batches_n_jobs_in_complete_states.n_failed
FROM batches
LEFT JOIN batches_n_jobs_in_complete_states
       ON batches.id = batches_n_jobs_in_complete_states.id
WHERE state = 'running'
  AND cancel_after_n_failures IS NOT NULL
  AND n_failed >= cancel_after_n_failures
''')
    async for batch in records:
        await _cancel_batch(app, batch['id'])


USER_CORES = pc.Gauge('batch_user_cores', 'Batch user cores',
                      ['state', 'user', 'inst_coll'])
USER_JOBS = pc.Gauge('batch_user_jobs', 'Batch user jobs',
                     ['state', 'user', 'inst_coll'])
FREE_CORES = pc.Summary('batch_free_cores', 'Batch instance free cores',
                        ['inst_coll'])
UTILIZATION = pc.Summary('batch_utilization', 'Batch utilization rates',
                         ['inst_coll'])
COST_PER_HOUR = pc.Summary('batch_cost_per_hour', 'Batch cost ($/hr)',
                           ['measure', 'inst_coll'])
INSTANCES = pc.Gauge('batch_instances', 'Batch instances',
                     ['inst_coll', 'state'])

StateUserInstCollLabels = namedtuple('StateUserInstCollLabels',
                                     ['state', 'user', 'inst_coll'])
InstCollLabels = namedtuple('InstCollLabels', ['inst_coll'])
CostPerHourLabels = namedtuple('CostPerHourLabels', ['measure', 'inst_coll'])
InstanceLabels = namedtuple('InstanceLabels', ['inst_coll', 'state'])


async def monitor_user_resources(app):
from functools import wraps

import prometheus_client as pc  # type: ignore
from prometheus_async.aio import time as prom_async_time  # type: ignore

REQUEST_TIME = pc.Summary('http_request_latency_seconds',
                          'Endpoint latency in seconds',
                          ['endpoint', 'verb'])
REQUEST_COUNT = pc.Counter('http_request_count', 'Number of HTTP requests',
                           ['endpoint', 'verb', 'status'])


def monitor_endpoint(handler):
    @wraps(handler)
    async def wrapped(request, *args, **kwargs):
        # Use the path template given to @route.<METHOD>, not the fully
        # resolved one
        endpoint = request.match_info.route.resource.canonical
        verb = request.method
        response = await prom_async_time(
            REQUEST_TIME.labels(endpoint=endpoint, verb=verb),
            handler(request, *args, **kwargs))
        REQUEST_COUNT.labels(endpoint=endpoint,
                             verb=verb,
                             status=response.status).inc()
        return response
    return wrapped
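# Hedged usage sketch (aiohttp is assumed from the request.match_info usage
# above; the route and handler are illustrative):
from aiohttp import web

@monitor_endpoint
async def get_user(request):
    return web.json_response({'id': request.match_info['id']})

app = web.Application()
app.add_routes([web.get('/users/{id}', get_user)])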
#!/usr/bin/env python
"""Simple Python application which exposes metrics to Prometheus and
InfluxDB to populate pre-configured dashboards in Grafana.
"""
import logging
import math
import time
import random
import requests
import threading
import prometheus_client as prometheus
from flask import Flask

app = Flask(__name__)
REQUEST_TIME = prometheus.Summary('request_processing_seconds',
                                  'Time spent processing request')
APP_Gauge = prometheus.Gauge('flask_gauge', 'Flask input changing gauge')


def main():
    """Entry point to example app."""
    setup_logging(logging.DEBUG)
    logging.info('Starting Flask Server')
    threading.Thread(target=start_flask).start()
    logging.info('Starting Prometheus Server')
    start_prometheus()
    logging.info('Starting metrics generation to InfluxDB')
    start_metrics()


def setup_logging(level=logging.INFO):
    """Setup logging to output to standard out."""
    'active_users_red_to_green_seconds',
    'Time in seconds for /active-users to change from 500 to 200',
    registry=registry)
respTime = prom.Gauge('active_users_response_time_seconds',
                      'Response time for /active-users',
                      registry=registry)
errCount = prom.Counter('active_users_errors',
                        'Error count for /active-users',
                        registry=registry)
tokCount = prom.Counter('token_refresh',
                        'Number of token refresh requests',
                        registry=registry)
TOKEN_CHECK = prom.Summary('runtime_token_status_seconds',
                           'Time spent processing /token-status',
                           registry=registry)
TOKEN_GEN = prom.Summary('runtime_token_seconds',
                         'Time spent processing /token',
                         registry=registry)
ACTIVE_USERS = prom.Summary(
    'runtime_active_users_seconds',
    'Time spent processing /active-users inclusive of token check/refresh',
    registry=registry)

# Global - bad implementation
TOKEN = ''


def update_last_recovery_time(red, green):
    delta = green - red
""" import re import json import time import logging import datetime import threading import prometheus_client from agent import utils # pylint: disable-msg=no-value-for-parameter DB_GET_SUMMARY_METRIC = \ prometheus_client.Summary("database_get_processing_seconds", "Time spent handling Database Get Call") # pylint: disable-msg=no-value-for-parameter DB_UPDATE_SUMMARY_METRIC = \ prometheus_client.Summary("database_update_processing_seconds", "Time spent handling Database Update Call") # pylint: disable-msg=no-value-for-parameter DB_INSERT_SUMMARY_METRIC = \ prometheus_client.Summary("database_insert_processing_seconds", "Time spent handling Database Insert Call") # pylint: disable-msg=no-value-for-parameter DB_DELETE_SUMMARY_METRIC = \ prometheus_client.Summary("database_delete_processing_seconds", "Time spent handling Database Delete Call") # pylint: disable-msg=no-value-for-parameter DB_FIND_PARAMS_SUMMARY_METRIC = \ prometheus_client.Summary("database_find_params_processing_seconds",
# import uvloop

from ..batch import Batch, Job
from ..log_store import LogStore
from ..database import BatchDatabase, JobsBuilder
from ..datetime_json import JSON_ENCODER
from ..batch_configuration import POD_VOLUME_SIZE, INSTANCE_ID

from . import schemas

# uvloop.install()

log = logging.getLogger('batch.front_end')

REQUEST_TIME = pc.Summary('batch2_request_latency_seconds',
                          'Batch request latency in seconds',
                          ['endpoint', 'verb'])
REQUEST_TIME_GET_JOB = REQUEST_TIME.labels(
    endpoint='/api/v1alpha/batches/batch_id/jobs/job_id', verb="GET")
REQUEST_TIME_GET_JOB_LOG = REQUEST_TIME.labels(
    endpoint='/api/v1alpha/batches/batch_id/jobs/job_id/log', verb="GET")
REQUEST_TIME_GET_POD_STATUS = REQUEST_TIME.labels(
    endpoint='/api/v1alpha/batches/batch_id/jobs/job_id/pod_status', verb="GET")
REQUEST_TIME_GET_BATCHES = REQUEST_TIME.labels(
    endpoint='/api/v1alpha/batches', verb="GET")
REQUEST_TIME_POST_CREATE_JOBS = REQUEST_TIME.labels(
    endpoint='/api/v1alpha/batches/batch_id/jobs/create', verb="POST")
REQUEST_TIME_POST_CREATE_BATCH = REQUEST_TIME.labels(
    endpoint='/api/v1alpha/batches/create', verb='POST')
REQUEST_TIME_POST_GET_BATCH = REQUEST_TIME.labels(
from biggraphite import settings as bg_settings
from biggraphite import graphite_utils
from biggraphite import metric as bg_metric

# Ignore D102: Missing docstring in public method: Most of them come from
# upstream module.
# pylama:ignore=D102

WRITE_TIME = prometheus_client.Histogram(
    "bg_write_latency_seconds",
    "write latency in seconds",
    buckets=(0.005, .01, .025, .05, .075, .1, .25, .5, .75, 1.0, 2.5, 5.0, 7.5),
)
CREATE_TIME = prometheus_client.Summary(
    "bg_create_latency_seconds", "create latency in seconds"
)
EXISTS_TIME = prometheus_client.Summary(
    "bg_exists_latency_seconds", "exists latency in seconds"
)
CREATES = prometheus_client.Counter(
    "bg_creates", "metric creations"
)
CREATES_ENQUEUED = prometheus_client.Counter(
    "bg_creates_enqueued", "metrics scheduled for creation"
)
CREATES_DEQUEUED = prometheus_client.Counter(
import random
import time

import prometheus_client as prom

req_summary = prom.Summary('namespace_streamingservice1',
                           'Time spent processing a request')


@req_summary.time()
def process(event, counter1, counter2):
    try:
        time.sleep(random.random())
    except Exception as e:
        print(str(e))


if __name__ == '__main__':
    counter1 = prom.Counter('namespace_ingress', 'Counter for ingress')
    counter2 = prom.Counter('namespace_egress', 'Counter for egress')
    prom.start_http_server(8000)
    event = None  # placeholder event payload (never defined in the original)
    while True:
        process(event, counter1, counter2)

# from prometheus_client import CollectorRegistry, Gauge, push_to_gateway, Counter
# from prometheus_client.exposition import basic_auth_handler
# import random
# import time
# def my_auth_handler(url, method, timeout, headers, data):
#     username = '******'
#     password = '******'
#     return basic_auth_handler(url, method, timeout, headers, data, username, password)
import common

# create logger
logging.config.fileConfig('logging.conf')
logger = logging.getLogger('api_monitor')

# prometheus initialization
registry = prom.CollectorRegistry()
green = prom.Gauge('service_last_green_timestamp',
                   'Latest 200 status for /status endpoint',
                   registry=registry)
red = prom.Gauge('service_last_red_timestamp',
                 'Latest 40x/50x status for /status endpoint',
                 registry=registry)
REQUEST_TIME = prom.Summary('runtime_status_seconds',
                            'Time spent processing /status',
                            registry=registry)


@REQUEST_TIME.time()
def is_service_ok():
    url = 'https://lackadaisical-tip.glitch.me/status'
    status = common.is_http_ok(url)
    g = None
    if status:
        green.set_to_current_time()
    else:
        red.set_to_current_time()
    # if prom.push_to_gateway('localhost:9091', job='api_status', registry=registry)
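# Hedged sketch of a polling loop around is_service_ok(); the pushgateway
# address and job name mirror the commented-out hint above, and the 30-second
# interval is an assumption.
import time

if __name__ == '__main__':
    while True:
        is_service_ok()
        prom.push_to_gateway('localhost:9091', job='api_status',
                             registry=registry)
        time.sleep(30)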
import random
import time
from threading import Thread

import prometheus_client as prom
from flask import Flask, request
from flask_prometheus import monitor

req_summary = prom.Summary('python_my_req_example',
                           'Time spent processing a request')


@req_summary.time()
def process_request(t):
    time.sleep(t)


app = Flask("pyProm")


@app.route('/', methods=["GET", "POST"])
def hi():
    if request.method == "GET":
        return "OK", 200, None
    return "Bad Request", 400, None


counter = prom.Counter('python_my_counter', 'This is my counter')
gauge = prom.Gauge('python_my_gauge', 'This is my gauge')
histogram = prom.Histogram('python_my_histogram', 'This is my histogram')
summary = prom.Summary('python_my_summary', 'This is my summary')
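# Hedged sketch of how a demo like this is usually driven (the original
# snippet cuts off here): exercise the instrumented function from a
# background thread while Flask serves requests.
def generate_load():
    while True:
        process_request(random.random())

Thread(target=generate_load, daemon=True).start()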
    'rpc_server_status_code',
    'Server side status code',
    ['service_name', 'method', 'status_code', 'server_type'])
SERVER_IN_PROGRESS = prometheus_client.Gauge(
    "rpc_server_inprogress_requests", "",
    ["service_name", "server_type"],
    multiprocess_mode='livesum')
SERVER_FREE_WORKERS = prometheus_client.Gauge(
    "rpc_server_free_workers", "",
    ["service_name", "server_type"],
    multiprocess_mode='livesum')
CLIENT_IN_PROGRESS_REQUESTS = prometheus_client.Gauge(
    "rpc_client_in_progress_requests",
    "Client side in progress requests",
    ["client_service_name", "server_service_name", "method_name"],
    multiprocess_mode='livesum'
)
CLIENT_ATTEMPT_DEADLINE = prometheus_client.Histogram(
    'rpc_deadline_histogram', 'Deadline set for each attempt',
    ['caller_service_name', 'service_name', 'config', 'method'])
CLIENT_ATTEMPT_LATENCY = prometheus_client.Summary(
    'rpc_attempt_latency_summary', 'Latency for each attempt',
    ['caller_service_name', 'service_name', 'config', 'method', 'status_code'])
CLIENT_LATENCY = prometheus_client.Histogram(
    'rpc_latency_histogram', 'Overall latency after all retries',
    ['caller_service_name', 'service_name', 'method'])
CLIENT_RETRIES = prometheus_client.Summary(
    'rpc_retries_summary', 'Number of retries',
    ['caller_service_name', 'service_name', 'method', 'status_code'])
JOB_LATENCY = prometheus_client.Histogram(
    'rpc_server_job_latency', 'Overall latency of async RabbitMQ calls',
    ['caller_service_name', 'service_name', 'method', 'status_code'])
JOB_RETRIES = prometheus_client.Summary(
    'rpc_server_job_retries', 'number of retries to execute a job',
    ['caller_service_name', 'service_name', 'method', 'status_code'])