Example #1
import time

from flask import Response, request
from prometheus_client import Counter, Histogram

from classification_model import __version__ as live_version
from dl_classification_model import __version__ as shadow_version

from api.config import APP_NAME
from api.persistence import data_access

# Counter and Histogram are two of the metric types
# provided by the prometheus Python client.
REQUEST_COUNT = Counter(
    name='http_request_count',
    documentation='App Request Count',
    labelnames=['app_name', 'method', 'endpoint', 'http_status'])
REQUEST_LATENCY = Histogram(name='http_request_latency_seconds',
                            documentation='Request latency',
                            labelnames=['app_name', 'endpoint'])


def start_timer() -> None:
    """Get start time of a request."""
    request._prometheus_metrics_request_start_time = time.time()


def stop_timer(response: Response) -> Response:
    """Get stop time of a request.."""
    request_latency = time.time(
    ) - request._prometheus_metrics_request_start_time
    REQUEST_LATENCY.labels(app_name=APP_NAME,
                           endpoint=request.path).observe(request_latency)
    return response
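
The snippet defines the hooks but does not show how they are registered; a minimal wiring sketch (assuming a Flask app object, which is not part of the source) could look like this:

def setup_metrics(app):
    # hypothetical wiring: time every request via the two hooks above
    # (REQUEST_COUNT is presumably incremented in another hook not shown here)
    app.before_request(start_timer)
    app.after_request(stop_timer)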
Example #2
import http.server
from prometheus_client import start_http_server, Histogram
import time

LATENCY = Histogram('hello_world_latency_seconds',
                    'Time for a Hello World request.')


class MyHandler(http.server.BaseHTTPRequestHandler):
    @LATENCY.time()
    def do_GET(self):
        self.send_response(200)
        self.end_headers()
        self.wfile.write(b"Hello World")


if __name__ == "__main__":
    start_http_server(8000)
    server = http.server.HTTPServer(('localhost', 8001), MyHandler)
    server.serve_forever()
Example #3
        label_1='ping',
        label_2='GET',
    ).inc()

    request.app.metrics[MetricsType.TIME_LATENCY.name].labels(
        label_1='ping',
        label_2='GET',
    ).observe(round(float(time() - start_time), 3))

    return json({'success': 'you are home'})


if __name__ == "__main__":
    monitor(app,
            multiprocess_mode='all',
            metrics_path='/metrics',
            is_middleware=False,
            metrics_list=[(MetricsType.COUNT.name,
                           Counter(name=MetricsType.COUNT.value,
                                   documentation='Total count',
                                   labelnames=['label_1', 'label_2'])),
                          (MetricsType.TIME_LATENCY.name,
                           Histogram(
                               name=MetricsType.TIME_LATENCY.value,
                               documentation='Gauge',
                               labelnames=['label_1', 'label_2'],
                           ))]).expose_endpoint()

    app.add_route(ping, 'ping', methods=['GET'])
    app.run(host="127.0.0.1", port=8000, workers=2)
Example #4
from prometheus_client import start_http_server, Summary, Gauge, Histogram
import random
import time
import math

# Create Prometheus metrics
REQUEST_TIME = Summary('request_processing_seconds', 'Time spent processing request')
LEVEL = Gauge('current_sea_level', 'Height of Tide at Time (Seconds)')
STATUS = Gauge('current_tide_direction', 'Status of tide (Incoming Outgoing High Low)')

buckets = (0, 0.05, 0.1, .15, .2, .25, .3, .35, .4, .45, .5, .55, math.inf)
REQUEST_HIST = Histogram('latency_histogram', 'Latency Histogram', buckets=buckets)


@REQUEST_TIME.time()
def measure_sea_level(t):
    """A toy function that 'measures sea level' with some latency."""

    t = t/10

    time.sleep(max(0, random.normalvariate(0.3, 0.1)))
    level = 4 * math.asin(math.sin(t / 2)) * math.cos(t - 2)
    df = dfdt(t)

    # {rising: 1, falling: -1, high: 2, low: -2}
    if abs(df) > 0.25:
        status = math.copysign(1, df)
    elif dfdt(t - 1) > 0:
        status = 2
    else:
        status = -2
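
The helper dfdt is referenced above but not included in the snippet; a hypothetical stand-in (name from the source, implementation assumed) that approximates the derivative of the sea-level curve numerically:

def dfdt(t, h=1e-3):
    # hypothetical finite-difference derivative of the toy sea-level curve
    f = lambda x: 4 * math.asin(math.sin(x / 2)) * math.cos(x - 2)
    return (f(t + h) - f(t - h)) / (2 * h)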
Example #5
import os

from prometheus_client import Counter, Gauge, Histogram

debug = os.getenv("DEBUG", "false") == "true"
metrics_port = os.getenv("METRICS_PORT", "19000")

METRIC_MISSES = Gauge("terra_oracle_misses_total", "Total number of oracle misses")
METRIC_HEIGHT = Gauge("terra_oracle_height", "Block height of the LCD node")
METRIC_VOTES = Counter("terra_oracle_votes", "Counter of oracle votes")

METRIC_MARKET_PRICE = Gauge("terra_oracle_market_price", "Last market price", ['denom'])
METRIC_SWAP_PRICE = Gauge("terra_oracle_swap_price", "Last swap price", ['denom'])

METRIC_EXCHANGE_ASK_PRICE = Gauge("terra_oracle_exchange_ask_price", "Exchange ask price", ['exchange', 'denom'])
METRIC_EXCHANGE_MID_PRICE = Gauge("terra_oracle_exchange_mid_price", "Exchange mid price", ['exchange', 'denom'])
METRIC_EXCHANGE_BID_PRICE = Gauge("terra_oracle_exchange_bid_price", "Exchange bid price", ['exchange', 'denom'])

METRIC_OUTBOUND_ERROR = Counter("terra_oracle_request_errors", "Outbound HTTP request error count", ["remote"])
METRIC_OUTBOUND_LATENCY = Histogram("terra_oracle_request_latency", "Outbound HTTP request latency", ["remote"])

# parameters
fx_map = {
    "uusd": "USDUSD",
    "ukrw": "USDKRW",
    "usdr": "USDSDR",
    "umnt": "USDMNT"
}
active_candidate = [
    "uusd",
    "ukrw",
    "usdr",
    "umnt"
]
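
A short usage sketch (not part of the source; the helper names and values are illustrative) showing how the labeled metrics above would typically be updated:

def record_prices(denom, market_price, swap_price):
    # hypothetical helper: push the latest prices into the labeled gauges
    METRIC_MARKET_PRICE.labels(denom=denom).set(market_price)
    METRIC_SWAP_PRICE.labels(denom=denom).set(swap_price)


def timed_request(remote, fn):
    # hypothetical helper: record latency and errors of an outbound call
    with METRIC_OUTBOUND_ERROR.labels(remote=remote).count_exceptions():
        with METRIC_OUTBOUND_LATENCY.labels(remote=remote).time():
            return fn()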
Example #6
from prometheus_client import Counter, Histogram

from tornado.log import app_log
from tornado.queues import Queue
from tornado.web import Finish, authenticated

from .base import BaseHandler
from .build import Build, ProgressEvent
from .utils import KUBE_REQUEST_TIMEOUT

# Separate buckets for builds and launches.
# Builds and launches have very different characteristic times,
# and there is a cost to having too many buckets in prometheus.
BUILD_BUCKETS = [60, 120, 300, 600, 1800, 3600, 7200, float("inf")]
LAUNCH_BUCKETS = [2, 5, 10, 20, 30, 60, 120, 300, 600, float("inf")]
BUILD_TIME = Histogram(
    "binderhub_build_time_seconds",
    "Histogram of build times",
    ["status"],
    buckets=BUILD_BUCKETS,
)
LAUNCH_TIME = Histogram(
    "binderhub_launch_time_seconds",
    "Histogram of launch times",
    ["status", "retries"],
    buckets=LAUNCH_BUCKETS,
)
BUILD_COUNT = Counter(
    "binderhub_build_count",
    "Counter of builds by repo",
    ["status", "provider", "repo"],
)
LAUNCH_COUNT = Counter(
    "binderhub_launch_count",
Example #7
import time

from prometheus_client import Counter, Histogram
from prometheus_client import start_http_server
from flask import request

FLASK_REQUEST_ENDPOINT_SENTINEL = "-"
FLASK_REQUEST_LATENCY = Histogram(
    'flask_request_latency_seconds', 'Flask Request Latency',
    ['method', 'endpoint'])
FLASK_REQUEST_COUNT = Counter(
    'flask_request_count', 'Flask Request Count',
    ['method', 'endpoint', 'http_status'])
FLASK_REQUEST_EXCEPTION_COUNT = Counter(
    'flask_request_exception_count', 'Flask Request Exception Count',
    ['method', 'endpoint', 'http_status'])


def before_request():
    request.start_time = time.time()


def after_request(response):
    request_latency = time.time() - request.start_time
    endpoint = request.url_rule.rule if request.url_rule\
        else FLASK_REQUEST_ENDPOINT_SENTINEL
    FLASK_REQUEST_LATENCY.labels(request.method, endpoint).observe(request_latency)
    FLASK_REQUEST_COUNT.labels(request.method, endpoint, response.status_code).inc()
    # Note: count_exceptions() returns a context manager/decorator; calling it
    # bare like this does not increment the counter.
    FLASK_REQUEST_EXCEPTION_COUNT.labels(request.method, endpoint, response.status_code).count_exceptions()
    return response
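
A wiring sketch (assumptions: a Flask app object named app and an arbitrary metrics port, neither shown in the source):

def setup_metrics(app, metrics_port=9090):
    # register the request hooks and expose metrics on a separate port
    # via prometheus_client's built-in HTTP server
    app.before_request(before_request)
    app.after_request(after_request)
    start_http_server(metrics_port)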
Example #8
class ZombieCollector(Collector):
    logs_histogram = Histogram("cmd_docker_logs_latency_seconds",
            "Command call latency for docker logs (seconds)",
            buckets=(1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0,
                float("inf")))
    logs_timeout = 1 # 99th latency is 0.04s

    zombie_container_count = Gauge("zombie_container_count",
            "number of zombie container found for this node",
            ["type"])

    class ZombieRecorder(object):
        def __init__(self, type):
            self.type = type
            self.zombies = {} # key is container id, value is enter zombie time

            # When we first see a zombie container, we only record the time of
            # that sighting and wait an extra decay_time before reporting it as
            # a zombie. At the time of recording the zombie has just appeared
            # and has not been recycled yet, so we wait 5 minutes to avoid
            # flagging normal, short-lived zombies.
            self.decay_time = datetime.timedelta(minutes=5)

        def update(self, zombie_ids, now):
            """ feed in new zombie ids and get id of decayed zombie """
            # remove all records not exist anymore
            for z_id in list(self.zombies.keys()):
                if z_id not in zombie_ids:
                    logger.debug("pop zombie %s that not exist anymore", z_id)
                    self.zombies.pop(z_id)

            result = set()
            for current in zombie_ids:
                if current in self.zombies:
                    enter_zombie_time = self.zombies[current]
                    if now - enter_zombie_time > self.decay_time:
                        result.add(current)
                else:
                    logger.debug("new zombie %s", current)
                    self.zombies[current] = now

            ZombieCollector.zombie_container_count.labels(self.type).set(len(result))
            return result

        def __len__(self):
            return len(self.zombies)

    def __init__(self, name, sleep_time, atomic_ref, iteration_counter, stats_info_ref, zombie_ids_ref):
        Collector.__init__(self, name, sleep_time, atomic_ref, iteration_counter)
        self.stats_info_ref = stats_info_ref
        self.zombie_ids_ref = zombie_ids_ref

        self.type1_zombies = ZombieCollector.ZombieRecorder("job_exit_hangs")
        self.type2_zombies = ZombieCollector.ZombieRecorder("residual_job")

        self.yarn_pattern = r"container_\w{3}_[0-9]{13}_[0-9]{4}_[0-9]{2}_[0-9]{6}"
        self.yarn_container_reg = re.compile(r"^" + self.yarn_pattern + "$")
        self.job_container_reg = re.compile(r"^.+(" + self.yarn_pattern + r")$")

    def update_zombie_count_type1(self, exited_containers, now):
        """ this fn will generate zombie container count for the first type,
        exited_containers is container id set of which we believe exited """
        return self.type1_zombies.update(exited_containers, now)

    def update_zombie_count_type2(self, stats, now):
        """ this fn will generate zombie container count for the second type """
        name_to_id = {}
        for info in stats.values():
            name_to_id[info["name"]] = info["id"]

        # key is job name, value is tuple of corresponding
        # yarn_container name and job container id
        job_containers = {}

        yarn_containers = set()

        zombie_ids = set()

        for name, id in name_to_id.items():
            if re.match(self.yarn_container_reg, name) is not None:
                yarn_containers.add(name)
            elif re.match(self.job_container_reg, name) is not None:
                match = re.match(self.job_container_reg, name)
                value = match.groups()[0]
                job_containers[name] = (value, id)
            else:
                pass # ignore

        for job_name, val in job_containers.items():
            yarn_name, job_id = val
            if yarn_name not in yarn_containers:
                zombie_ids.add(job_id)

        return self.type2_zombies.update(zombie_ids, now)

    def docker_logs(self, container_id, tail="all"):
        try:
            return utils.exec_cmd(
                    ["docker", "logs", "--tail", str(tail), str(container_id)],
                    histogram=ZombieCollector.logs_histogram,
                    stderr=subprocess.STDOUT, # also capture stderr output
                    timeout=ZombieCollector.logs_timeout)
        except subprocess.TimeoutExpired as e:
            logger.warning("docker log timeout")
        except subprocess.CalledProcessError as e:
            logger.warning("docker logs returns %d, output %s", e.returncode, e.output)
        except Exception:
            logger.exception("exec docker logs error")

        return ""

    def is_container_exited(self, container_id):
        logs = self.docker_logs(container_id, tail=50)
        if re.search(u"USER COMMAND END", logs):
            return True
        return False

    def update_zombie_count(self, stats):
        """
        There are two types of zombie:
            1. container which output "USER COMMAND END" but did not exit for a long period of time
            2. yarn container exited but job container didn't
        return set of container id that deemed as zombie
        """
        if stats is None:
            logger.warning("docker stats is None")
            return

        exited_containers = set(filter(self.is_container_exited, stats.keys()))

        now = datetime.datetime.now()
        type1_zombies = self.update_zombie_count_type1(exited_containers, now)
        type2_zombies = self.update_zombie_count_type2(stats, now)
        return type1_zombies.union(type2_zombies)

    def collect_impl(self):
        # it is set to None, so if docker-stats hangs, the next time we get it
        # here we will get None
        stats_info = self.stats_info_ref.get(datetime.datetime.now())
        all_zombies = self.update_zombie_count(stats_info)
        self.zombie_ids_ref.set(all_zombies, datetime.datetime.now())
Example #9
    registry=registry)
freshmaker_event_failed_counter = Counter(
    'freshmaker_event_failed',
    'Number of events, which failed due to error(s)',
    registry=registry)
freshmaker_event_skipped_counter = Counter(
    'freshmaker_event_skipped',
    'Number of events, for which no action was taken',
    registry=registry)
freshmaker_event_canceled_counter = Counter(
    'freshmaker_event_canceled',
    'Number of events canceled during their handling',
    registry=registry)

freshmaker_build_api_latency = Histogram('build_api_latency',
                                         'BuildAPI latency',
                                         registry=registry)
freshmaker_event_api_latency = Histogram('event_api_latency',
                                         'EventAPI latency',
                                         registry=registry)


def db_hook_event_listeners(target=None):
    # Service-specific import of db
    from freshmaker import db

    if not target:
        target = db.engine

    @event.listens_for(target, 'engine_connect')
    def receive_engine_connect(conn, branch):
Example #10
class GpuCollector(Collector):
    cmd_histogram = Histogram("cmd_nvidia_smi_latency_seconds",
            "Command call latency for nvidia-smi (seconds)",
            buckets=(1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0,
                float("inf")))

    cmd_timeout = 600

    def __init__(self, name, sleep_time, atomic_ref, iteration_counter,
            gpu_info_ref, zombie_info_ref, mem_leak_thrashold):
        Collector.__init__(self, name, sleep_time, atomic_ref, iteration_counter)
        self.gpu_info_ref = gpu_info_ref
        self.zombie_info_ref = zombie_info_ref
        self.mem_leak_thrashold = mem_leak_thrashold

    @staticmethod
    def get_container_id(pid):
        """ return two values, the first one is if we found the corresponding
        container_id, the second one is the container_id if found """
        path = "/proc/%d/cgroup" % (pid)
        if not os.path.isfile(path):
            return False, ""

        with open(path) as f:
            content = f.read()

        for line in content.split("\n"):
            line = line.strip()
            if "pids" in line:
                if "/docker/" in line:
                    parts = line.split("/docker/")
                    if len(parts) == 2 and re.match(u"[0-9a-f]+", parts[1]):
                        return True, parts[1]
                elif "/kubepods/" in line:
                    parts = line.split("/kubepods/")
                    if len(parts) == 2 and re.match(u"pod[0-9a-f-]+", parts[1]):
                        return True, parts[1]
                else:
                    logger.info("unknown format in pid cgroup %s", line)

        return False, ""

    @staticmethod
    def convert_to_metrics(gpu_info, zombie_info, pid_to_cid_fn, mem_leak_thrashold):
        """ This fn used to convert gpu_info & zombie_info into metrics, used to make
        it easier to do unit test """
        core_utils = gen_gpu_util_gauge()
        mem_utils = gen_gpu_mem_util_gauge()
        gpu_temp = gen_gpu_temperature_gauge()
        ecc_errors = gen_gpu_ecc_counter()
        retired_page = gen_gpu_retired_page_count()
        mem_leak = gen_gpu_memory_leak_counter()
        external_process = gen_gpu_used_by_external_process_counter()
        zombie_container = gen_gpu_used_by_zombie_container_counter()

        pids_use_gpu = {} # key is gpu minor, value is an array of pid

        for minor, info in gpu_info.items():
            if not minor.isdigit():
                continue # ignore UUID

            uuid = info.uuid

            core_utils.add_metric([minor, uuid], info.gpu_util)
            mem_utils.add_metric([minor, uuid], info.gpu_mem_util)
            if info.temperature is not None:
                gpu_temp.add_metric([minor, uuid], info.temperature)
            ecc_errors.add_metric([minor, uuid, "volatile_single"],
                                  info.ecc_errors.volatile_single)
            ecc_errors.add_metric([minor, uuid, "volatile_double"],
                                  info.ecc_errors.volatile_double)
            ecc_errors.add_metric([minor, uuid, "aggregated_single"],
                                  info.ecc_errors.aggregated_single)
            ecc_errors.add_metric([minor, uuid, "aggregated_double"],
                                  info.ecc_errors.aggregated_double)

            retired_page.add_metric([minor, uuid, "single"],
                                    info.ecc_errors.single_retirement)
            retired_page.add_metric([minor, uuid, "double"],
                                    info.ecc_errors.double_retirement)
            if info.gpu_mem_util > mem_leak_thrashold and len(info.pids) == 0:
                # we found memory leak less than 20M can be mitigated automatically
                mem_leak.add_metric([minor, uuid], 1)

            if len(info.pids) > 0:
                pids_use_gpu[minor] = info.pids

        logger.debug("pids_use_gpu is %s, zombie_info is %s", pids_use_gpu, zombie_info)
        if len(pids_use_gpu) > 0:
            if zombie_info is None:
                zombie_info = []

            for minor, pids in pids_use_gpu.items():
                for pid in pids:
                    found, z_id = pid_to_cid_fn(pid)
                    logger.debug("pid %s has found %s, z_id %s", pid, found, z_id)
                    if found:
                        # NOTE: zombie_info is a set of short docker container id, but
                        # z_id is full id.
                        for zombie_id in zombie_info:
                            if z_id.startswith(zombie_id):
                                # found corresponding container
                                zombie_container.add_metric([minor, zombie_id], 1)
                    else:
                        external_process.add_metric([minor, str(pid)], 1)
            if len(zombie_container.samples) > 0 or len(external_process.samples) > 0:
                logger.warning("found gpu used by external %s, zombie container %s",
                        external_process, zombie_container)

        return [core_utils, mem_utils, ecc_errors, mem_leak,
            external_process, zombie_container, gpu_temp, retired_page]

    def collect_impl(self):
        gpu_info = nvidia.nvidia_smi(GpuCollector.cmd_histogram,
                GpuCollector.cmd_timeout)

        logger.debug("get gpu_info %s", gpu_info)

        now = datetime.datetime.now()
        self.gpu_info_ref.set(gpu_info, now)
        zombie_info = self.zombie_info_ref.get(now)

        if gpu_info is not None:
            return GpuCollector.convert_to_metrics(gpu_info, zombie_info,
                    GpuCollector.get_container_id, self.mem_leak_thrashold)
        return None
Example #11
class ContainerCollector(Collector):
    stats_histogram = Histogram(
        "cmd_docker_stats_latency_seconds",
        "Command call latency for docker stats (seconds)",
        buckets=(1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0,
                 1024.0, float("inf")))
    stats_timeout = 20
    # The 99th-percentile latency may be larger than 10s. With prometheus's
    # default histogram buckets the largest finite bucket is 10s, so anything
    # above that only shows up in the +Inf bucket.

    inspect_histogram = Histogram("cmd_docker_inspect_latency_seconds",
            "Command call latency for docker inspect (seconds)")
    inspect_timeout = 2 # 99th latency is 0.042s

    iftop_histogram = Histogram("cmd_iftop_latency_seconds",
            "Command call latency for iftop (seconds)")
    iftop_timeout = 10 # 99th latency is 7.4s

    lsof_histogram = Histogram("cmd_lsof_latency_seconds",
            "Command call latency for lsof (seconds)")
    lsof_timeout = 2 # 99th latency is 0.5s

    pai_services = list(
        map(
            lambda s: "k8s_" + s,
            [
                "grafana",
                "prometheus",
                "alertmanager",
                "watchdog",
                "end-to-end-test",
                "yarn-frameworklauncher",
                "hadoop-jobhistory-service",
                "hadoop-name-node",
                "hadoop-node-manager",
                "hadoop-resource-manager",
                "hadoop-data-node",
                "zookeeper",
                "node-exporter",
                "job-exporter",
                "yarn-exporter",
                "nvidia-drivers",
                "docker-cleaner",

                # Below are DLTS services
                "nginx",
                "restfulapi",
                "weave",
                "weave-npc",
                "nvidia-device-plugin-ctr",
                "mysql",
                "jobmanager",
                "fluent-bit",
                "azure-blob-adapter",
                "nvidia-dcgm-exporter",
                "alert-manager",
                "reaper",
                "dashboard",
                "kubedns",
                "repairmanager",
                "redis",
            ]))

    def __init__(self, name, sleep_time, atomic_ref, iteration_counter, gpu_info_ref,
            stats_info_ref, interface, npu_info_ref, dcgm_info_ref):
        Collector.__init__(self, name, sleep_time, atomic_ref, iteration_counter)
        self.gpu_info_ref = gpu_info_ref
        self.npu_info_ref = npu_info_ref
        self.stats_info_ref = stats_info_ref
        self.dcgm_info_ref = dcgm_info_ref

        self.network_interface = network.try_to_get_right_interface(interface)
        logger.info("found %s as potential network interface to listen network traffic",
                self.network_interface)

        # k8s prepends "k8s_" to pod names. There will also be a container whose
        # name starts with "k8s_POD_"; this is a docker container used to set up
        # the network & pid namespaces for a specific pod. Containers whose names
        # start with "k8s_POD" consume nothing.

    def collect_impl(self):
        all_conns = network.iftop(self.network_interface,
                ContainerCollector.iftop_histogram,
                ContainerCollector.iftop_timeout)

        stats_obj = docker_stats.stats(ContainerCollector.stats_histogram,
                ContainerCollector.stats_timeout)

        now = datetime.datetime.now()
        gpu_infos = self.gpu_info_ref.get(now)
        npu_infos = self.npu_info_ref.get(now)
        self.stats_info_ref.set(stats_obj, now)
        dcgm_infos = self.dcgm_info_ref.get(now)

        logger.debug("all_conns is %s", all_conns)
        logger.debug("gpu_info is %s", gpu_infos)
        logger.debug("stats_obj is %s", stats_obj)
        logger.debug("dcgm_infos is %s", dcgm_infos)

        return self.collect_container_metrics(stats_obj, gpu_infos, all_conns, npu_infos, dcgm_infos)

    @staticmethod
    def parse_from_labels(inspect_info, gpu_infos):
        gpu_ids = []
        npu_ids = []
        result_labels = {}

        result_labels["username"] = inspect_info.username or "unknown"
        result_labels["job_name"] = inspect_info.job_name or "unknown"
        result_labels["role_name"] = inspect_info.role_name or "unknown"
        result_labels["task_index"] = inspect_info.task_index or "unknown"
        result_labels["pod_name"] = inspect_info.pod_name or "unknown"
        result_labels["user_email"] = inspect_info.email or "unknown"
        result_labels["vc_name"] = inspect_info.vc_name or "unknown"

        if inspect_info.gpu_ids:
            ids = inspect_info.gpu_ids.replace("\"", "").split(",")
            for id in ids:
                # If the container was scheduled by yarn, we get its GPU usage
                # info from label GPU_ID, value of the label is minor_number, and
                # will be digits.
                # If the container was scheduled by kube launcher, we get its GPU
                # usage info from environment NVIDIA_VISIBLE_DEVICES, the value
                # is like GPU-dc0671b0-61a4-443e-f456-f8fa6359b788. The mapping
                # from uuid to minor_number is obtained via nvidia-smi, and
                # gpu_infos should have this uuid as a key.
                if id.isdigit():
                    gpu_ids.append(id)
                elif id and gpu_infos is not None:
                    # id is in form of UUID like
                    if gpu_infos.get(id) is not None:
                        gpu_ids.append(gpu_infos[id].minor)
                    else:
                        logger.warning("gpu uuid %s can not be found in map %s",
                                id, gpu_infos)
                else:
                    logger.warning("unknown gpu id %s, gpu_infos is %s",
                            id, gpu_infos)
        if inspect_info.npu_ids:
            ids = inspect_info.npu_ids.replace("\"", "").split(",")
            for id in ids:
                if id.isdigit():
                    npu_ids.append(id)

        return gpu_ids, npu_ids, result_labels

    @classmethod
    def infer_service_name(cls, container_name):
        """ try to infer service name from container_name, if it's container not belongs
        to pai service, will return None """
        if container_name.startswith("k8s_POD_"):
            # this is empty container created by k8s for pod
            return None

        # TODO speed this up, since this is O(n^2)
        for service_name in cls.pai_services:
            if container_name.startswith(service_name):
                return service_name[4:] # remove "k8s_" prefix

        return None

    def process_one_container(self, container_id, stats, gpu_infos, all_conns, gauges, npu_infos, dcgm_infos):
        container_name = utils.walk_json_field_safe(stats, "name")
        pai_service_name = ContainerCollector.infer_service_name(container_name)

        inspect_info = docker_inspect.inspect(container_id,
                ContainerCollector.inspect_histogram,
                ContainerCollector.inspect_timeout)

        if inspect_info is None:
            return

        pid = inspect_info.pid
        job_name = inspect_info.job_name

        logger.debug("%s has inspect result %s, service_name %s",
                container_name, inspect_info, pai_service_name)

        if job_name is None and pai_service_name is None:
            logger.debug("%s is ignored", container_name)
            return # other container, maybe kubelet or api-server

        # get network consumption, if container is host network, we will treat
        # node network consumption as container consumption. If not, use data
        # from docker state.
        # This will result network consumption of service using host network
        # equals to node network consumption.
        is_host_network = inspect_info.is_host_network
        if is_host_network:
            net_in, net_out = network.get_network_consumption(
                self.network_interface)
        else:
            net_in, net_out = network.get_non_host_network_consumption(pid)

        if pai_service_name is None:
            gpu_ids, npu_ids, container_labels = ContainerCollector.parse_from_labels(inspect_info, gpu_infos)
            logger.info("start to collect metrics for jobId: %s", container_labels["job_name"])
            if container_labels["username"] == "unknown":
                logger.warning("jobId: %s has no username, skipping", container_labels["job_name"])
                return
            if gpu_infos:
                for id in gpu_ids:
                    if gpu_infos.get(id) is None:
                        continue

                    nvidia_gpu_status = gpu_infos[id]
                    uuid = nvidia_gpu_status.uuid
                    labels = copy.deepcopy(container_labels)
                    labels["minor_number"] = id
                    labels["device_type"] = inspect_info.gpu_type or "unknown"
                    labels["uuid"] = uuid
                    labels["device_str"] = "nvidia.com/gpu"

                    gauges.add_value("task_device_percent",
                            labels, nvidia_gpu_status.gpu_util)
                    gauges.add_value("task_device_mem_percent",
                            labels, nvidia_gpu_status.gpu_mem_util)

            if npu_infos:
                for id in npu_ids:
                    if npu_infos.get(id) is None:
                        continue

                    npu_status = npu_infos[id]
                    labels = copy.deepcopy(container_labels)
                    labels["minor_number"] = id
                    labels["device_type"] = inspect_info.gpu_type or "unknown"
                    labels["device_str"] = "npu.huawei.com/NPU"
                    ### each npu device should have one unique string
                    labels["uuid"] = id
                    if inspect_info.node_name:
                        labels["uuid"] =inspect_info.node_name+ "_" + str(id)

                    gauges.add_value("task_device_percent",
                            labels, npu_status.npu_util)
                    gauges.add_value("task_device_mem_percent",
                            labels, npu_status.npu_mem_util)
            if dcgm_infos:
                for id in gpu_ids:
                    if dcgm_infos.get(id) is None:
                        continue
                    dcgm_metric = dcgm_infos[id] # will be type of DCGMMetrics
                    uuid = dcgm_metric.uuid
                    labels = copy.deepcopy(container_labels)
                    labels["minor_number"] = id
                    labels["uuid"] = uuid
                    gauges.add_dcgm_metric(dcgm_metric, labels)

            gauges.add_value("task_cpu_percent", container_labels, stats["CPUPerc"])
            gauges.add_value("task_mem_usage_byte", container_labels, stats["MemUsage_Limit"]["usage"])
            gauges.add_value("task_mem_limit_byte", container_labels, stats["MemUsage_Limit"]["limit"])
            gauges.add_value("task_net_in_byte", container_labels, net_in)
            gauges.add_value("task_net_out_byte", container_labels, net_out)
            gauges.add_value("task_block_in_byte", container_labels, stats["BlockIO"]["in"])
            gauges.add_value("task_block_out_byte", container_labels, stats["BlockIO"]["out"])
            gauges.add_value("task_mem_usage_percent", container_labels, stats["MemPerc"])
        else:
            labels = {"name": pai_service_name}
            gauges.add_value("service_cpu_percent", labels, stats["CPUPerc"])
            gauges.add_value("service_mem_usage_byte", labels, stats["MemUsage_Limit"]["usage"])
            gauges.add_value("service_mem_limit_byte", labels, stats["MemUsage_Limit"]["limit"])
            gauges.add_value("service_mem_usage_percent", labels, stats["MemPerc"])
            gauges.add_value("service_net_in_byte", labels, net_in)
            gauges.add_value("service_net_out_byte", labels, net_out)
            gauges.add_value("service_block_in_byte", labels, stats["BlockIO"]["in"])
            gauges.add_value("service_block_out_byte", labels, stats["BlockIO"]["out"])

    def collect_container_metrics(self, stats_obj, gpu_infos, all_conns, npu_infos, dcgm_infos):
        if stats_obj is None:
            logger.warning("docker stats returns None")
            return None

        gauges = ResourceGauges()

        for container_id, stats in stats_obj.items():
            try:
                self.process_one_container(container_id, stats, gpu_infos, all_conns, gauges, npu_infos, dcgm_infos)
            except Exception:
                logger.exception("error when trying to process container %s with name %s",
                        container_id, utils.walk_json_field_safe(stats, "name"))

        return gauges.as_array()
Example #12
class ContainerCollector(Collector):
    stats_histogram = Histogram(
        "cmd_docker_stats_latency_seconds",
        "Command call latency for docker stats (seconds)")
    stats_timeout = 20
    # The 99th-percentile latency may be larger than 10s. With prometheus's
    # default histogram buckets the largest finite bucket is 10s, so anything
    # above that only shows up in the +Inf bucket.

    inspect_histogram = Histogram(
        "cmd_docker_inspect_latency_seconds",
        "Command call latency for docker inspect (seconds)")
    inspect_timeout = 1  # 99th latency is 0.042s

    iftop_histogram = Histogram("cmd_iftop_latency_seconds",
                                "Command call latency for iftop (seconds)")
    iftop_timeout = 10  # 99th latency is 7.4s

    lsof_histogram = Histogram("cmd_lsof_latency_seconds",
                               "Command call latency for lsof (seconds)")
    lsof_timeout = 2  # 99th latency is 0.5s

    pai_services = list(
        map(
            lambda s: "k8s_" + s,
            [
                "rest-server",
                "pylon",
                "webportal",
                "grafana",
                "prometheus",
                "alertmanager",
                "watchdog",
                "end-to-end-test",
                "yarn-frameworklauncher",
                "hadoop-jobhistory-service",
                "hadoop-name-node",
                "hadoop-node-manager",
                "hadoop-resource-manager",
                "hadoop-data-node",
                "zookeeper",
                "node-exporter",
                "job-exporter",
                "yarn-exporter",
                "nvidia-drivers",
                "docker-cleaner",

                # Below are DLTS services
                "nginx",
                "restfulapi",
                "weave",
                "weave-npc",
                "nvidia-device-plugin-ctr",
                "mysql",
                "jobmanager",
            ]))

    def __init__(self, name, sleep_time, atomic_ref, iteration_counter,
                 gpu_info_ref, stats_info_ref, interface):
        Collector.__init__(self, name, sleep_time, atomic_ref,
                           iteration_counter)
        self.gpu_info_ref = gpu_info_ref
        self.stats_info_ref = stats_info_ref

        self.network_interface = network.try_to_get_right_interface(interface)
        logger.info(
            "found %s as potential network interface to listen network traffic",
            self.network_interface)

        # k8s prepends "k8s_" to pod names. There will also be a container whose
        # name starts with "k8s_POD_"; this is a docker container used to set up
        # the network & pid namespaces for a specific pod. Containers whose names
        # start with "k8s_POD" consume nothing.

    def collect_impl(self):
        all_conns = network.iftop(self.network_interface,
                                  ContainerCollector.iftop_histogram,
                                  ContainerCollector.iftop_timeout)

        stats_obj = docker_stats.stats(ContainerCollector.stats_histogram,
                                       ContainerCollector.stats_timeout)

        now = datetime.datetime.now()
        gpu_infos = self.gpu_info_ref.get(now)
        self.stats_info_ref.set(stats_obj, now)

        logger.debug("all_conns is %s", all_conns)
        logger.debug("gpu_info is %s", gpu_infos)
        logger.debug("stats_obj is %s", stats_obj)

        return self.collect_container_metrics(stats_obj, gpu_infos, all_conns)

    @staticmethod
    def parse_from_labels(inspect_info, gpu_infos):
        gpu_ids = []
        result_labels = {}

        result_labels["username"] = inspect_info.username or "unknown"
        result_labels["job_name"] = inspect_info.job_name or "unknown"
        result_labels["role_name"] = inspect_info.role_name or "unknown"
        result_labels["task_index"] = inspect_info.task_index or "unknown"
        result_labels[
            "job_instance_id"] = inspect_info.job_instance_id or "unknown"

        if inspect_info.gpu_ids:
            ids = inspect_info.gpu_ids.replace("\"", "").split(",")
            for id in ids:
                # If the container was scheduled by yarn, we get its GPU usage
                # info from label GPU_ID, value of the label is minor_number, and
                # will be digits.
                # If the container was scheduled by kube launcher, we get its GPU
                # usage info from environment NVIDIA_VISIBLE_DEVICES, the value
                # is like GPU-dc0671b0-61a4-443e-f456-f8fa6359b788. The mapping
                # from uuid to minor_number is obtained via nvidia-smi, and
                # gpu_infos should have this uuid as a key.
                if id.isdigit():
                    gpu_ids.append(id)
                elif id and gpu_infos is not None:
                    # id is in form of UUID like
                    if gpu_infos.get(id) is not None:
                        gpu_ids.append(gpu_infos[id].minor)
                    else:
                        logger.warning(
                            "gpu uuid %s can not be found in map %s", id,
                            gpu_infos)
                else:
                    logger.warning("unknown gpu id %s, gpu_infos is %s", id,
                                   gpu_infos)

        return gpu_ids, result_labels

    @classmethod
    def infer_service_name(cls, container_name):
        """ try to infer service name from container_name, if it's container not belongs
        to pai service, will return None """
        if container_name.startswith("k8s_POD_"):
            # this is empty container created by k8s for pod
            return None

        # TODO speed this up, since this is O(n^2)
        for service_name in cls.pai_services:
            if container_name.startswith(service_name):
                return service_name[4:]  # remove "k8s_" prefix

        return None

    def process_one_container(self, container_id, stats, gpu_infos, all_conns,
                              gauges):
        container_name = utils.walk_json_field_safe(stats, "name")
        pai_service_name = ContainerCollector.infer_service_name(
            container_name)

        inspect_info = docker_inspect.inspect(
            container_id, ContainerCollector.inspect_histogram,
            ContainerCollector.inspect_timeout)

        pid = inspect_info.pid
        job_name = inspect_info.job_name

        logger.debug("%s has inspect result %s, service_name %s",
                     container_name, inspect_info, pai_service_name)

        if job_name is None and pai_service_name is None:
            logger.debug("%s is ignored", container_name)
            return  # other container, maybe kubelet or api-server

        # get network consumption, since all our services/jobs running in host
        # network, and network statistic from docker is not specific to that
        # container. We have to get network statistic by ourselves.
        lsof_result = network.lsof(pid, ContainerCollector.lsof_histogram,
                                   ContainerCollector.lsof_timeout)

        net_in, net_out = network.get_container_network_metrics(
            all_conns, lsof_result)
        if logger.isEnabledFor(logging.DEBUG):
            debug_info = utils.exec_cmd(
                "ps -o cmd fp {0} | tail -n 1".format(pid), shell=True)

            logger.debug(
                "pid %s with cmd `%s` has lsof result %s, in %d, out %d", pid,
                debug_info.strip(), lsof_result, net_in, net_out)

        if pai_service_name is None:
            gpu_ids, container_labels = ContainerCollector.parse_from_labels(
                inspect_info, gpu_infos)

            if gpu_infos:
                for id in gpu_ids:
                    if gpu_infos.get(id) is None:
                        continue

                    nvidia_gpu_status = gpu_infos[id]
                    labels = copy.deepcopy(container_labels)
                    labels["minor_number"] = id

                    gauges.add_value("task_gpu_percent", labels,
                                     nvidia_gpu_status.gpu_util)
                    gauges.add_value("task_gpu_mem_percent", labels,
                                     nvidia_gpu_status.gpu_mem_util)

            gauges.add_value("task_cpu_percent", container_labels,
                             stats["CPUPerc"])
            gauges.add_value("task_mem_usage_byte", container_labels,
                             stats["MemUsage_Limit"]["usage"])
            gauges.add_value("task_mem_limit_byte", container_labels,
                             stats["MemUsage_Limit"]["limit"])
            gauges.add_value("task_net_in_byte", container_labels, net_in)
            gauges.add_value("task_net_out_byte", container_labels, net_out)
            gauges.add_value("task_block_in_byte", container_labels,
                             stats["BlockIO"]["in"])
            gauges.add_value("task_block_out_byte", container_labels,
                             stats["BlockIO"]["out"])
            gauges.add_value("task_mem_usage_percent", container_labels,
                             stats["MemPerc"])
        else:
            labels = {"name": pai_service_name}
            gauges.add_value("service_cpu_percent", labels, stats["CPUPerc"])
            gauges.add_value("service_mem_usage_byte", labels,
                             stats["MemUsage_Limit"]["usage"])
            gauges.add_value("service_mem_limit_byte", labels,
                             stats["MemUsage_Limit"]["limit"])
            gauges.add_value("service_mem_usage_percent", labels,
                             stats["MemPerc"])
            gauges.add_value("service_net_in_byte", labels, net_in)
            gauges.add_value("service_net_out_byte", labels, net_out)
            gauges.add_value("service_block_in_byte", labels,
                             stats["BlockIO"]["in"])
            gauges.add_value("service_block_out_byte", labels,
                             stats["BlockIO"]["out"])

    def collect_container_metrics(self, stats_obj, gpu_infos, all_conns):
        if stats_obj is None:
            logger.warning("docker stats returns None")
            return None

        gauges = ResourceGauges()

        for container_id, stats in stats_obj.items():
            try:
                self.process_one_container(container_id, stats, gpu_infos,
                                           all_conns, gauges)
            except Exception:
                logger.exception(
                    "error when trying to process container %s with name %s",
                    container_id, utils.walk_json_field_safe(stats, "name"))

        return gauges.as_array()
"""Prometheus metrics."""
from prometheus_client import CollectorRegistry, multiprocess
from prometheus_client import Counter, Histogram, Gauge

REGISTRY = CollectorRegistry()
multiprocess.MultiProcessCollector(REGISTRY)

TWEETS_COUNTER = Counter('tweets_counter',
                         'Global count of tweets',
                         registry=REGISTRY)

TWEETS_COUNTS_HISTOGRAM = Histogram('tweets_counts_histogram',
                                    'Tweets per request histogram',
                                    registry=REGISTRY)

TWEETS_AVERAGE_REQUEST_TIME = Gauge('tweets_average_fetch_time_gauge',
                                    'Average time of fetching one tweet',
                                    ['param'],
                                    registry=REGISTRY)

TWEETS_TIME_HISTOGRAM = Histogram('tweets_time_histogram',
                                  'Histogram of time per tweet',
                                  registry=REGISTRY)

TWEETS_AVERAGE_LENGHT = Gauge('tweets_average_length',
                              'Average length of a specific tweet', ['param'],
                              registry=REGISTRY)


def update_average_request_time(size, elapsed):
    """Parses script arguments."""
Example #14
from prometheus_client import Counter, Histogram
from django_prometheus.utils import Time, TimeSince, PowersOf
import django

if django.VERSION >= (1, 10, 0):
    from django.utils.deprecation import MiddlewareMixin
else:
    MiddlewareMixin = object

requests_total = Counter('django_http_requests_before_middlewares_total',
                         'Total count of requests before middlewares run.')
responses_total = Counter('django_http_responses_before_middlewares_total',
                          'Total count of responses before middlewares run.')
requests_latency_before = Histogram(
    'django_http_requests_latency_including_middlewares_seconds',
    ('Histogram of requests processing time (including middleware '
     'processing time).'))
requests_unknown_latency_before = Counter(
    'django_http_requests_unknown_latency_including_middlewares_total',
    ('Count of requests for which the latency was unknown (when computing '
     'django_http_requests_latency_including_middlewares_seconds).'))


class PrometheusBeforeMiddleware(MiddlewareMixin):
    """Monitoring middleware that should run before other middlewares."""
    def process_request(self, request):
        requests_total.inc()
        request.prometheus_before_middleware_event = Time()

    def process_response(self, request, response):
        responses_total.inc()
Example #15
import logging
from collections import namedtuple

from prometheus_client import Histogram

from synapse.api.constants import EventTypes
from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, StateResolutionVersions
from synapse.events.snapshot import EventContext
from synapse.state import v1, v2
from synapse.util.async_helpers import Linearizer
from synapse.util.caches import get_cache_factor_for
from synapse.util.caches.expiringcache import ExpiringCache
from synapse.util.logutils import log_function
from synapse.util.metrics import Measure

logger = logging.getLogger(__name__)

# Metrics for number of state groups involved in a resolution.
state_groups_histogram = Histogram(
    "synapse_state_number_state_groups_in_resolution",
    "Number of state groups used when performing a state resolution",
    buckets=(1, 2, 3, 5, 7, 10, 15, 20, 50, 100, 200, 500, "+Inf"),
)

KeyStateTuple = namedtuple("KeyStateTuple", ("context", "type", "state_key"))

SIZE_OF_CACHE = 100000 * get_cache_factor_for("state_cache")
EVICTION_TIMEOUT_SECONDS = 60 * 60

_NEXT_STATE_ID = 1

POWER_KEY = (EventTypes.PowerLevels, "")


def _gen_state_id():
    global _NEXT_STATE_ID
Example #16
import logging
from timeit import default_timer

from flask import request
from prometheus_client import Counter, Histogram, Info

logger = logging.getLogger(__name__)

APP_NAME = "phippy_api"
APP_INFO = Info("api_version", "API Version")
ERRORS_COUNT = Counter("errors_total", "Number of errors", ["app", "verb", "endpoint", "status"])
REQUESTS_COUNT = Counter("request_total", "Request count", ["app", "verb", "endpoint", "status"])
REQUEST_DURATION_HISTOGRAM = Histogram(
    "request_duration_seconds", "Request duration in seconds", ["app", "verb", "endpoint", "status"]
)


def register_metrics(app, app_version=None, app_config=None):
    """Register metrics middlewares"""

    app.before_request(before_request)
    app.after_request(after_request)


def record_error_metric(status=None):
    """Record errors"""

    ERRORS_COUNT.labels(
        APP_NAME,
        request.method,
        request.endpoint,
Example #17
from prometheus_client import Gauge, Histogram

GITHUB_ACCESS_TOKENS_SELECTOR = 0

CODE_INVENTORY = Gauge('code_inventory',
                       'Amount of unmerged work in a repository.',
                       ['owner', 'repo', 'metric'])

FEATURES = Gauge(
    'features',
    'Counts of features in org repositories, based on number of manifest files.',
    ['owner', 'repo'])

CODE_INVENTORY_AGE = Histogram('code_inventory_age',
                               'Code inventory age in days.',
                               ['owner', 'repo'],
                               buckets=[
                                   1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16,
                                   18, 20, 25, 30, 35, 40, 50, 60, 70, 80, 90,
                                   100, 150, 200, 250, 300, 365,
                                   float("inf")
                               ])

REPO_SCRAPE_TIMES = {}


def get_access_token():
    global GITHUB_ACCESS_TOKENS_SELECTOR
    token = GITHUB_ACCESS_TOKENS[GITHUB_ACCESS_TOKENS_SELECTOR % 2]
    GITHUB_ACCESS_TOKENS_SELECTOR += 1
    return token
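
A usage sketch for the histogram above (not from the source; the helper name, inputs, and age computation are assumed):

def record_inventory_age(owner, repo, opened_at_list, now):
    # hypothetical: observe the age in days of each piece of unmerged work
    for opened_at in opened_at_list:
        age_days = (now - opened_at).total_seconds() / 86400.0
        CODE_INVENTORY_AGE.labels(owner=owner, repo=repo).observe(age_days)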

Example #18
File: hello.py  Project: houssemFat/tools
from prometheus_client import start_http_server, Summary, Histogram, Counter, Gauge
import random
import time

# Create a metric to track time spent and requests made.
request_time = Summary('request_processing_seconds',
                       'Time spent processing request')
histogram = Histogram('request_latency_seconds', 'Request latency')
#histogram.DEFAULT_BUCKETS = (.005, .01, .025, .05, .075, .1, .25, .5, .75, 1.0, 2.5, 5.0, 7.5, 10.0, INF)
counter = Counter('http_requests_total', 'Total Request Count')
in_progress = Gauge('requests_in_progress_total', 'Requests in progress')


# Decorate function with metric.
@request_time.time()
@histogram.time()
#@in_progress.time()
def process_request(t):
    """A dummy function that takes some time."""
    time.sleep(t)
    time.sleep(2)
    #counter.inc()


if __name__ == '__main__':
    # Start up the server to expose the metrics.
    start_http_server(4000)
    # Generate some requests.
    while True:
        process_request(random.random())
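
Note: the commented-out DEFAULT_BUCKETS line above would not change an already-constructed histogram; custom buckets are passed to the Histogram constructor instead, for example (metric name chosen for illustration):

latency_with_buckets = Histogram('request_latency_seconds_custom', 'Request latency',
                                 buckets=(.005, .01, .025, .05, .075, .1, .25, .5,
                                          .75, 1.0, 2.5, 5.0, 7.5, 10.0, float("inf")))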
Example #19
import time

from flask import request
from prometheus_client import Counter, Histogram, Info

#
# Metrics registration
#

METRICS_REQUEST_LATENCY = Histogram("app_request_latency_seconds",
                                    "Application Request Latency",
                                    ["method", "endpoint"])

METRICS_REQUEST_COUNT = Counter(
    "app_request_count",
    "Application Request Count",
    ["method", "endpoint", "http_status"],
)

METRICS_INFO = Info("app_version", "Application Version")

#
# Request callbacks
#


def before_request():
    """
    Get start time of a request
    """
    request._prometheus_metrics_request_start_time = time.time()
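
The snippet is truncated here; a matching after_request callback (a sketch, not from the source) using the metrics defined above might look like:

def after_request(response):
    """Observe request latency and count the request (sketch)."""
    request_latency = time.time() - request._prometheus_metrics_request_start_time
    METRICS_REQUEST_LATENCY.labels(request.method, request.path).observe(request_latency)
    METRICS_REQUEST_COUNT.labels(request.method, request.path, response.status_code).inc()
    return response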
Example #20
import pyshark
import time
import os

from prometheus_client import Counter, Gauge, Histogram, start_http_server

# Global variables
HTTP_REQUESTS = []

# Number of http requests currently being processed.
http_inprogress_requests = Gauge('http_inprogress_requests', '<description/>')
http_request_latency = Histogram('http_request_latency_ms', '<description/>')
http_counter = Counter('http_request_total', '<description/>',
                       ['method', 'uri', 'response_code', 'response_time'])  #,


# Main
def main():
    #Start prometheus exporter.
    start_http_server(12301)

    #Setup of pyshark
    # TODO: move interface and ip to environment variables...
    capture = pyshark.LiveCapture(
        interface='eth0',
        bpf_filter='host ' + os.environ['ROYALNETM_IP'] +
        ' and not port 12301')  #, display_filter='http')
    capture.set_debug()
    capture

    for packet in capture.sniff_continuously():
Example #21
from prometheus_client import Counter, Histogram

# The number of times we are recalculating state when there is only a
# single forward extremity
state_delta_single_event_counter = Counter(
    "synapse_storage_events_state_delta_single_event", "")

# The number of times we are recalculating state when we could have reasonably
# calculated the delta when we calculated the state for an event we were
# persisting.
state_delta_reuse_delta_counter = Counter(
    "synapse_storage_events_state_delta_reuse_delta", "")

# The number of forward extremities for each new event.
forward_extremities_counter = Histogram(
    "synapse_storage_events_forward_extremities_persisted",
    "Number of forward extremities for each new event",
    buckets=(1, 2, 3, 5, 7, 10, 15, 20, 50, 100, 200, 500, "+Inf"),
)

# The number of stale forward extremities for each new event. Stale extremities
# are those that were in the previous set of extremities as well as the new.
stale_forward_extremities_counter = Histogram(
    "synapse_storage_events_stale_forward_extremities_persisted",
    "Number of unchanged forward extremities for each new event",
    buckets=(0, 1, 2, 3, 5, 7, 10, 15, 20, 50, 100, 200, 500, "+Inf"),
)

state_resolutions_during_persistence = Counter(
    "synapse_storage_events_state_resolutions_during_persistence",
    "Number of times we had to do state res to calculate new current state",
)
Example #22
from tornado.concurrent import chain_future, Future
from tornado import gen, web
from tornado.queues import Queue
from tornado.iostream import StreamClosedError
from tornado.ioloop import IOLoop
from tornado.log import app_log
from prometheus_client import Histogram, Gauge

from .base import BaseHandler
from .build import Build, FakeBuild
BUCKETS = [
    2, 5, 10, 15, 20, 25, 30, 60, 120, 240, 480, 960, 1920,
    float("inf")
]
BUILD_TIME = Histogram('binderhub_build_time_seconds',
                       'Histogram of build times',
                       ['status', 'provider', 'repo'],
                       buckets=BUCKETS)
LAUNCH_TIME = Histogram('binderhub_launch_time_seconds',
                        'Histogram of launch times',
                        ['status', 'provider', 'repo', 'retries'],
                        buckets=BUCKETS)
BUILDS_INPROGRESS = Gauge('binderhub_inprogress_builds',
                          'Builds currently in progress')
LAUNCHES_INPROGRESS = Gauge('binderhub_inprogress_launches',
                            'Launches currently in progress')


class BuildHandler(BaseHandler):
    """A handler for working with GitHub."""
    # emit keepalives every 25 seconds to avoid idle connections being closed
    KEEPALIVE_INTERVAL = 25
Example #23
from flask import Response, Flask, request
import prometheus_client
from prometheus_client.core import CollectorRegistry
from prometheus_client import Summary, Counter, Histogram, Gauge
import time

app = Flask(__name__)

_INF = float("inf")

graphs = {}
graphs['c'] = Counter('python_request_operations_total',
                      'The total number of processed requests')
graphs['h'] = Histogram('python_request_duration_seconds',
                        'Histogram for the duration in seconds.',
                        buckets=(1, 2, 5, 6, 10, _INF))


@app.route("/")
def hello():
    start = time.time()
    graphs['c'].inc()

    time.sleep(0.600)
    end = time.time()
    graphs['h'].observe(end - start)
    return "Hello World!"


@app.route("/metrics")
def requests_count():
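    # Truncated in the source. A typical body for this Flask /metrics route
    # (a sketch, not necessarily the original) renders every registered metric
    # with prometheus_client.generate_latest:
    res = []
    for metric in graphs.values():
        res.append(prometheus_client.generate_latest(metric))
    return Response(res, mimetype="text/plain")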
Example #24
def set_histogram(name, *args, **kwargs):
    metrics[name] = Histogram(name, *args, **kwargs)
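
A usage sketch (assumptions: the surrounding module defines a metrics dict and imports Histogram from prometheus_client, both implied but not shown; the metric name is illustrative):

from prometheus_client import Histogram

metrics = {}

set_histogram('job_duration_seconds', 'Duration of background jobs',
              buckets=(1, 5, 10, 60, 300, float("inf")))
metrics['job_duration_seconds'].observe(42.0)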
Example #25
File: __init__.py  Project: xiu/synapse
#

gc_unreachable = Gauge("python_gc_unreachable_total", "Unreachable GC objects",
                       ["gen"])
gc_time = Histogram(
    "python_gc_time",
    "Time taken to GC (sec)",
    ["gen"],
    buckets=[
        0.0025,
        0.005,
        0.01,
        0.025,
        0.05,
        0.10,
        0.25,
        0.50,
        1.00,
        2.50,
        5.00,
        7.50,
        15.00,
        30.00,
        45.00,
        60.00,
    ],
)


class GCCounts:
    def collect(self):
Example #26
class BlockProcessor:
    """Process blocks and update the DB state to match.

    Employ a prefetcher to prefetch blocks in batches for processing.
    Coordinate backing up in case of chain reorganisations.
    """

    block_count_metric = Gauge("block_count",
                               "Number of processed blocks",
                               namespace=NAMESPACE)
    block_update_time_metric = Histogram("block_time",
                                         "Block update times",
                                         namespace=NAMESPACE,
                                         buckets=HISTOGRAM_BUCKETS)
    reorg_count_metric = Gauge("reorg_count",
                               "Number of reorgs",
                               namespace=NAMESPACE)

    def __init__(self, env, db, daemon, notifications):
        self.env = env
        self.db = db
        self.daemon = daemon
        self.notifications = notifications

        self.coin = env.coin
        self.blocks_event = asyncio.Event()
        self.prefetcher = Prefetcher(daemon, env.coin, self.blocks_event)
        self.logger = class_logger(__name__, self.__class__.__name__)
        self.executor = ThreadPoolExecutor(1)

        # Meta
        self.next_cache_check = 0
        self.touched = set()
        self.reorg_count = 0

        # Caches of unflushed items.
        self.headers = []
        self.block_hashes = []
        self.block_txs = []
        self.undo_infos = []

        # UTXO cache
        self.utxo_cache = {}
        self.db_deletes = []

        # If the lock is successfully acquired, in-memory chain state
        # is consistent with self.height
        self.state_lock = asyncio.Lock()

        self.search_cache = {}
        self.history_cache = {}

    async def run_in_thread_with_lock(self, func, *args):
        # Run in a thread to prevent blocking.  Shielded so that
        # cancellations from shutdown don't lose work - when the task
        # completes the data will be flushed and then we shut down.
        # Take the state lock to be certain in-memory state is
        # consistent and not being updated elsewhere.
        async def run_in_thread_locked():
            async with self.state_lock:
                return await asyncio.get_event_loop().run_in_executor(
                    self.executor, func, *args)

        return await asyncio.shield(run_in_thread_locked())

    async def check_and_advance_blocks(self, raw_blocks):
        """Process the list of raw blocks passed.  Detects and handles
        reorgs.
        """
        if not raw_blocks:
            return
        first = self.height + 1
        blocks = [
            self.coin.block(raw_block, first + n)
            for n, raw_block in enumerate(raw_blocks)
        ]
        headers = [block.header for block in blocks]
        hprevs = [self.coin.header_prevhash(h) for h in headers]
        chain = [self.tip] + [self.coin.header_hash(h) for h in headers[:-1]]

        if hprevs == chain:
            start = time.perf_counter()
            await self.run_in_thread_with_lock(self.advance_blocks, blocks)
            for cache in self.search_cache.values():
                cache.clear()
            self.history_cache.clear()
            self.notifications.notified_mempool_txs.clear()
            await self._maybe_flush()
            processed_time = time.perf_counter() - start
            self.block_count_metric.set(self.height)
            self.block_update_time_metric.observe(processed_time)
            if not self.db.first_sync:
                s = '' if len(blocks) == 1 else 's'
                self.logger.info('processed {:,d} block{} in {:.1f}s'.format(
                    len(blocks), s, processed_time))
            if self._caught_up_event.is_set():
                await self.notifications.on_block(self.touched, self.height)
            self.touched = set()
        elif hprevs[0] != chain[0]:
            await self.reorg_chain()
        else:
            # It is probably possible but extremely rare that what
            # bitcoind returns doesn't form a chain because it
            # reorg-ed the chain as it was processing the batched
            # block hash requests.  Should this happen it's simplest
            # just to reset the prefetcher and try again.
            self.logger.warning('daemon blocks do not form a chain; '
                                'resetting the prefetcher')
            await self.prefetcher.reset_height(self.height)

    async def reorg_chain(self, count: Optional[int] = None):
        """Handle a chain reorganisation.

        Count is the number of blocks to simulate a reorg, or None for
        a real reorg."""
        if count is None:
            self.logger.info('chain reorg detected')
        else:
            self.logger.info(f'faking a reorg of {count:,d} blocks')
        await self.flush(True)

        async def get_raw_blocks(last_height, hex_hashes):
            heights = range(last_height, last_height - len(hex_hashes), -1)
            try:
                blocks = [
                    await self.db.read_raw_block(height) for height in heights
                ]
                self.logger.info(f'read {len(blocks)} blocks from disk')
                return blocks
            except FileNotFoundError:
                return await self.daemon.raw_blocks(hex_hashes)

        def flush_backup():
            # self.touched can include other addresses which is
            # harmless, but remove None.
            self.touched.discard(None)
            self.db.flush_backup(self.flush_data(), self.touched)

        start, last, hashes = await self.reorg_hashes(count)
        # Reverse and convert to hex strings.
        hashes = [hash_to_hex_str(hash) for hash in reversed(hashes)]
        for hex_hashes in chunks(hashes, 50):
            raw_blocks = await get_raw_blocks(last, hex_hashes)
            await self.run_in_thread_with_lock(self.backup_blocks, raw_blocks)
            await self.run_in_thread_with_lock(flush_backup)
            last -= len(raw_blocks)
        await self.run_in_thread_with_lock(
            self.db.sql.delete_claims_above_height, self.height)
        await self.prefetcher.reset_height(self.height)
        self.reorg_count_metric.inc()

    async def reorg_hashes(self, count):
        """Return a pair (start, last, hashes) of blocks to back up during a
        reorg.

        The hashes are returned in order of increasing height.  Start
        is the height of the first hash, last of the last.
        """
        start, count = await self.calc_reorg_range(count)
        last = start + count - 1
        s = '' if count == 1 else 's'
        self.logger.info(f'chain was reorganised replacing {count:,d} '
                         f'block{s} at heights {start:,d}-{last:,d}')

        return start, last, await self.db.fs_block_hashes(start, count)

    async def calc_reorg_range(self, count: Optional[int]):
        """Calculate the reorg range"""
        def diff_pos(hashes1, hashes2):
            """Returns the index of the first difference in the hash lists.
            If both lists match returns their length."""
            for n, (hash1, hash2) in enumerate(zip(hashes1, hashes2)):
                if hash1 != hash2:
                    return n
            return len(hashes1)

        if count is None:
            # A real reorg
            start = self.height - 1
            count = 1
            while start > 0:
                hashes = await self.db.fs_block_hashes(start, count)
                hex_hashes = [hash_to_hex_str(hash) for hash in hashes]
                d_hex_hashes = await self.daemon.block_hex_hashes(start, count)
                n = diff_pos(hex_hashes, d_hex_hashes)
                if n > 0:
                    start += n
                    break
                count = min(count * 2, start)
                start -= count

            count = (self.height - start) + 1
        else:
            start = (self.height - count) + 1

        return start, count

    def estimate_txs_remaining(self):
        # Try to estimate how many txs there are to go
        daemon_height = self.daemon.cached_height()
        coin = self.coin
        tail_count = daemon_height - max(self.height, coin.TX_COUNT_HEIGHT)
        # Damp the initial enthusiasm
        realism = max(2.0 - 0.9 * self.height / coin.TX_COUNT_HEIGHT, 1.0)
        return (tail_count * coin.TX_PER_BLOCK +
                max(coin.TX_COUNT - self.tx_count, 0)) * realism

    # - Flushing
    def flush_data(self):
        """The data for a flush.  The lock must be taken."""
        assert self.state_lock.locked()
        return FlushData(self.height, self.tx_count, self.headers,
                         self.block_hashes, self.block_txs, self.undo_infos,
                         self.utxo_cache, self.db_deletes, self.tip)

    async def flush(self, flush_utxos):
        def flush():
            self.db.flush_dbs(self.flush_data(), flush_utxos,
                              self.estimate_txs_remaining)

        await self.run_in_thread_with_lock(flush)

    async def _maybe_flush(self):
        # If caught up, flush everything as client queries are
        # performed on the DB.
        if self._caught_up_event.is_set():
            await self.flush(True)
        elif time.perf_counter() > self.next_cache_check:
            await self.flush(True)
            self.next_cache_check = time.perf_counter() + 30

    def check_cache_size(self):
        """Flush a cache if it gets too big."""
        # Good average estimates based on traversal of subobjects and
        # requesting size from Python (see deep_getsizeof).
        one_MB = 1000 * 1000
        utxo_cache_size = len(self.utxo_cache) * 205
        db_deletes_size = len(self.db_deletes) * 57
        hist_cache_size = self.db.history.unflushed_memsize()
        # Roughly ntxs * 32 + nblocks * 42
        tx_hash_size = ((self.tx_count - self.db.fs_tx_count) * 32 +
                        (self.height - self.db.fs_height) * 42)
        utxo_MB = (db_deletes_size + utxo_cache_size) // one_MB
        hist_MB = (hist_cache_size + tx_hash_size) // one_MB

        self.logger.info('our height: {:,d} daemon: {:,d} '
                         'UTXOs {:,d}MB hist {:,d}MB'.format(
                             self.height, self.daemon.cached_height(), utxo_MB,
                             hist_MB))

        # Flush history if it takes up over 20% of cache memory.
        # Flush UTXOs once they take up 80% of cache memory.
        cache_MB = self.env.cache_MB
        if utxo_MB + hist_MB >= cache_MB or hist_MB >= cache_MB // 5:
            return utxo_MB >= cache_MB * 4 // 5
        return None

    def advance_blocks(self, blocks):
        """Synchronously advance the blocks.

        It is already verified they correctly connect onto our tip.
        """
        min_height = self.db.min_undo_height(self.daemon.cached_height())
        height = self.height

        for block in blocks:
            height += 1
            undo_info = self.advance_txs(
                height, block.transactions,
                self.coin.electrum_header(block.header, height),
                self.coin.header_hash(block.header))
            if height >= min_height:
                self.undo_infos.append((undo_info, height))
                self.db.write_raw_block(block.raw, height)

        headers = [block.header for block in blocks]
        self.height = height
        self.headers.extend(headers)
        self.tip = self.coin.header_hash(headers[-1])

    def advance_txs(self, height, txs, header, block_hash):
        self.block_hashes.append(block_hash)
        self.block_txs.append(
            (b''.join(tx_hash
                      for tx, tx_hash in txs), [tx.raw for tx, _ in txs]))

        undo_info = []
        tx_num = self.tx_count
        hashXs_by_tx = []

        # Use local vars for speed in the loops
        put_utxo = self.utxo_cache.__setitem__
        spend_utxo = self.spend_utxo
        undo_info_append = undo_info.append
        update_touched = self.touched.update
        append_hashX_by_tx = hashXs_by_tx.append
        hashX_from_script = self.coin.hashX_from_script

        for tx, tx_hash in txs:
            hashXs = []
            append_hashX = hashXs.append
            tx_numb = pack('<I', tx_num)

            # Spend the inputs
            for txin in tx.inputs:
                if txin.is_generation():
                    continue
                cache_value = spend_utxo(txin.prev_hash, txin.prev_idx)
                undo_info_append(cache_value)
                append_hashX(cache_value[:-12])

            # Add the new UTXOs
            for idx, txout in enumerate(tx.outputs):
                # Get the hashX.  Ignore unspendable outputs
                hashX = hashX_from_script(txout.pk_script)
                if hashX:
                    append_hashX(hashX)
                    put_utxo(tx_hash + pack('<H', idx),
                             hashX + tx_numb + pack('<Q', txout.value))

            append_hashX_by_tx(hashXs)
            update_touched(hashXs)
            self.db.total_transactions.append(tx_hash)
            tx_num += 1

        self.db.history.add_unflushed(hashXs_by_tx, self.tx_count)
        self.tx_count = tx_num
        self.db.tx_counts.append(tx_num)

        return undo_info

    def backup_blocks(self, raw_blocks):
        """Backup the raw blocks and flush.

        The blocks should be in order of decreasing height, starting at
        self.height.  A flush is performed once the blocks are backed up.
        """
        self.db.assert_flushed(self.flush_data())
        assert self.height >= len(raw_blocks)

        coin = self.coin
        for raw_block in raw_blocks:
            # Check and update self.tip
            block = coin.block(raw_block, self.height)
            header_hash = coin.header_hash(block.header)
            if header_hash != self.tip:
                raise ChainError(
                    'backup block {} not tip {} at height {:,d}'.format(
                        hash_to_hex_str(header_hash),
                        hash_to_hex_str(self.tip), self.height))
            self.tip = coin.header_prevhash(block.header)
            self.backup_txs(block.transactions)
            self.height -= 1
            self.db.tx_counts.pop()

        self.logger.info(f'backed up to height {self.height:,d}')

    def backup_txs(self, txs):
        # Prevout values, in order down the block (coinbase first if present)
        # undo_info is in reverse block order
        undo_info = self.db.read_undo_info(self.height)
        if undo_info is None:
            raise ChainError(
                f'no undo information found for height {self.height:,d}')
        n = len(undo_info)

        # Use local vars for speed in the loops
        s_pack = pack
        undo_entry_len = 12 + HASHX_LEN

        for tx, tx_hash in reversed(txs):
            self.db.total_transactions.pop()
            for idx, txout in enumerate(tx.outputs):
                # Spend the TX outputs.  Be careful with unspendable
                # outputs - we didn't save those in the first place.
                hashX = self.coin.hashX_from_script(txout.pk_script)
                if hashX:
                    cache_value = self.spend_utxo(tx_hash, idx)
                    self.touched.add(cache_value[:-12])

            # Restore the inputs
            for txin in reversed(tx.inputs):
                if txin.is_generation():
                    continue
                n -= undo_entry_len
                undo_item = undo_info[n:n + undo_entry_len]
                self.utxo_cache[txin.prev_hash +
                                s_pack('<H', txin.prev_idx)] = undo_item
                self.touched.add(undo_item[:-12])

        assert n == 0
        self.tx_count -= len(txs)

    """An in-memory UTXO cache, representing all changes to UTXO state
    since the last DB flush.

    We want to store millions of these in memory for optimal
    performance during initial sync, because then it is possible to
    spend UTXOs without ever going to the database (other than as an
    entry in the address history, and there is only one such entry per
    TX not per UTXO).  So store them in a Python dictionary with
    binary keys and values.

      Key:    TX_HASH + TX_IDX           (32 + 2 = 34 bytes)
      Value:  HASHX + TX_NUM + VALUE     (11 + 4 + 8 = 23 bytes)

    That's 57 bytes of raw data in-memory.  Python dictionary overhead
    means each entry actually uses about 205 bytes of memory.  So
    almost 5 million UTXOs can fit in 1GB of RAM.  There are
    approximately 42 million UTXOs on bitcoin mainnet at height
    433,000.

    Semantics:

      add:   Add it to the cache dictionary.

      spend: Remove it if in the cache dictionary.  Otherwise it's
             been flushed to the DB.  Each UTXO is responsible for two
             entries in the DB.  Mark them for deletion in the next
             cache flush.

    The UTXO database format has to be able to do two things efficiently:

      1.  Given an address be able to list its UTXOs and their values
          so its balance can be efficiently computed.

      2.  When processing transactions, for each prevout spent - a (tx_hash,
          idx) pair - we have to be able to remove it from the DB.  To send
          notifications to clients we also need to know any address it paid
          to.

    To this end we maintain two "tables", one for each point above:

      1.  Key: b'u' + address_hashX + tx_idx + tx_num
          Value: the UTXO value as a 64-bit unsigned integer

      2.  Key: b'h' + compressed_tx_hash + tx_idx + tx_num
          Value: hashX

    The compressed tx hash is just the first few bytes of the hash of
    the tx in which the UTXO was created.  As this is not unique there
    will be potential collisions so tx_num is also in the key.  When
    looking up a UTXO the prefix space of the compressed hash needs to
    be searched and resolved if necessary with the tx_num.  The
    collision rate is low (<0.1%).
    """

    def spend_utxo(self, tx_hash, tx_idx):
        """Spend a UTXO and return the 33-byte value.

        If the UTXO is not in the cache it must be on disk.  We store
        all UTXOs so not finding one indicates a logic error or DB
        corruption.
        """
        # Fast track is it being in the cache
        idx_packed = pack('<H', tx_idx)
        cache_value = self.utxo_cache.pop(tx_hash + idx_packed, None)
        if cache_value:
            return cache_value

        # Spend it from the DB.

        # Key: b'h' + compressed_tx_hash + tx_idx + tx_num
        # Value: hashX
        prefix = b'h' + tx_hash[:4] + idx_packed
        candidates = {
            db_key: hashX
            for db_key, hashX in self.db.utxo_db.iterator(prefix=prefix)
        }

        for hdb_key, hashX in candidates.items():
            tx_num_packed = hdb_key[-4:]

            if len(candidates) > 1:
                tx_num, = unpack('<I', tx_num_packed)
                hash, height = self.db.fs_tx_hash(tx_num)
                if hash != tx_hash:
                    assert hash is not None  # Should always be found
                    continue

            # Key: b'u' + address_hashX + tx_idx + tx_num
            # Value: the UTXO value as a 64-bit unsigned integer
            udb_key = b'u' + hashX + hdb_key[-6:]
            utxo_value_packed = self.db.utxo_db.get(udb_key)
            if utxo_value_packed:
                # Remove both entries for this UTXO
                self.db_deletes.append(hdb_key)
                self.db_deletes.append(udb_key)
                return hashX + tx_num_packed + utxo_value_packed

        raise ChainError('UTXO {} / {:,d} not found in "h" table'.format(
            hash_to_hex_str(tx_hash), tx_idx))

    async def _process_prefetched_blocks(self):
        """Loop forever processing blocks as they arrive."""
        while True:
            if self.height == self.daemon.cached_height():
                if not self._caught_up_event.is_set():
                    await self._first_caught_up()
                    self._caught_up_event.set()
            await self.blocks_event.wait()
            self.blocks_event.clear()
            if self.reorg_count:
                await self.reorg_chain(self.reorg_count)
                self.reorg_count = 0
            else:
                blocks = self.prefetcher.get_prefetched_blocks()
                await self.check_and_advance_blocks(blocks)

    async def _first_caught_up(self):
        self.logger.info(f'caught up to height {self.height}')
        # Flush everything but with first_sync->False state.
        first_sync = self.db.first_sync
        self.db.first_sync = False
        await self.flush(True)
        if first_sync:
            self.logger.info(f'{lbry.__version__} synced to '
                             f'height {self.height:,d}')
        # Reopen for serving
        await self.db.open_for_serving()

    async def _first_open_dbs(self):
        await self.db.open_for_sync()
        self.height = self.db.db_height
        self.tip = self.db.db_tip
        self.tx_count = self.db.db_tx_count

    # --- External API

    async def fetch_and_process_blocks(self, caught_up_event):
        """Fetch, process and index blocks from the daemon.

        Sets caught_up_event when first caught up.  Flushes to disk
        and shuts down cleanly if cancelled.

        This is mainly because if, during initial sync ElectrumX is
        asked to shut down when a large number of blocks have been
        processed but not written to disk, it should write those to
        disk before exiting, as otherwise a significant amount of work
        could be lost.
        """
        self._caught_up_event = caught_up_event
        try:
            await self._first_open_dbs()
            await asyncio.wait([
                self.prefetcher.main_loop(self.height),
                self._process_prefetched_blocks()
            ])
        except asyncio.CancelledError:
            raise
        except:
            self.logger.exception("Block processing failed!")
            raise
        finally:
            # Shut down block processing
            self.logger.info('flushing to DB for a clean shutdown...')
            await self.flush(True)
            self.db.close()
            self.executor.shutdown(wait=True)

    def force_chain_reorg(self, count):
        """Force a reorg of the given number of blocks.

        Returns True if a reorg is queued, False if not caught up.
        """
        if self._caught_up_event.is_set():
            self.reorg_count = count
            self.blocks_event.set()
            return True
        return False
Example #27
will not actually exist until the first failure. This makes dashboarding
and alerting difficult, so we explicitly list statuses and create
them manually here.

.. versionchanged:: 1.3

    added ``jupyterhub_`` prefix to metric names.
"""
from enum import Enum

from prometheus_client import Gauge
from prometheus_client import Histogram

REQUEST_DURATION_SECONDS = Histogram(
    'jupyterhub_request_duration_seconds',
    'request duration for all HTTP requests',
    ['method', 'handler', 'code'],
)

SERVER_SPAWN_DURATION_SECONDS = Histogram(
    'jupyterhub_server_spawn_duration_seconds',
    'time taken for server spawning operation',
    ['status'],
    # Use custom bucket sizes, since the default bucket ranges
    # are meant for quick running processes. Spawns can take a while!
    buckets=[0.5, 1, 2.5, 5, 10, 15, 30, 60, 120,
             float("inf")],
)

RUNNING_SERVERS = Gauge('jupyterhub_running_servers',
                        'the number of user servers currently running')
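The docstring fragment at the top of this example explains why every labelled series is created up front: a histogram child for a given status value does not exist until it is first observed, so dashboards and alerts would see no series at all until the first failure. A minimal sketch of that pre-initialisation pattern, continuing the example above (the enum values here are illustrative assumptions, not necessarily the project's exact ones):

class ServerSpawnStatus(Enum):
    """Possible spawn outcomes, used as the value of the 'status' label."""
    success = 'success'
    failure = 'failure'
    already_pending = 'already-pending'

    def __str__(self):
        return self.value


# Touch every label combination once so the time series exist (with zero
# observations) before the first spawn ever happens or fails.
for status in ServerSpawnStatus:
    SERVER_SPAWN_DURATION_SECONDS.labels(status=str(status))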
Example #28
import logging

import bitmath
import rehash

from prometheus_client import Counter, Histogram

from data.registry_model import registry_model
from data.database import CloseForLongOperation, db_transaction
from digest import digest_tools
from util.registry.filelike import wrap_with_handler, StreamSlice
from util.registry.gzipstream import calculate_size_handler

logger = logging.getLogger(__name__)

chunk_upload_duration = Histogram(
    "quay_chunk_upload_duration_seconds",
    "number of seconds for a chunk to be uploaded to the registry",
    labelnames=["region"],
)
pushed_bytes_total = Counter("quay_registry_image_pushed_bytes_total",
                             "number of bytes pushed to the registry")

BLOB_CONTENT_TYPE = "application/octet-stream"


class BlobUploadException(Exception):
    """
    Base for all exceptions raised when uploading blobs.
    """


class BlobRangeMismatchException(BlobUploadException):
Example #29
import time

from prometheus_client import Counter, Gauge, Histogram
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.responses import Response
from starlette.routing import Match
from starlette.status import HTTP_500_INTERNAL_SERVER_ERROR
from starlette.types import ASGIApp

REQUESTS = Counter("starlette_requests_total",
                   "Total count of requests by method and path.",
                   ["method", "path_template"])
RESPONSES = Counter(
    "starlette_responses_total",
    "Total count of responses by method, path and status codes.",
    ["method", "path_template", "status_code"],
)
REQUESTS_PROCESSING_TIME = Histogram(
    "starlette_requests_processing_time_seconds",
    "Histogram of requests processing time by path (in seconds)",
    ["method", "path_template"],
)
EXCEPTIONS = Counter(
    "starlette_exceptions_total",
    "Total count of exceptions raised by path and exception type",
    ["method", "path_template", "exception_type"],
)
REQUESTS_IN_PROGRESS = Gauge(
    "starlette_requests_in_progress",
    "Gauge of requests by method and path currently being processed",
    ["method", "path_template"],
)


class PrometheusMiddleware(BaseHTTPMiddleware):
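    # A sketch of how a middleware like this typically drives the metrics
    # defined above (an illustrative assumption, not the project's actual
    # dispatch; real implementations resolve path_template from the route
    # table via Match rather than using the raw path).
    async def dispatch(self, request, call_next):
        method = request.method
        path_template = request.url.path

        REQUESTS_IN_PROGRESS.labels(method=method, path_template=path_template).inc()
        REQUESTS.labels(method=method, path_template=path_template).inc()
        start = time.perf_counter()
        try:
            response = await call_next(request)
        except Exception as exc:
            EXCEPTIONS.labels(method=method, path_template=path_template,
                              exception_type=type(exc).__name__).inc()
            raise
        finally:
            REQUESTS_PROCESSING_TIME.labels(
                method=method, path_template=path_template
            ).observe(time.perf_counter() - start)
            REQUESTS_IN_PROGRESS.labels(method=method, path_template=path_template).dec()

        RESPONSES.labels(method=method, path_template=path_template,
                         status_code=response.status_code).inc()
        return response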
Example #30
from multiprocessing import current_process
from multiprocessing.connection import Connection
from typing import Optional

from prometheus_client import Gauge, Histogram
from sentry_sdk.tracing import Span
from structlog.stdlib import BoundLogger, get_logger

from authentik.core.models import User
from authentik.policies.models import Policy, PolicyBinding, PolicyBindingModel, PolicyEngineMode
from authentik.policies.process import PolicyProcess, cache_key
from authentik.policies.types import PolicyRequest, PolicyResult

CURRENT_PROCESS = current_process()
GAUGE_POLICIES_CACHED = Gauge(
    "authentik_policies_cached",
    "Cached Policies",
)
HIST_POLICIES_BUILD_TIME = Histogram(
    "authentik_policies_build_time",
    "Execution times complete policy result to an object",
    ["object_name", "object_type", "user"],
)


class PolicyProcessInfo:
    """Dataclass to hold all information and communication channels to a process"""

    process: PolicyProcess
    connection: Connection
    result: Optional[PolicyResult]
    binding: PolicyBinding

    def __init__(self, process: PolicyProcess, connection: Connection,
                 binding: PolicyBinding):
        self.process = process