Example #1
    async def _on_data_chunk(self, metric: str, data_chunk):
        # Fast-path if there are no value checks: do not decode the whole data
        # chunk, only extract the last timestamp and bump timeout checks.
        if not self._has_value_checks:
            if len(data_chunk.time_delta) > 0:
                last_timestamp = Timestamp(sum(data_chunk.time_delta))
                self._bump_timeout_checks(metric, last_timestamp)
            return

        tv_pairs = [
            TvPair(timestamp=Timestamp(t), value=v) for t, v in zip(
                accumulate(data_chunk.time_delta), data_chunk.value)
            if not isnan(v)
        ]

        if len(tv_pairs) == 0:
            logger.debug(
                f"No non-NaN values in DataChunk for metric {metric!r}")
            return

        # check that all values in this data chunk are within the desired
        # thresholds
        for check in self._checks.values():
            check.check(metric, tv_pairs)

        # "bump" all timeout checks with the last timestamp for which we
        # received values, i.e. reset the asynchronous timers that would
        # fire if we do not receive value for too long.
        last_timestamp = tv_pairs[-1].timestamp
        self._bump_timeout_checks(metric, last_timestamp)
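The fast path above exploits the delta encoding of DataChunk timestamps: each entry of time_delta is the offset to the previous point, so the last absolute timestamp equals the plain sum of all deltas, and only the slow path needs the running sums from accumulate. A minimal sketch of that relationship (the delta values are made up):

from itertools import accumulate

time_delta = [100, 50, 25]  # made-up nanosecond deltas between points
timestamps = list(accumulate(time_delta))  # absolute times: [100, 150, 175]
# the fast path needs only the last timestamp, which is the plain sum
assert timestamps[-1] == sum(time_delta)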
Example #2
    def create_source_plugin(
        self,
        source_id: str,
        source_config: Dict,
        rpc_function: PluginRPCFunctionType,
    ) -> Optional[SourcePlugin]:
        source_type = source_config["type"].replace("-", "_")

        if source_id not in self._source_plugins:
            full_module_name = f"metricq_wizard_plugin_{source_type}"
            if importlib.util.find_spec(full_module_name):
                plugin_module = importlib.import_module(full_module_name)
                entry_point: EntryPointType = plugin_module.get_plugin
                self._source_plugins[source_id] = entry_point(
                    source_config, rpc_function
                )
                self._source_config_revision[source_id] = source_config.get("_rev")
                self._source_plugin_creation_time[source_id] = Timestamp.now()
                self._source_plugin_initial_configured_metrics[source_id] = (
                    self._source_plugins[source_id].get_configured_metrics()
                )
                logger.debug(
                    "Currently configured metrics: "
                    f"{self._source_plugin_initial_configured_metrics[source_id]}"
                )
            else:
                logger.error(
                    f"Plugin {full_module_name} for source {source_id} not found."
                )

        if source_id in self._source_plugins:
            return self._source_plugins[source_id]

        logger.error(f"Plugin instance for source {source_id} not found.")
        return None
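The discovery above follows a probe-then-import convention: the module name is derived from the configured source type, checked with importlib.util.find_spec (which returns None for a missing top-level module instead of raising), and imported only if present. A condensed sketch of just that mechanism, with the caching and error logging stripped:

import importlib
import importlib.util

def load_plugin_entry_point(source_type: str):
    # naming convention taken from create_source_plugin above
    module_name = f"metricq_wizard_plugin_{source_type.replace('-', '_')}"
    if importlib.util.find_spec(module_name) is None:
        return None  # plugin package is not installed
    module = importlib.import_module(module_name)
    return module.get_plugin  # entry-point attribute assumed by the code above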
Example #3
def test_check_internal_chained_exception_report(check):
    errmsg_toplevel = "top-level exception"
    errmsg_cause = "chained exception"

    def check_metric_raising(*args, **kwargs):
        try:
            raise RuntimeError(errmsg_cause)
        except RuntimeError as e:
            raise RuntimeError(errmsg_toplevel) from e

    check.check_metric = check_metric_raising

    check.check("foo", tv_pairs=[TvPair(Timestamp(0), 0.0)])

    report: Report = check._report_queue._queue.get_nowait()

    assert report.service == check._name
    assert report.state == State.CRITICAL

    header, *causes = report.message.splitlines()

    assert errmsg_toplevel in header

    assert len(causes) == 1
    assert "caused by:" in causes[0]
    assert errmsg_cause in causes[0]
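The test pins down a message layout of one header line followed by one "caused by:" line per link of the exception's __cause__ chain (PEP 3134). A hedged sketch of a formatter that would satisfy these assertions; the project's actual implementation may differ:

def format_exception_chain(exc: BaseException) -> str:
    lines = [str(exc)]  # header line containing the top-level message
    cause = exc.__cause__
    while cause is not None:
        lines.append(f"caused by: {cause}")
        cause = cause.__cause__
    return "\n".join(lines)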
Example #4
    def __init__(
        self,
        session_key: str,
    ):
        self.session_key: str = session_key
        self._source_plugins: Dict[str, SourcePlugin] = {}
        self._source_config_revision: Dict[str, str] = {}
        self._source_plugin_creation_time: Dict[str, Timestamp] = {}
        self._source_plugin_initial_configured_metrics: Dict[str, Sequence[str]] = {}
        self.creation_time = Timestamp.now()
Example #5
    def _bacnet_reader_put_result_in_source_queue(self, device_name: str,
                                                  device_address_string: str,
                                                  result_values: Dict):
        fut = asyncio.run_coroutine_threadsafe(
            self._result_queue.put((Timestamp.now(), device_name,
                                    device_address_string, result_values)),
            loop=self.event_loop,
        )
        try:
            fut.result()
        except Exception:
            logger.exception("Can't put BACnet result in queue!")
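This is the standard handoff from a plain thread (here, the BACnet reader's callback thread) into an asyncio.Queue owned by the event loop: run_coroutine_threadsafe schedules the put() coroutine on the loop's thread and returns a concurrent.futures.Future, and fut.result() blocks the calling thread until the item is actually enqueued, re-raising any exception. A self-contained sketch of the pattern:

import asyncio
import threading

async def main():
    loop = asyncio.get_running_loop()
    queue: asyncio.Queue = asyncio.Queue()

    def worker():
        # runs in a plain thread; queue.put() must execute on the loop
        fut = asyncio.run_coroutine_threadsafe(queue.put("result"), loop)
        fut.result()  # block this thread until the item is enqueued

    threading.Thread(target=worker).start()
    print(await queue.get())  # -> result

asyncio.run(main())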
Example #6
    async def _on_data_chunk(self, metric: str, data_chunk):
        # check that all values in this data chunk are within the desired
        # thresholds
        await self._check_values(metric, data_chunk.value)

        # "bump" all timeout checks with the last timestamp for which we
        # received values, i.e. reset the asynchronous timers that would
        # fire if we do not receive value for too long.
        last_timestamp = Timestamp(sum(data_chunk.time_delta))
        await self._bump_timeout_checks(metric, last_timestamp)

        # flush all reports to the NSCA host
        await self._nsca_client.flush()
Example #7
async def test_history_last_value(history_client: HistoryClient,
                                  mocker: MockerFixture):
    TIME = Timestamp(0)
    VALUE = mocker.sentinel.VALUE

    response = mock_history_response(
        time_delta=[TIME.posix_ns],
        value=[VALUE],
    )

    patch_history_data_request(mocker, response)

    assert await history_client.history_last_value(
        DEFAULT_METRIC) == TimeValue(timestamp=TIME, value=VALUE)
Example #8
def test_check_internal_exception_log_entry(check, caplog):
    errmsg = "check_metric_raising"

    def check_metric_raising(*args, **kwargs):
        raise ValueError(errmsg)

    check.check_metric = check_metric_raising

    with caplog.at_level(logging.ERROR):
        check.check("foo", tv_pairs=[TvPair(Timestamp(0), 0.0)])

        assert errmsg in caplog.text

        assert any("Unhandled exception" in message
                   for logger, level, message in caplog.record_tuples)
Example #9
def test_check_internal_exception_report(check):
    errmsg = "check_metric_raising"

    def check_metric_raising(*args, **kwargs):
        raise ValueError(errmsg)

    check.check_metric = check_metric_raising

    check.check("foo", tv_pairs=[TvPair(Timestamp(0), 0.0)])

    report: Report = check._report_queue._queue.get_nowait()

    assert report.service == check._name
    assert report.state == State.CRITICAL
    assert errmsg in report.message
Example #10
async def test_history_aggregate(history_client: HistoryClient,
                                 mocker: MockerFixture):
    TIME = Timestamp(0)
    AGGREGATE = create_autospec(history_pb2.HistoryResponse.Aggregate,
                                spec_set=True)

    response = mock_history_response(
        time_delta=[TIME.posix_ns],
        aggregate=[AGGREGATE],
    )

    patch_history_data_request(mocker, response)

    assert await history_client.history_aggregate(
        DEFAULT_METRIC) == TimeAggregate.from_proto(timestamp=TIME,
                                                    proto=AGGREGATE)
Example #11
    async def update(self):
        assert self.prev_timestamp is not None, "update() called before _on_config()"

        now = Timestamp.now()
        send_metrics = list()
        send_metrics.append(
            self.send("cpu.usage", now, sum(psutil.cpu_percent(percpu=True)))
        )

        for mem_name, mem_value in psutil.virtual_memory()._asdict().items():
            send_metrics.append(self.send(f"mem.{mem_name}", now, mem_value))

        for swap_name, swap_value in psutil.swap_memory()._asdict().items():
            send_metrics.append(self.send(f"swap.{swap_name}", now, swap_value))

        net_io = psutil.net_io_counters(pernic=True, nowrap=True)
        duration_s = (now - self.prev_timestamp).s
        for nic_name, net_values in net_io.items():
            prev_net_values = self.prev_net_io[nic_name]
            send_metrics.extend(
                [
                    self.send(
                        f"net.{nic_name}.sent.bytes",
                        now,
                        (net_values.bytes_sent - prev_net_values.bytes_sent)
                        / duration_s,
                    ),
                    self.send(
                        f"net.{nic_name}.sent.packets",
                        now,
                        (net_values.packets_sent - prev_net_values.packets_sent)
                        / duration_s,
                    ),
                    self.send(
                        f"net.{nic_name}.recv.bytes",
                        now,
                        (net_values.bytes_recv - prev_net_values.bytes_recv)
                        / duration_s,
                    ),
                    self.send(
                        f"net.{nic_name}.recv.packets",
                        now,
                        (net_values.packets_recv - prev_net_values.packets_recv)
                        / duration_s,
                    ),
                ]
            )

        disk_io = psutil.disk_io_counters(perdisk=True, nowrap=True)
        # duration_s is unchanged since the network block above
        for disk_name, disk_values in disk_io.items():
            prev_disk_values = self.prev_disk_io[disk_name]
            send_metrics.extend(
                [
                    self.send(
                        f"disk.{disk_name}.written.bytes",
                        now,
                        (disk_values.write_bytes - prev_disk_values.write_bytes)
                        / duration_s,
                    ),
                    self.send(
                        f"disk.{disk_name}.written.count",
                        now,
                        (disk_values.write_count - prev_disk_values.write_count)
                        / duration_s,
                    ),
                    self.send(
                        f"disk.{disk_name}.read.bytes",
                        now,
                        (disk_values.read_bytes - prev_disk_values.read_bytes)
                        / duration_s,
                    ),
                    self.send(
                        f"disk.{disk_name}.read.count",
                        now,
                        (disk_values.read_count - prev_disk_values.read_count)
                        / duration_s,
                    ),
                ]
            )

        self.prev_disk_io = disk_io
        self.prev_net_io = net_io
        self.prev_timestamp = now

        await asyncio.gather(*send_metrics)
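Each rate metric above is a finite difference of psutil's cumulative counters, (counter_now - counter_prev) / elapsed_seconds, with nowrap=True making psutil compensate for counter wraparound. For example, if bytes_sent grows from 1_000_000 to 1_500_000 over a 10 s interval, the reported net.<nic>.sent.bytes value is 500_000 / 10 = 50_000 B/s.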
Example #12
    async def _on_config(self, **config):
        logger.info("config: {}", config)
        rate = config["rate"]
        self.period = Timedelta.from_s(1 / rate)
        try:
            self.prefix = config["prefix"]
            if self.prefix != "" and not self.prefix.endswith("."):
                self.prefix = self.prefix + "."
        except KeyError:
            logger.info("No explicit prefix given, using hostname")
            self.prefix = socket.gethostname() + "."

        meta = dict()

        # Initialize CPU usage:
        psutil.cpu_percent(percpu=True)
        meta["cpu.usage"] = {
            "rate": rate,
            "description": "CPU usage (100% = 1 logical CPU busy)",
            "unit": "%",
        }

        # Initialize memory
        for mem_name in psutil.virtual_memory()._fields:
            meta[f"mem.{mem_name}"] = {
                "rate": rate,
                "description": "See https://psutil.readthedocs.io/en/latest/#psutil.virtual_memory",
                "unit": "%" if mem_name == "percent" else "B",
            }

        for swap_name in psutil.swap_memory()._fields:
            meta[f"swap.{swap_name}"] = {
                "rate": rate,
                "description": "See https://psutil.readthedocs.io/en/latest/#psutil.swap_memory",
                "unit": "%" if swap_name == "percent" else "B",
            }

        # Network
        self.prev_net_io = psutil.net_io_counters(pernic=True, nowrap=True)
        self.prev_timestamp = Timestamp.now()
        for nic_name in self.prev_net_io.keys():
            for sr in "sent", "recv":
                meta[f"net.{nic_name}.{sr}.bytes"] = {
                    "rate": rate,
                    "description": f"Total data {sr} on nic {nic_name}",
                    "unit": "B/s",
                }
                meta[f"net.{nic_name}.{sr}.packets"] = {
                    "rate": rate,
                    "description": f"Number of packets {sr} on nic {nic_name}",
                    "unit": "Hz",
                }

        # Disk
        self.prev_disk_io = psutil.disk_io_counters(perdisk=True, nowrap=True)
        for disk_name in self.prev_disk_io.keys():
            for rw in "read", "written":
                meta[f"disk.{disk_name}.{rw}.count"] = {
                    "rate": rate,
                    "description": f"Number of {rw}s on partition {disk_name}",
                    "unit": "Hz",
                }
                meta[f"disk.{disk_name}.{rw}.bytes"] = {
                    "rate": rate,
                    "description": f"Total data {rw} on partition {disk_name}",
                    "unit": "B/s",
                }

        await self.declare_metrics(
            {self.prefix + key: value for key, value in meta.items()}
        )
Example #13
    async def _worker_task(self, object_group, worker_task_stop_future):
        start_time = Timestamp.now()
        interval = object_group["interval"]
        device_address_str = object_group["device_address_str"]
        object_type = object_group["object_type"]
        objects = [(object_type, instance)
                   for instance in object_group["object_instances"]]
        chunk_size = object_group.get("chunk_size")

        logger.debug(
            f"starting BACnetSource worker task for device {device_address_str}"
        )

        logger.debug(
            "This is {}the main thread.",
            "" if threading.current_thread() == threading.main_thread()
            else "not ",
        )

        # wait for random time between 10 ms and 10.01s
        random_wait_time = random.random() * 10 + 0.01
        await asyncio.sleep(random_wait_time)
        self._worker_tasks_count_starting += 1

        await self.event_loop.run_in_executor(
            None,
            functools.partial(
                self._bacnet_reader.request_device_properties,
                device_address_str,
                skip_when_cached=True,
                request_timeout=Timedelta.from_s(30),
            ),
        )
        await self.event_loop.run_in_executor(
            None,
            functools.partial(
                self._bacnet_reader.request_object_properties,
                device_address_str,
                objects,
                skip_when_cached=True,
                chunk_size=chunk_size,
                request_timeout=Timedelta.from_s(30),
            ),
        )

        device_info = self._bacnet_reader.get_device_info(
            device_address_str,
            device_identifier=object_group.get("device_identifier"))
        if device_info is None:
            logger.error("Missing device info for {}. Stopping worker task!",
                         device_address_str)
            self._worker_tasks_count_failed += 1
            return

        device_name = self._object_name_vendor_specific_mapping.get(
            device_info["objectName"], device_info["objectName"])

        device_name = substitute_all(
            device_name, self._object_name_vendor_specific_substitutions)

        metrics = {}
        missing_metrics = 0

        for object_instance in object_group["object_instances"]:
            metadata = {
                "rate": 1.0 / interval,
                "device": device_address_str,
                "objectType": object_type,
                "objectInstance": object_instance,
            }
            object_info = self._bacnet_reader.get_object_info(
                device_address_str, object_type, object_instance)
            if (object_info is None or "objectName" not in object_info
                    or "description" not in object_info):
                logger.error(
                    "No object info for ({}, {}) of {} available!",
                    object_type,
                    object_instance,
                    device_address_str,
                )
                missing_metrics += 1
                continue

            # Get vendor-specific-address from object cache
            object_name = object_info.get("3000", object_info["objectName"])

            object_name = self._object_name_vendor_specific_mapping.get(
                object_name, object_name)

            object_name = substitute_all(
                object_name, self._object_name_vendor_specific_substitutions)

            metric_id = (
                Template(object_group["metric_id"])
                .safe_substitute({"objectName": object_name, "deviceName": device_name})
                .replace("'", ".")
                .replace("`", ".")
                .replace("´", ".")
                .replace(" ", "")
            )
            if "description" in object_group:
                description = (Template(
                    object_group["description"]).safe_substitute({
                        "objectName":
                        object_name,
                        "objectDescription":
                        object_info["description"],
                        "deviceName":
                        device_name,
                        "deviceDescription":
                        device_info["description"],
                    }).replace("'", ".").replace("`", ".").replace("´", "."))
                metadata["description"] = substitute_all(
                    description,
                    self._object_description_vendor_specific_substitutions)
            if "units" in object_info:
                metadata["unit"] = object_info["units"]

            metrics[metric_id] = metadata

        try:
            await self.declare_metrics(metrics)
        except RPCError:
            logger.exception(
                f"Can't declare metadata for device {device_address_str}. Stopping worker task!"
            )
            self._worker_tasks_count_failed += 1
            return

        segmentationSupport = "unknown"
        device_address = Address(device_address_str)
        device_info = self._bacnet_reader.deviceInfoCache.get_device_info(
            device_address)
        if device_info:
            segmentationSupport = device_info.segmentationSupported

        start_duration = Timestamp.now() - start_time

        logger.info(
            f"Started BACnetSource worker task for device {device_address_str}! "
            f"Took {start_duration.s - random_wait_time:.2f} s "
            f"(waited {random_wait_time:.2f} s), "
            f"{missing_metrics} metrics have no object info"
        )

        self._worker_tasks_count_running += 1
        deadline = Timestamp.now()
        while True:
            self._bacnet_reader.request_values(device_address_str,
                                               objects,
                                               chunk_size=chunk_size)

            if object_group.get("nan_at_timeout"):
                for metric_id in metrics:
                    now = Timestamp.now()
                    last_timestamp = self._last_time_send_by_metric.get(
                        metric_id, now)
                    if now - last_timestamp >= Timedelta.from_s(6 * interval):
                        timestamp_nan = last_timestamp + Timedelta.from_s(
                            5 * interval)
                        await self.send(metric_id, timestamp_nan, float("nan"))
                        self._last_time_send_by_metric[
                            metric_id] = timestamp_nan

                        logger.warning(
                            "Timeout for metric {} reached. Sending NaN! Device: {}",
                            metric_id,
                            device_address_str,
                        )
            try:
                deadline += Timedelta.from_s(interval)
                now = Timestamp.now()
                while now >= deadline:
                    logger.warning(
                        "Missed deadline {}, it is now {}. Device: {}, {}, chunk size: {}",
                        deadline,
                        now,
                        device_address_str,
                        segmentationSupport,
                        chunk_size,
                    )
                    deadline += Timedelta.from_s(interval)

                timeout = (deadline - now).s
                await asyncio.wait_for(asyncio.shield(worker_task_stop_future),
                                       timeout=timeout)
                worker_task_stop_future.result()
                logger.info("stopping BACnetSource worker task")
                break
            except asyncio.TimeoutError:
                # This is the normal case, just continue with the loop
                continue
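The polling loop above advances a fixed deadline by the interval instead of sleeping for the interval itself, so per-iteration jitter does not accumulate, and deadlines that have already passed are skipped (with a warning) rather than replayed, which avoids a burst of catch-up requests after a stall. The scheduling skeleton, reduced to a runnable sketch with time.monotonic standing in for Timestamp:

import asyncio
import time

async def run_periodic(interval_s: float, work) -> None:
    deadline = time.monotonic()
    while True:
        work()
        deadline += interval_s
        now = time.monotonic()
        while now >= deadline:  # missed one or more deadlines: skip them
            deadline += interval_s
        await asyncio.sleep(deadline - now)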
Example #14
    async def task(self):
        self._main_task_stop_future = self.event_loop.create_future()

        logger.info(
            "Current worker count (expected/starting/running/failed): "
            f"({self._worker_tasks_count_expected}"
            f"/{self._worker_tasks_count_starting}"
            f"/{self._worker_tasks_count_running}"
            f"/{self._worker_tasks_count_failed})"
        )
        last_state_log = Timestamp.now()

        while True:
            queue_get_task = asyncio.create_task(self._result_queue.get())
            done, pending = await asyncio.wait(
                {queue_get_task, self._main_task_stop_future},
                return_when=asyncio.FIRST_COMPLETED,
            )

            if queue_get_task in done:
                result: Tuple[Timestamp, str, str, Dict] = queue_get_task.result()

                timestamp, device_name, device_address_string, result_values = result

                device_config = self._device_config[device_address_string]
                device_name = self._object_name_vendor_specific_mapping.get(
                    device_name, device_name)

                device_name = substitute_all(
                    device_name,
                    self._object_name_vendor_specific_substitutions)

                for object_name, object_result in result_values.items():
                    object_name = self._object_name_vendor_specific_mapping.get(
                        object_name, object_name)

                    object_name = substitute_all(
                        object_name,
                        self._object_name_vendor_specific_substitutions)

                    # TODO maybe support more placeholders
                    metric_id = (
                        Template(device_config["metric_id"])
                        .safe_substitute({
                            "objectName": object_name,
                            "deviceName": device_name,
                        })
                        .replace("'", ".")
                        .replace("`", ".")
                        .replace("´", ".")
                        .replace(" ", "")
                    )
                    if "presentValue" in object_result and isinstance(
                            object_result["presentValue"], (int, float)):
                        await self.send(metric_id, timestamp,
                                        object_result["presentValue"])
                        self._last_time_send_by_metric[metric_id] = timestamp

                self._result_queue.task_done()

            if Timestamp.now() - last_state_log > Timedelta.from_string("5min"):
                logger.info(
                    "Current worker count (expected/starting/running/failed): "
                    f"({self._worker_tasks_count_expected}"
                    f"/{self._worker_tasks_count_starting}"
                    f"/{self._worker_tasks_count_running}"
                    f"/{self._worker_tasks_count_failed})"
                )
                last_state_log = Timestamp.now()

            if self._main_task_stop_future in done:
                # cancel the still-pending queue getter so it is not leaked
                queue_get_task.cancel()
                logger.info("stopping BACnetSource main task")
                break
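The shutdown handling races a queue get against a stop future via asyncio.wait(..., return_when=FIRST_COMPLETED). The subtlety worth isolating: when the stop future wins, the still-pending getter task must be cancelled, otherwise it leaks and asyncio complains that a task was destroyed while pending. A reduced sketch of the pattern:

import asyncio

async def consume(queue: asyncio.Queue, stop: asyncio.Future) -> None:
    while True:
        getter = asyncio.create_task(queue.get())
        done, _pending = await asyncio.wait(
            {getter, stop}, return_when=asyncio.FIRST_COMPLETED
        )
        if getter in done:
            item = getter.result()
            print("processing", item)  # stand-in for real work
            queue.task_done()
        if stop in done:
            getter.cancel()  # drop the leftover get on shutdown
            break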
Example #15
        self.value = value

    async def task(self):
        await self.send(self.metric, time=self.timestamp, value=self.value)
        await self.stop()


@click.command()
@click_log.simple_verbosity_option(logger, default="warning")
@click.version_option(version=client_version)
@metricq_server_option()
@metricq_token_option(default="source-send")
@click.option(
    "--timestamp",
    type=TIMESTAMP,
    # note: the default is evaluated once at import time; for this
    # short-lived CLI that is close enough to "now"
    default=Timestamp.now(),
    show_default="now",
    help="Timestamp to send.",
)
@click.argument("metric", required=True)
@click.argument("value", required=True, type=float)
def main(server: str, token: str, timestamp: Timestamp, metric: Metric, value: float):
    """Send a single time-value pair for the given metric."""
    send = MetricQSend(
        token=token,
        management_url=server,
        metric=metric,
        timestamp=timestamp,
        value=value,
    )
    # run the client until it stops itself (MetricQSend.task() calls stop())
    send.run()