def __init__(self):
    super(PluginPollingDeviceInterfaceMetrics, self).__init__()
    self._device_interface_metrics = PanoptesMetricsGroupSet()
    self._polling_status = None
    self._interface_metrics_group = None
    self._dot3stats_map = None
    self._if_table_stats_map = None
    self._ifx_table_stats_map = None
    self._DIMENSION_MAP = {
        u'alias': self.get_alias,
        u'media_type': self.get_media_type,
        u'description': self.get_description,
        u'configured_speed': self.get_configured_speed,
        u'port_speed': self.get_port_speed,
        u'interface_name': self.get_interface_name,
        u'parent_interface_name': self.get_parent_interface_name,
        u'parent_interface_media_type': self.get_parent_interface_media_type,
        u'parent_interface_configured_speed': self.get_parent_interface_configured_speed,
        u'parent_interface_port_speed': self.get_parent_interface_port_speed
    }
def run(self, context):
    metric_group_set = PanoptesMetricsGroupSet()
    metric1 = PanoptesMetric("test", 0.0, PanoptesMetricType.GAUGE)
    metric_group = PanoptesMetricsGroup(self.panoptes_resource, "Test", _TEST_INTERVAL)
    metric_group.add_metric(metric1)
    metric_group_set.add(metric_group)
    return metric_group_set
def __init__(self, plugin_context, device_resource, execute_frequency):
    self._plugin_context = plugin_context
    self._logger = plugin_context.logger
    self._enrichment = plugin_context.enrichment
    self._device_resource = device_resource
    self._device_fqdn = device_resource.resource_endpoint
    self._execute_frequency = execute_frequency
    self._device_heartbeat_metrics = PanoptesMetricsGroupSet()
def run(self, context):
    logger = context.logger
    resource = context.data
    host = resource.resource_endpoint
    config = context.config[u'main']
    execute_frequency = int(config[u'execute_frequency'])
    start_time = time()

    try:
        count = int(config[u'count'])
    except KeyError:
        count = DEFAULT_PING_COUNT
        logger.info(u'For device {}, count not set - setting it to {}'.format(host, DEFAULT_PING_COUNT))
    except ValueError:
        raise PanoptesPollingPluginConfigurationError(
            u'For device {}, configured count is not an integer: {}'.format(host, config[u'count']))

    try:
        timeout = int(config[u'timeout'])
    except KeyError:
        timeout = DEFAULT_PING_TIMEOUT
        logger.info(u'For device {}, timeout not set - setting it to {}s'.format(host, DEFAULT_PING_TIMEOUT))
    except ValueError:
        raise PanoptesPollingPluginConfigurationError(
            u'For device {}, configured timeout is not an integer: {}'.format(host, config[u'timeout']))

    ping_metrics_group = PanoptesMetricsGroup(resource, u'ping', execute_frequency)

    try:
        panoptes_ping = PanoptesPing(hostname=host, count=count, timeout=timeout)
        for metric, object_property in list(PING_METRICS.items()):
            ping_metrics_group.add_metric(PanoptesMetric(metric,
                                                         getattr(panoptes_ping, object_property),
                                                         PanoptesMetricType.GAUGE))
        if panoptes_ping.packet_loss_pct == 100.0:
            ping_status = DEVICE_METRICS_STATES.PING_FAILURE
        else:
            ping_status = DEVICE_METRICS_STATES.SUCCESS
    except Exception as e:
        logger.warn(u'For device {}, ping failed: {}'.format(host, repr(e)))
        ping_status = DEVICE_METRICS_STATES.PING_FAILURE

    ping_metrics_group.add_metric(PanoptesMetric(u'ping_status', ping_status, PanoptesMetricType.GAUGE))
    logger.debug(u'For device {}, ping results are: {}'.format(host, str(ping_metrics_group.json)))

    ping_metrics_group_set = PanoptesMetricsGroupSet()
    ping_metrics_group_set.add(ping_metrics_group)

    end_time = time()
    logger.info(u'Done pinging device "{}" in {} seconds, {} metric groups'.format(
        host, round(end_time - start_time, 2), len(ping_metrics_group_set)))

    return ping_metrics_group_set
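# Illustrative only: a minimal sketch of the mapping the ping run() above expects under
# context.config[u'main']. The key names come from the code; the values shown here are
# assumptions, and 'count'/'timeout' are optional (they fall back to DEFAULT_PING_COUNT /
# DEFAULT_PING_TIMEOUT when absent).
example_ping_plugin_config = {
    u'main': {
        u'execute_frequency': u'60',  # seconds; required, cast to int by run()
        u'count': u'5',               # optional number of echo requests
        u'timeout': u'10'             # optional per-ping timeout in seconds
    }
}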
def run(self, context):
    self._plugin_context = context
    self._logger = context.logger
    self._device = context.data
    self._device_host = self._device.resource_endpoint
    self._device_model = self._device.resource_metadata.get(u'model', u'unknown')
    self._execute_frequency = int(context.config[u'main'][u'execute_frequency'])
    self._snmp_connection = None
    self._asr_device_metrics = PanoptesMetricsGroupSet()

    try:
        polling_status_metric_name = context.config[u'main'][u'polling_status_metric_name']
    except KeyError:
        self._logger.error(u'Polling status metric name not defined for %s' % self._device_host)
        raise PanoptesPollingPluginConfigurationError(
            u'Polling status metric name not defined for %s' % self._device_host)

    self._polling_status = PanoptesPollingStatus(resource=self._device,
                                                 execute_frequency=self._execute_frequency,
                                                 logger=self._logger,
                                                 metric_name=polling_status_metric_name)
    self._max_repetitions = _MAX_REPETITIONS  # TODO

    self._logger.info(u'Going to poll ASR Device "%s" (model "%s") for device metrics' %
                      (self._device_host, self._device_model))

    start_time = time.time()
    device_results = self.get_device_metrics()
    end_time = time.time()

    if device_results:
        self._logger.info(
            u'Done polling ASR Device metrics for device "%s" in %.2f seconds, %s metrics' %
            (self._device_host, end_time - start_time, len(device_results)))
    else:
        self._logger.warn(u'Error polling device metrics for ASR Device "%s" (model "%s")' %
                          (self._device_host, self._device_model))

    return device_results
def prepare_panoptes_metrics_group_set(self, file_path=None):
    panoptes_metric_group_set = PanoptesMetricsGroupSet()
    path_to_metrics_file = plugin_results_file if file_path is None else file_path

    with open(path_to_metrics_file) as results_file:
        panoptes_json_data = json.load(results_file)

        for panoptes_data_object in panoptes_json_data:
            resource = panoptes_data_object[u'resource']
            panoptes_resource = PanoptesResource(
                resource_site=resource[u'resource_site'],
                resource_class=resource[u'resource_class'],
                resource_subclass=resource[u'resource_subclass'],
                resource_type=resource[u'resource_type'],
                resource_id=resource[u'resource_id'],
                resource_endpoint=resource[u'resource_endpoint'],
                resource_plugin=resource[u'resource_plugin'],
                resource_creation_timestamp=0)

            panoptes_metric_group = PanoptesMetricsGroup(
                resource=panoptes_resource,
                group_type=panoptes_data_object[u'metrics_group_type'],
                interval=panoptes_data_object[u'metrics_group_interval']
            )

            for dimension in panoptes_data_object[u'dimensions']:
                panoptes_metric_group.add_dimension(
                    PanoptesMetricDimension(
                        name=dimension[u'dimension_name'],
                        value=dimension[u'dimension_value']
                    )
                )

            for metric in panoptes_data_object[u'metrics']:
                panoptes_metric_group.add_metric(
                    PanoptesMetric(
                        metric_name=metric[u'metric_name'],
                        metric_value=metric[u'metric_value'],
                        metric_type=PanoptesMetricType().GAUGE
                        if metric[u'metric_type'] == u'gauge' else PanoptesMetricType().COUNTER,
                        metric_creation_timestamp=metric[u'metric_creation_timestamp']
                    )
                )

            panoptes_metric_group_set.add(panoptes_metric_group)

    return panoptes_metric_group_set
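# Illustrative only: the JSON file loaded above is expected to be a list of metrics-group
# objects shaped like the sketch below. The field names mirror the keys read by
# prepare_panoptes_metrics_group_set(); the concrete values are made up for illustration.
example_panoptes_json_data = [
    {
        u'resource': {
            u'resource_site': u'test_site',
            u'resource_class': u'network',
            u'resource_subclass': u'router',
            u'resource_type': u'cisco',
            u'resource_id': u'test_id',
            u'resource_endpoint': u'router01.example.com',
            u'resource_plugin': u'test_plugin'
        },
        u'metrics_group_type': u'interface',
        u'metrics_group_interval': 60,
        u'dimensions': [
            {u'dimension_name': u'interface_name', u'dimension_value': u'eth0'}
        ],
        u'metrics': [
            {u'metric_name': u'bits_in', u'metric_value': 8000, u'metric_type': u'counter',
             u'metric_creation_timestamp': 1569967062.65}
        ]
    }
]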
class HeartbeatMetrics(object):
    def __init__(self, plugin_context, device_resource, execute_frequency):
        self._plugin_context = plugin_context
        self._logger = plugin_context.logger
        self._enrichment = plugin_context.enrichment
        self._device_resource = device_resource
        self._device_fqdn = device_resource.resource_endpoint
        self._execute_frequency = execute_frequency
        self._device_heartbeat_metrics = PanoptesMetricsGroupSet()

    def get_metrics(self):
        try:
            logger = self._logger
            events_ts_metric_group = PanoptesMetricsGroup(self._device_resource, u'heartbeat',
                                                          self._execute_frequency)
            events_ts_metric_group.add_metric(PanoptesMetric(u'status', 1, PanoptesMetricType.GAUGE))
            events_ts_metric_group.add_metric(PanoptesMetric(u'heartbeat_enrichment_timestamp',
                                                             self._get_enrichment_ts(),
                                                             PanoptesMetricType.GAUGE))
            self._device_heartbeat_metrics.add(events_ts_metric_group)
            logger.debug(u'Heartbeat metrics for host {} PanoptesMetricsGroupSet {}'.format(
                self._device_fqdn, self._device_heartbeat_metrics))
            return self._device_heartbeat_metrics
        except Exception as e:
            raise PanoptesPollingPluginError(
                u'Failed to get timestamp metrics for the host "%s": %s' % (self._device_fqdn, repr(e)))

    def _get_enrichment_ts(self):
        try:
            heartbeat_enrichment = self._enrichment.get_enrichment_value(u'self', u'heartbeat_ns', u'heartbeat')
            heartbeat_enrichment_timestamp = heartbeat_enrichment[u'timestamp']
            return int(heartbeat_enrichment_timestamp)
        except Exception as e:
            self._logger.error(u'Error while fetching enrichment heartbeat timestamp for Host {}: {}'.format(
                self._device_fqdn, repr(e)))
            return -1
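# Illustrative only: _get_enrichment_ts() above looks up the 'heartbeat' key in the
# 'heartbeat_ns' namespace of the device's own ('self') enrichment and reads its
# 'timestamp' field. A payload shaped roughly like the sketch below would satisfy it;
# anything beyond the 'timestamp' key is an assumption.
example_heartbeat_enrichment = {
    u'heartbeat': {
        u'timestamp': 1569967062  # epoch seconds recorded by the heartbeat enrichment plugin
    }
}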
def __init__(self):
    self._plugin_context: PanoptesPluginContext = None
    self._config: Dict[str, Any] = {}
    self._panoptes_metrics_group_set: PanoptesMetricsGroupSet = PanoptesMetricsGroupSet()
    self._device: PanoptesResource = None
    self._execute_frequency: int = 60
    self._logger = None
    self.napalm_device_connection = None
    super(NapalmPollingPlugin, self).__init__()
def __init__(self):
    self._plugin_context = None
    self._logger = None
    self._device = None
    self._device_host = None
    self._device_model = None
    self._execute_frequency = None
    self._snmp_connection = None
    self._asr_device_metrics = PanoptesMetricsGroupSet()
    self._polling_status = None
    self._max_repetitions = None
    self._cpu_metrics = None
    self._memory_metrics = None
    self._temp_metrics = None
    self._power_metrics = None
    self._crypto_metrics = None
    self._load_metrics = None
    super(PluginPollingASRDeviceMetrics, self).__init__()
def test_metrics_group_hash(self):
    now = round(time.time(), METRICS_TIMESTAMP_PRECISION)
    metrics_group = PanoptesMetricsGroup(self.__panoptes_resource, 'test', 120)
    metrics_group_two = PanoptesMetricsGroup(self.__panoptes_resource, 'test', 120)

    dimension = PanoptesMetricDimension('if_alias', 'bar')
    metric = PanoptesMetric('test_metric', 0, PanoptesMetricType.GAUGE, metric_creation_timestamp=now)
    metric_diff_timestamp = PanoptesMetric('test_metric', 0, PanoptesMetricType.GAUGE,
                                           metric_creation_timestamp=now + 0.01)

    metrics_group.add_dimension(dimension)
    metrics_group_two.add_dimension(dimension)
    self.assertEqual(metrics_group.__hash__(), metrics_group_two.__hash__())

    metrics_group.add_metric(metric)
    metrics_group_two.add_metric(metric_diff_timestamp)
    self.assertEqual(metrics_group.__hash__(), metrics_group_two.__hash__())

    metrics_group_set = PanoptesMetricsGroupSet()
    metrics_group_set.add(metrics_group)
    metrics_group_set.add(metrics_group_two)
    assert len(metrics_group_set) == 1
def run(self, context):
    self._plugin_context = context
    self._logger = context.logger
    self._device = context.data
    self._device_host = self._device.resource_endpoint
    self._device_model = self._device.resource_metadata.get(u'model', u'unknown')
    self._execute_frequency = int(context.config[u'main'][u'execute_frequency'])
    self._snmp_connection = None
    self._arista_device_metrics = PanoptesMetricsGroupSet()
    self._polling_status = PanoptesPollingStatus(resource=self._device,
                                                 execute_frequency=self._execute_frequency,
                                                 logger=self._logger)
    self._max_repetitions = _MAX_REPETITIONS

    self._logger.info(u'Going to poll Arista device "%s" (model "%s") for device metrics' %
                      (self._device_host, self._device_model))

    start_time = time.time()
    device_results = self.get_device_metrics()
    end_time = time.time()

    if device_results:
        self._logger.info(
            u'Done polling Arista Device metrics for device "%s" in %.2f seconds, %s metrics' %
            (self._device_host, end_time - start_time, len(device_results)))
    else:
        self._logger.warn(u'Error polling device metrics for Arista device %s' % self._device_host)

    return device_results
class PluginPollingDeviceInterfaceMetrics(PanoptesSNMPBasePlugin, PanoptesPollingPlugin): def __init__(self): super(PluginPollingDeviceInterfaceMetrics, self).__init__() self._device_interface_metrics = PanoptesMetricsGroupSet() self._polling_status = None self._interface_metrics_group = None self._dot3stats_map = None self._if_table_stats_map = None self._ifx_table_stats_map = None self._DIMENSION_MAP = { 'alias': self.get_alias, 'media_type': self.get_media_type, 'description': self.get_description, 'configured_speed': self.get_configured_speed, 'port_speed': self.get_port_speed, 'interface_name': self.get_interface_name, 'parent_interface_name': self.get_parent_interface_name, 'parent_interface_media_type': self.get_parent_interface_media_type, 'parent_interface_configured_speed': self.get_parent_interface_configured_speed, 'parent_interface_port_speed': self.get_parent_interface_port_speed } # Dimensions def get_interface_name(self, interface_index): return self.enrichment.get_enrichment_value('self', 'interface', interface_index).get('interface_name') def get_alias(self, interface_index): return self.enrichment.get_enrichment_value('self', 'interface', interface_index).get('alias') def get_description(self, interface_index): return self.enrichment.get_enrichment_value('self', 'interface', interface_index).get('description') def get_media_type(self, interface_index): return self.enrichment.get_enrichment_value('self', 'interface', interface_index).get('media_type') def get_port_speed(self, interface_index): return self.enrichment.get_enrichment_value('self', 'interface', interface_index).get('port_speed') def get_parent_interface_name(self, interface_index): return self.enrichment.get_enrichment_value('self', 'interface', interface_index).get('parent_interface_name') def get_parent_interface_media_type(self, interface_index): return self.enrichment.get_enrichment_value('self', 'interface', interface_index).get( 'parent_interface_media_type') def get_parent_interface_port_speed(self, interface_index): return self.enrichment.get_enrichment_value('self', 'interface', interface_index).get( 'parent_interface_port_speed') def get_parent_interface_configured_speed(self, interface_index): return self.enrichment.get_enrichment_value('self', 'interface', interface_index).get( 'parent_interface_configured_speed') # Metrics def get_bits_in(self, interface_index): if (ifHCInOctets + '.' + interface_index) in self._ifx_table_stats_map: return int(self._ifx_table_stats_map[ifHCInOctets + '.' + interface_index]) * 8 else: return _MISSING_METRIC_VALUE def get_unicast_packets_in(self, interface_index): return int(self._ifx_table_stats_map.get(ifHCInUcastPkts + '.' + interface_index, _MISSING_METRIC_VALUE)) def get_bits_out(self, interface_index): if (ifHCOutOctets + '.' + interface_index) in self._ifx_table_stats_map: return int(self._ifx_table_stats_map[ifHCOutOctets + '.' + interface_index]) * 8 else: return _MISSING_METRIC_VALUE def get_unicast_packets_out(self, interface_index): return int(self._ifx_table_stats_map.get(ifHCOutUcastPkts + '.' + interface_index, _MISSING_METRIC_VALUE)) def get_multicast_packets_in(self, interface_index): return int(self._ifx_table_stats_map.get(ifHCInMulticastPkts + '.' + interface_index, _MISSING_METRIC_VALUE)) def get_multicast_packets_out(self, interface_index): return int(self._ifx_table_stats_map.get(ifHCOutMulticastPkts + '.' 
+ interface_index, _MISSING_METRIC_VALUE)) def get_broadcast_packets_in(self, interface_index): return int(self._ifx_table_stats_map.get(ifHCInBroadcastPkts + '.' + interface_index, _MISSING_METRIC_VALUE)) def get_broadcast_packets_out(self, interface_index): return int(self._ifx_table_stats_map.get(ifHCOutBroadcastPkts + '.' + interface_index, _MISSING_METRIC_VALUE)) def get_total_packets_in(self, interface_index): unicast_packets_in = self.get_unicast_packets_in(interface_index) multicast_packets_in = self.get_multicast_packets_in(interface_index) broadcast_packets_in = self.get_broadcast_packets_in(interface_index) if _MISSING_METRIC_VALUE not in [unicast_packets_in, multicast_packets_in, broadcast_packets_in]: return unicast_packets_in + multicast_packets_in + broadcast_packets_in else: return _MISSING_METRIC_VALUE def get_total_packets_out(self, interface_index): unicast_packets_out = self.get_unicast_packets_out(interface_index) multicast_packets_out = self.get_multicast_packets_out(interface_index) broadcast_packets_out = self.get_broadcast_packets_out(interface_index) if _MISSING_METRIC_VALUE not in [unicast_packets_out, multicast_packets_out, broadcast_packets_out]: return unicast_packets_out + multicast_packets_out + broadcast_packets_out else: return _MISSING_METRIC_VALUE def get_admin_state(self, interface_index): return int(self._if_table_stats_map.get(ifAdminStatus + '.' + interface_index, _MISSING_METRIC_VALUE)) def get_oper_state(self, interface_index): return int(self._if_table_stats_map.get(ifOperStatus + '.' + interface_index, _MISSING_METRIC_VALUE)) def get_oper_admin_state_mismatch(self, interface_index): return 0 if self.get_oper_state(interface_index) == self.get_admin_state(interface_index) else 1 def get_discards_in(self, interface_index): return int(self._if_table_stats_map.get(ifInDiscards + '.' + interface_index, _MISSING_METRIC_VALUE)) def get_errors_in(self, interface_index): return int(self._if_table_stats_map.get(ifInErrors + '.' + interface_index, _MISSING_METRIC_VALUE)) def get_discards_out(self, interface_index): return int(self._if_table_stats_map.get(ifOutDiscards + '.' + interface_index, _MISSING_METRIC_VALUE)) def get_errors_out(self, interface_index): return int(self._if_table_stats_map.get(ifOutErrors + '.' + interface_index, _MISSING_METRIC_VALUE)) def get_mtu(self, interface_index): return int(self._if_table_stats_map.get(ifMtu + '.' + interface_index, _MISSING_METRIC_VALUE)) def get_if_high_speed(self, interface_index): # n.b. adjusted value means I can't use 'get(..., _MISSING_METRIC_VALUE)' idiom # Mbps by definition return int(self._ifx_table_stats_map.get(ifHighSpeed + '.' + interface_index, _MISSING_METRIC_VALUE)) def get_if_speed(self, interface_index): return int(self._if_table_stats_map.get(ifSpeed + '.' + interface_index, _MISSING_METRIC_VALUE)) def get_configured_speed(self, index): return self.enrichment.get_enrichment_value('self', 'interface', index).get('configured_speed', _MISSING_METRIC_VALUE) def get_errors_frame(self, interface_index): return int(self._dot3stats_map.get(dot3StatsAlignmentErrors + '.' + interface_index, _MISSING_METRIC_VALUE)) def get_errors_crc(self, interface_index): return int(self._dot3stats_map.get(dot3StatsFCSErrors + '.' + interface_index, _MISSING_METRIC_VALUE)) def get_errors_giants(self, interface_index): return int(self._dot3stats_map.get(dot3StatsFrameTooLongs + '.' 
+ interface_index, _MISSING_METRIC_VALUE)) @threaded_cached_property def interface_indices(self): result = set() for oid in self._ifx_table_stats_map: result.add(oid.split('.')[-1]) return result def _getif_table_stats(self): result = dict() try: for oid in self._if_table_stats_map: index = oid.split('.')[-1] result[index] = dict() result[index]['admin_state'] = self.get_admin_state(index) result[index]['oper_state'] = self.get_oper_state(index) result[index]['oper_admin_state_mismatch'] = self.get_oper_admin_state_mismatch(index) result[index]['errors_in'] = self.get_errors_in(index) result[index]['errors_out'] = self.get_errors_out(index) result[index]['discards_in'] = self.get_discards_in(index) result[index]['discards_out'] = self.get_discards_out(index) result[index]['mtu'] = self.get_mtu(index) return result except Exception as e: self._polling_status.handle_exception('interface', e) def _getifx_table_stats(self): result = dict() try: for oid in self._ifx_table_stats_map: index = oid.split('.')[-1] result[index] = dict() result[index]['bits_in'] = self.get_bits_in(index) result[index]['bits_out'] = self.get_bits_out(index) result[index]['unicast_packets_in'] = self.get_unicast_packets_in(index) result[index]['unicast_packets_out'] = self.get_unicast_packets_out(index) result[index]['multicast_packets_in'] = self.get_multicast_packets_in(index) result[index]['multicast_packets_out'] = self.get_multicast_packets_out(index) result[index]['broadcast_packets_in'] = self.get_broadcast_packets_in(index) result[index]['broadcast_packets_out'] = self.get_broadcast_packets_out(index) result[index]['total_packets_in'] = self.get_total_packets_in(index) result[index]['total_packets_out'] = self.get_total_packets_out(index) result[index]['configured_speed'] = self.get_configured_speed(index) return result except Exception as e: self._polling_status.handle_exception('interface', e) def _getdot3stats(self): result = dict() try: # Use ifx_table_stats_map b/c dot3stats is not defined for every machine for oid in self._ifx_table_stats_map: index = oid.split('.')[-1] result[index] = dict() result[index]['errors_frame'] = self.get_errors_frame(index) result[index]['errors_crc'] = self.get_errors_crc(index) result[index]['errors_giants'] = self.get_errors_giants(index) return result except Exception as e: self._polling_status.handle_exception('interface', e) @staticmethod def _get_state_val(state): s = int(state) if s == 2: return _INTERFACE_STATES.DOWN elif s == 1: return _INTERFACE_STATES.UP else: return _INTERFACE_STATES.UNKNOWN def _build_ifx_table_stats_map(self): """Maps child oids of ifXTable to their respective values as PanoptesSNMPVariables""" ifx_table_stats = list() for metric in ifx_table_oids: for varbind in self._snmp_connection.bulk_walk(metric, max_repetitions=self.snmp_configuration.max_repetitions): ifx_table_stats.append(varbind) self._ifx_table_stats_map = dict() for ent in ifx_table_stats: self._ifx_table_stats_map[ent.oid + '.' + ent.index] = ent.value def _build_if_table_stats_map(self): """Maps child oids of ifTable to their respective values as PanoptesSNMPVariables""" if_table_stats = list() for metric in if_table_oids: for varbind in self._snmp_connection.bulk_walk(metric, max_repetitions=self.snmp_configuration.max_repetitions): if_table_stats.append(varbind) self._if_table_stats_map = dict() for ent in if_table_stats: self._if_table_stats_map[ent.oid + '.' 
+ ent.index] = ent.value def _build_dot3stats_map(self): """Maps child oids of dot3statsTable to their respective values as PanoptesSNMPVariables""" dot3stats = list() for metric in dots3stats_table_oids: for varbind in self._snmp_connection.bulk_walk(metric, max_repetitions=self.snmp_configuration.max_repetitions): dot3stats.append(varbind) self._dot3stats_map = dict() for ent in dot3stats: self._dot3stats_map[ent.oid + '.' + ent.index] = ent.value def _smart_add_dimension(self, method, dimension_name, index): dimension = method(index) if dimension is not None and PanoptesValidators.valid_nonempty_string(str(dimension)): self._interface_metrics_group.add_dimension(PanoptesMetricDimension(dimension_name, str(dimension))) else: self._interface_metrics_group.add_dimension(PanoptesMetricDimension(dimension_name, _DEFAULT_DIMENSION_VALUE)) def get_results(self): self._polling_status = PanoptesPollingStatus(resource=self.resource, execute_frequency=self.execute_frequency, logger=self.logger, metric_name='interface_polling_status') interface_metrics = dict() try: start_time = time.time() self._build_dot3stats_map() self._build_if_table_stats_map() self._build_ifx_table_stats_map() end_time = time.time() self._logger.info('SNMP calls for device %s completed in %.2f seconds' % ( self.host, end_time - start_time)) interface_metrics.update(self._getdot3stats()) if_interface_metrics = self._getif_table_stats() ifx_interface_metrics = self._getifx_table_stats() # https://github.com/PyCQA/pylint/issues/1694 for i in self.interface_indices: # pylint: disable=E1133 if i not in interface_metrics: interface_metrics[i] = dict() interface_metrics[i].update(ifx_interface_metrics[i]) interface_metrics[i].update(if_interface_metrics[i]) for interface_index in interface_metrics.keys(): self._interface_metrics_group = PanoptesMetricsGroup(self.resource, 'interface', self.execute_frequency) interface = interface_metrics[interface_index] for dimension_name, dimension_method in self._DIMENSION_MAP.items(): self._smart_add_dimension(method=dimension_method, dimension_name=dimension_name, index=interface_index ) for metric in interface.keys(): metric_type = _METRIC_TYPE_MAP[metric] if not isinstance(interface[metric], numbers.Number): self._interface_metrics_group.add_metric(PanoptesMetric(str(metric), _MISSING_METRIC_VALUE, metric_type)) else: self._interface_metrics_group.add_metric(PanoptesMetric(str(metric), interface[metric], metric_type)) self._device_interface_metrics.add(self._interface_metrics_group) self._polling_status.handle_success('interface') self._logger.debug('Found interface metrics: "%s" for device "%s"' % ( interface_metrics, self.host)) except Exception as e: self._polling_status.handle_exception('interface', e) finally: self._device_interface_metrics.add(self._polling_status.device_status_metrics_group) return self._device_interface_metrics
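# Illustrative only: _METRIC_TYPE_MAP is referenced in get_results() above but is not shown in
# this excerpt. A plausible partial sketch, assuming counters for octet/packet/error totals and
# gauges for state and size values, is given below; the actual mapping used by the plugin may
# differ and likely covers more metric names (multicast/broadcast/total packets, frame/CRC/giant
# errors, and so on).
_METRIC_TYPE_MAP_EXAMPLE = {
    'bits_in': PanoptesMetricType.COUNTER,
    'bits_out': PanoptesMetricType.COUNTER,
    'unicast_packets_in': PanoptesMetricType.COUNTER,
    'unicast_packets_out': PanoptesMetricType.COUNTER,
    'errors_in': PanoptesMetricType.COUNTER,
    'errors_out': PanoptesMetricType.COUNTER,
    'discards_in': PanoptesMetricType.COUNTER,
    'discards_out': PanoptesMetricType.COUNTER,
    'admin_state': PanoptesMetricType.GAUGE,
    'oper_state': PanoptesMetricType.GAUGE,
    'oper_admin_state_mismatch': PanoptesMetricType.GAUGE,
    'mtu': PanoptesMetricType.GAUGE,
    'configured_speed': PanoptesMetricType.GAUGE
}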
class PluginPollingASRDeviceMetrics(PanoptesPollingPlugin): def __init__(self): self._plugin_context = None self._logger = None self._device = None self._device_host = None self._device_model = None self._execute_frequency = None self._snmp_connection = None self._asr_device_metrics = PanoptesMetricsGroupSet() self._polling_status = None self._max_repetitions = None self._cpu_metrics = None self._memory_metrics = None self._temp_metrics = None self._power_metrics = None self._crypto_metrics = None self._load_metrics = None super(PluginPollingASRDeviceMetrics, self).__init__() def _get_crypto_cpu_interval(self): if self._execute_frequency < 60: return u'2' elif 60 < self._execute_frequency < 300: return u'3' elif 300 < self._execute_frequency < 900: return u'4' else: return u'2' def _get_qfp_interval(self): if 5 <= self._execute_frequency < 60: return u'1' elif 60 <= self._execute_frequency < 300: return u'2' elif 300 <= self._execute_frequency < 3600: return u'3' elif 3600 <= self._execute_frequency: return u'4' else: return u'2' # TODO is mutable type allowed? def _get_entity_indices(self, ent_physical_class, ent_strings): ent_indices = [] # https://github.com/PyCQA/pylint/issues/1694 for ent in self.entities: # pylint: disable=E1133 value = ent.value if isinstance(value, bytes): value = value.decode(u'ascii', u'ignore') physical_class = entPhysicalClassPrefix + u'.' + ent.index.split( u'.')[-1] # pylint: disable=E1133 physical_class_index = int( self.entity_physical_entries_map[physical_class]) # pylint: disable=E1136 if ENT_PHYSICAL_CLASSES[ physical_class_index] == ent_physical_class: for s in ent_strings: if s in value: ent_indices.append(ent.index) return ent_indices @threaded_cached_property def entities(self): return self._snmp_connection.bulk_walk(oid=entPhysicalEntry, non_repeaters=0, max_repetitions=25) @threaded_cached_property def entity_physical_entries_map(self): """Maps child oids of entPhysicalEntry to their respective values as PanoptesSNMPVariables""" ent_physical_entries_map = {} # https://github.com/PyCQA/pylint/issues/1694 for ent in self.entities: # pylint: disable=E1133 value = ent.value if isinstance(value, bytes): value = value.decode(u'ascii', u'ignore') ent_physical_entries_map[ent.oid + u'.' + ent.index] = value return ent_physical_entries_map @threaded_cached_property def sensor_entities(self): return self._snmp_connection.bulk_walk(oid=entSensorValueEntry, non_repeaters=0, max_repetitions=25) @threaded_cached_property def sensor_entity_map(self): """Maps child oids of entSensorValueEntry to their respective values as PanoptesSNMPVariables""" sensor_ent_map = {} # https://github.com/PyCQA/pylint/issues/1694 for ent in self.sensor_entities: # pylint: disable=E1133 sensor_ent_map[ent.oid + u'.' + ent.index] = ent.value return sensor_ent_map def _get_sensor_details(self, index): try: details = dict() entPhysicalNamePrefixIndex = entPhysicalNamePrefix + u'.' + index entSensorValueIndex = entSensorValue + u'.' + index # https://github.com/PyCQA/pylint/issues/1694 details[u'sensor_value'] = int( self.sensor_entity_map[entSensorValueIndex]) # pylint: disable=E1136 # https://github.com/PyCQA/pylint/issues/1694 entity_description = self.entity_physical_entries_map[ entPhysicalNamePrefixIndex] # pylint: disable=E1136 if entity_description in MILLI_ENT_STRINGS: # TODO how many sig digs? details[u'sensor_value'] = old_div(details[u'sensor_value'], 1000) sensor_scale_code = int(self.sensor_entity_map[entSensorScale + u'.' 
+ index]) # pylint: disable=E1136 details[u'sensor_scale'] = sensor_scale_code return details except Exception as e: raise e def _is_celsius_sensor_type(self, index): ans = int(self.sensor_entity_map[entSensorType + u'.' + index]) # pylint: disable=E1136 if ans != 8: self._logger.warn(u"Entity Sensor Type not Celsius: %d" % ans) return ans == 8 def _get_cpu_name(self, cpu_id): cpu_name = self.entity_physical_entries_map[entPhysicalNamePrefix + u'.' + cpu_id] # pylint: disable=E1136 if isinstance(cpu_name, bytes): cpu_name = cpu_name.decode(u'ascii', u'ignore') return cpu_name def _get_cpu_id(self, temp_id): id = self._snmp_connection.get(oid=cpmCPUTotalPhysicalIndex + u'.' + str(temp_id)) return id.value def _get_cpu_interval(self): if 5 <= self._execute_frequency < 60: return cpmCPUTotalMonIntervalValue # replaces cpmCPUTotal5SecRev elif 60 <= self._execute_frequency < 300: return cpmCPUTotal1minRev elif 300 <= self._execute_frequency: return cpmCPUTotal5minRev else: return cpmCPUTotal1minRev def _get_load_metrics(self): try: interval = self._get_qfp_interval() # n.b. There should only be one qfp entry per crypto device. qfp_entry_index = self._get_entity_indices( ent_physical_class=u'cpu', ent_strings=[u'qfp', u'QFP'])[0].split(u'.')[-1] self._load_metrics = dict() processing_load = int( self._snmp_connection.get(oid=ceqfpUtilProcessingLoad + u'.' + qfp_entry_index + u'.' + interval).value) self._load_metrics[u'processing_load'] = processing_load except Exception as e: self._polling_status.handle_exception(u'load', e) try: if self._load_metrics: load_metrics_group = PanoptesMetricsGroup( self._device, u'load', self._execute_frequency) load_metrics_group.add_metric( PanoptesMetric(u'processing_load', self._load_metrics[u'processing_load'], PanoptesMetricType.GAUGE)) self._asr_device_metrics.add(load_metrics_group) self._polling_status.handle_success(u'load') self._logger.debug( u'Found load metrics "%s" for %s: %s' % (self._load_metrics, self._polling_status.device_type, self._device_host)) except Exception as e: self._polling_status.handle_exception(u'load', e) def _get_crypto_metrics(self): self._crypto_metrics = dict() try: crypto_cpu_entry_indices = set([ x.split(u'.')[-1] for x in self._get_entity_indices(ent_physical_class=u'cpu', ent_strings=[u'Crypto Asic']) ]) interval = self._get_crypto_cpu_interval() for index in crypto_cpu_entry_indices: self._crypto_metrics[index] = dict() packets_in = int( self._snmp_connection.get(oid=cepStatsMeasurement + u'.' + index + u'.' + interval + pktsIn).value) packets_out = int( self._snmp_connection.get(oid=cepStatsMeasurement + u'.' + index + u'.' 
+ interval + pktsOut).value) self._crypto_metrics[index][u'packets_in'] = packets_in self._crypto_metrics[index][u'packets_out'] = packets_out self._crypto_metrics[index][u'cpu_name'] = self._get_cpu_name( index) except Exception as e: self._polling_status.handle_exception(u'crypto', e) try: if self._crypto_metrics: for cpu_id in self._crypto_metrics: crypto_metrics_group = PanoptesMetricsGroup( self._device, u'crypto', self._execute_frequency) crypto_metrics_group.add_dimension( PanoptesMetricDimension(u'cpu_no', cpu_id)) crypto_metrics_group.add_dimension( PanoptesMetricDimension( u'cpu_name', self._crypto_metrics[cpu_id][u'cpu_name'])) crypto_metrics_group.add_metric( PanoptesMetric( u'packets_in', self._crypto_metrics[cpu_id][u'packets_in'], PanoptesMetricType.COUNTER)) crypto_metrics_group.add_metric( PanoptesMetric( u'packets_out', self._crypto_metrics[cpu_id][u'packets_out'], PanoptesMetricType.COUNTER)) self._asr_device_metrics.add(crypto_metrics_group) self._polling_status.handle_success(u'crypto') self._logger.debug( u'Found crypto metrics "%s" for %s: %s' % (self._crypto_metrics, self._polling_status.device_type, self._device_host)) except Exception as e: self._polling_status.handle_exception(u'crypto', e) def _convert_scaled_celsius_value_to_units( self, x, scale): # TODO introducing bug when I make static """Convert the provided value in scale-units Celsius to Celsius. N.B. The indices for 'peta' and 'exa' appear to be flip-flopped""" if scale == 14: scale = 15 elif scale == 15: scale = 14 return x * (10**(scale - 9)) def _get_power_metrics(self): try: power_metrics = dict() power_metrics[u'power_module_map'] = dict() power_entity_indices = self._get_entity_indices( ent_physical_class=u'powerSupply', ent_strings=[u'Power Supply Module']) power_metrics[u'power_units_total'] = len(power_entity_indices) for index in power_entity_indices: index = index.split(u'.')[-1] entity_name_index = entPhysicalNamePrefix + u'.' + index entity_name = self.entity_physical_entries_map[ entity_name_index] # pylint: disable=E1136 power_on = True if int(self._snmp_connection.get(oid=cefcFRUPowerOperStatus + u'.' + index).value) \ == 2 else False power_metrics[u'power_module_map'][index] = { u'entity_name': entity_name, u'power_on': power_on } # Todo Collect current as well? -- Not yet self._power_metrics = power_metrics except Exception as e: raise e def _get_temperature_metrics(self): """Don't rely on the ASR's thresholds; report everything and do our own alerting.""" try: temp_metrics = dict() temperature_entity_indices = self._get_entity_indices( ent_physical_class=u'sensor', ent_strings=[u'temp', u'Temp']) for index in temperature_entity_indices: index = index.split(u'.')[-1] temp_metrics[index] = { u'entity_name': self.entity_physical_entries_map[entPhysicalNamePrefix + u'.' 
+ index] # pylint: disable=E1136 } if self._is_celsius_sensor_type(index): temp_metrics[index][ u'sensor_details'] = self._get_sensor_details(index) celsius_value = temp_metrics[index][u'sensor_details'][ u'sensor_value'] scale = temp_metrics[index][u'sensor_details'][ u'sensor_scale'] celsius_value_in_units = self._convert_scaled_celsius_value_to_units( celsius_value, scale) temp_f = convert_celsius_to_fahrenheit( celsius_value_in_units) if 33 < temp_f < 200: temp_metrics[index]['temp_f'] = temp_f else: del temp_metrics[index] self._temp_metrics = temp_metrics except Exception as e: raise e def _get_environment_metrics(self): try: self._get_temperature_metrics() self._logger.debug( u'Found Temperature metrics "%s" for %s: %s' % (self._temp_metrics, self._polling_status.device_type, self._device_host)) except Exception as e: self._polling_status.handle_exception(u'environment', e) try: self._get_power_metrics() self._logger.debug( u'Found Power metrics "%s" for %s: %s' % (self._power_metrics, self._polling_status.device_type, self._device_host)) # TODO Valuable to monitor cefcFRUPowerOperStatus for status "9: onButFanFail"? -- Yes, but not now except Exception as e: self._polling_status.handle_exception(u'environment', e) try: if self._temp_metrics: for index in list(self._temp_metrics.keys()): environment_metrics_group = PanoptesMetricsGroup( self._device, u'environment', self._execute_frequency) environment_metrics_group.add_dimension( PanoptesMetricDimension( u'entity_name', self._temp_metrics[index][u'entity_name'])) environment_metrics_group.add_metric( PanoptesMetric(u'temperature_fahrenheit', self._temp_metrics[index][u'temp_f'], PanoptesMetricType.GAUGE)) self._asr_device_metrics.add(environment_metrics_group) self._polling_status.handle_success(u'environment') except Exception as e: self._polling_status.handle_exception(u'environment', e) # TODO Do we need to report sensor details as well? -- Not yet try: if self._power_metrics: environment_metrics_group = PanoptesMetricsGroup( self._device, u'environment', self._execute_frequency) num_power_units_on = 0 for index in list( self._power_metrics[u'power_module_map'].keys()): if self._power_metrics[u'power_module_map'][index][ u'power_on']: num_power_units_on += 1 environment_metrics_group.add_metric( PanoptesMetric(u'power_units_total', self._power_metrics[u'power_units_total'], PanoptesMetricType.GAUGE)) environment_metrics_group.add_metric( PanoptesMetric(u'power_units_on', num_power_units_on, PanoptesMetricType.GAUGE)) self._asr_device_metrics.add(environment_metrics_group) self._polling_status.handle_success(u'environment') except Exception as e: self._polling_status.handle_exception(u'environment', e) def _get_memory_metrics(self): self._memory_metrics = dict() self._memory_metrics[u'dram'] = dict() try: memory_used = int( self._snmp_connection.get(oid=cempMemPoolHCUsed).value) self._memory_metrics[u'dram'][u'memory_used'] = memory_used memory_free = int( self._snmp_connection.get(oid=cempMemPoolHCFree).value) self._memory_metrics[u'dram'][ u'memory_total'] = memory_used + memory_free except Exception as e: self._polling_status.handle_exception(u'memory', e) self._memory_metrics.pop(u'dram') self._memory_metrics[u'qfp'] = dict( ) # TODO Safe to assume only one qfp_entry? try: qfp_entry_indices = set([ x.split(u'.')[-1] for x in self._get_entity_indices(ent_physical_class=u'cpu', ent_strings=[u'qfp', u'QFP']) ]) for index in qfp_entry_indices: qfp_memory_used = int( self._snmp_connection.get(oid=ceqfpMemoryResInUse + u'.' 
+ index + u'.' + u'1').value) self._memory_metrics[u'qfp'][u'memory_used'] = qfp_memory_used qfp_memory_free = int( self._snmp_connection.get(oid=ceqfpMemoryResFree + u'.' + index + u'.' + u'1').value) self._memory_metrics[u'qfp'][ u'memory_total'] = qfp_memory_used + qfp_memory_free except Exception as e: self._polling_status.handle_exception(u'memory', e) self._memory_metrics.pop( u'qfp') # TODO Safe to assume only one qfp_entry? try: if len(self._memory_metrics) > 0: for memory_type in self._memory_metrics: memory_metrics_group = PanoptesMetricsGroup( self._device, u'memory', self._execute_frequency) memory_metrics_group.add_dimension( PanoptesMetricDimension(u'memory_type', memory_type)) memory_metrics_group.add_metric( PanoptesMetric( u'memory_used', self._memory_metrics[memory_type][u'memory_used'], PanoptesMetricType.GAUGE)) memory_metrics_group.add_metric( PanoptesMetric( u'memory_total', self._memory_metrics[memory_type][u'memory_total'], PanoptesMetricType.GAUGE)) self._asr_device_metrics.add(memory_metrics_group) self._polling_status.handle_success(u'memory') self._logger.debug( u'Found Memory metrics "%s" for %s: %s' % (self._memory_metrics, self._polling_status.device_type, self._device_host)) except Exception as e: self._polling_status.handle_exception(u'memory', e) def _get_system_cpu_metrics(self): self._cpu_metrics = dict() self._cpu_metrics[u'ctrl'] = dict() try: cpus = self._snmp_connection.bulk_walk( oid=self._get_cpu_interval(), non_repeaters=0, max_repetitions=25) if len(cpus) == 0: raise PanoptesMetricsNullException for cpu in cpus: # The last int for each cpu is a temporary index we will append to the entPhysicalNamePrefix # and cpmCPUTotalPhysicalIndex OIDS to get the cpu name and id values, respectively temp_id = int(cpu.index.rsplit(u'.', 1)[-1]) # last object cpu_id = self._get_cpu_id(temp_id) self._cpu_metrics[u'ctrl'][cpu_id] = dict() self._cpu_metrics[u'ctrl'][cpu_id][u'cpu_util'] = int( cpu.value) self._cpu_metrics[u'ctrl'][cpu_id][ u'cpu_name'] = self._get_cpu_name( cpu_id) # report name, num as dim except Exception as e: self._polling_status.handle_exception(u'cpu', e) self._cpu_metrics.pop(u'ctrl') self._cpu_metrics[u'data'] = dict() try: interval = self._get_crypto_cpu_interval() crypto_cpu_entry_indices = set([ x.split(u'.')[-1] for x in self._get_entity_indices(ent_physical_class=u'cpu', ent_strings=[u'Crypto Asic']) ]) for index in crypto_cpu_entry_indices: self._cpu_metrics[u'data'][index] = dict() # todo special def for u'1'/util? cpu_util = int( self._snmp_connection.get(oid=cepStatsMeasurement + u'.' + index + u'.' 
+ interval + cpuUtil).value) self._cpu_metrics[u'data'][index][u'cpu_util'] = cpu_util self._cpu_metrics[u'data'][index][ u'cpu_name'] = self._get_cpu_name(index) except Exception as e: self._polling_status.handle_exception(u'cpu', e) self._cpu_metrics.pop(u'data') try: if len(self._cpu_metrics) > 0: for cpu_type in self._cpu_metrics: for cpu_id in list(self._cpu_metrics[cpu_type].keys()): cpu_metrics_group = PanoptesMetricsGroup( self._device, u'cpu', self._execute_frequency) cpu_metrics_group.add_dimension( PanoptesMetricDimension(u'cpu_type', cpu_type)) cpu_metrics_group.add_dimension( PanoptesMetricDimension(u'cpu_no', cpu_id)) cpu_metrics_group.add_dimension( PanoptesMetricDimension( u'cpu_name', self._cpu_metrics[cpu_type] [cpu_id][u'cpu_name'])) cpu_metrics_group.add_metric( PanoptesMetric( u'cpu_utilization', self._cpu_metrics[cpu_type] [cpu_id][u'cpu_util'], PanoptesMetricType.GAUGE)) self._asr_device_metrics.add(cpu_metrics_group) self._polling_status.handle_success(u'cpu') self._logger.debug( u'Found CPU metrics "%s" for %s: %s' % (self._cpu_metrics, self._polling_status.device_type, self._device_host)) except Exception as e: self._polling_status.handle_exception(u'cpu', e) def get_device_metrics(self): start_time = time.time() try: start_time = time.time() self._snmp_connection = PanoptesSNMPConnectionFactory.get_snmp_connection( plugin_context=self._plugin_context, resource=self._device) except Exception as e: self._polling_status.handle_exception(u'device', e) finally: if self._polling_status.device_status != DEVICE_METRICS_STATES.SUCCESS: self._asr_device_metrics.add( self._polling_status.device_status_metrics_group) return self._asr_device_metrics self._get_system_cpu_metrics() self._get_memory_metrics() self._get_environment_metrics() self._get_crypto_metrics() self._get_load_metrics() end_time = time.time() self._logger.info(u'SNMP calls for ASR %s completed in %.2f seconds' % (self._device_host, end_time - start_time)) self._asr_device_metrics.add( self._polling_status.device_status_metrics_group) return self._asr_device_metrics def run(self, context): self._plugin_context = context self._logger = context.logger self._device = context.data self._device_host = self._device.resource_endpoint self._device_model = self._device.resource_metadata.get( u'model', u'unknown') self._execute_frequency = int( context.config[u'main'][u'execute_frequency']) self._snmp_connection = None self._asr_device_metrics = PanoptesMetricsGroupSet() try: polling_status_metric_name = context.config[u'main'][ u'polling_status_metric_name'] except: self._logger.error( u'Polling status metric name not defined for %s' % self._device_host) raise PanoptesPollingPluginConfigurationError( u'Polling status metric name not defined for %s' % self._device_host) self._polling_status = PanoptesPollingStatus( resource=self._device, execute_frequency=self._execute_frequency, logger=self._logger, metric_name=polling_status_metric_name) self._max_repetitions = _MAX_REPETITIONS # Todo self._logger.info( u'Going to poll ASR Device "%s" (model "%s") for device metrics' % (self._device_host, self._device_model)) start_time = time.time() device_results = self.get_device_metrics() end_time = time.time() if device_results: self._logger.info( u'Done polling ASR Device metrics for device "%s" in %.2f seconds, %s metrics' % (self._device_host, end_time - start_time, len(device_results))) else: self._logger.warn( u'Error polling device metrics for ASR Device "%s" (model "%s")' % (self._device_host, self._device_model)) return 
device_results
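# Illustrative only: a minimal sketch of the context.config[u'main'] mapping that the ASR
# plugin's run() above requires. Both keys are read by the code; the values are assumptions.
example_asr_plugin_config = {
    u'main': {
        u'execute_frequency': u'300',                     # seconds between polls, cast to int
        u'polling_status_metric_name': u'polling_status'  # metric name passed to PanoptesPollingStatus
    }
}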
class PluginPollingAristaDeviceMetrics(PanoptesPollingPlugin): def __init__(self): self._plugin_context = None self._logger = None self._device = None self._device_host = None self._device_model = None self._execute_frequency = None self._snmp_connection = None self._arista_device_metrics = PanoptesMetricsGroupSet() self._polling_status = None self._max_repetitions = None self._cpu_metrics = None self._memory_metrics = None self._temp_metrics = None self._power_metrics = None self._fan_metrics = None self._storage_metrics = None self._device_metrics = None super(PluginPollingAristaDeviceMetrics, self).__init__() @threaded_cached_property def device_descriptions(self): return self._snmp_connection.bulk_walk( oid=hrDeviceDescription, non_repeaters=0, max_repetitions=_MAX_REPETITIONS) @threaded_cached_property def _device_descriptions_map(self): device_descriptions_map = dict() for ent in self.device_descriptions: # pylint: disable=E1133 value = ent.value if not is_python_2() and isinstance(value, bytes): value = value.decode(u'ascii', u'ignore') device_descriptions_map[ent.oid + u'.' + ent.index] = value return device_descriptions_map def _get_cpu_name(self, temp_id): core_num = self._device_descriptions_map[hrDeviceDescription + u'.' + str(temp_id)] # pylint: disable=E1136 return self._device_descriptions_map[hrDeviceDescription + u'.' + u'1'] + u'/' + core_num # pylint: disable=E1136 # TODO is mutable type allowed? def _get_entity_indices(self, ent_physical_class, ent_strings): ent_indices = [] # https://github.com/PyCQA/pylint/issues/1694 for ent in self.entities: # pylint: disable=E1133 value = ent.value if not is_python_2() and isinstance(value, bytes): value = value.decode(u'ascii', u'ignore') physical_class = entPhysicalClassPrefix + u'.' + ent.index.split( u'.')[-1] # pylint: disable=E1133 physical_class_index = int( self.entity_physical_entries_map[physical_class]) # pylint: disable=E1136 if ENT_PHYSICAL_CLASSES[ physical_class_index] == ent_physical_class: for s in ent_strings: if s in value: ent_indices.append(ent.index) return ent_indices @threaded_cached_property def entities(self): return self._snmp_connection.bulk_walk( oid=entPhysicalEntry, non_repeaters=0, max_repetitions=_MAX_REPETITIONS) @threaded_cached_property def host_resources(self): return self._snmp_connection.bulk_walk( oid=HOST_RESOURCES_MIB_PREFIX, non_repeaters=0, max_repetitions=_MAX_REPETITIONS) @threaded_cached_property def host_resources_map(self): host_resources_map = dict() for ent in self.host_resources: # pylint: disable=E1133 value = ent.value if not is_python_2() and isinstance(value, bytes): value = value.decode(u'ascii', u'ignore') host_resources_map[ent.oid + u'.' + ent.index] = value return host_resources_map @threaded_cached_property def entity_physical_entries_map(self): """Maps child oids of entPhysicalEntry to their respective values as PanoptesSNMPVariables""" ent_physical_entries_map = {} # https://github.com/PyCQA/pylint/issues/1694 for ent in self.entities: # pylint: disable=E1133 value = ent.value if not is_python_2() and isinstance(value, bytes): value = value.decode(u'ascii', u'ignore') ent_physical_entries_map[ent.oid + u'.' 
+ ent.index] = value return ent_physical_entries_map @threaded_cached_property def sensor_entities(self): return self._snmp_connection.bulk_walk(oid=ENT_PHY_SENSOR_PREFIX, non_repeaters=0, max_repetitions=25) @threaded_cached_property def sensor_entity_map(self): """Maps child oids of ENT_PHY_SENSOR_PREFIX to their respective values as PanoptesSNMPVariables""" sensor_ent_map = {} # https://github.com/PyCQA/pylint/issues/1694 for ent in self.sensor_entities: # pylint: disable=E1133 sensor_ent_map[ent.oid + u'.' + ent.index] = ent.value return sensor_ent_map def _get_sensor_details(self, index): try: details = dict() entSensorValueIndex = entPhySensorValue + u'.' + index # https://github.com/PyCQA/pylint/issues/1694 details[u'sensor_value'] = int( self.sensor_entity_map[entSensorValueIndex]) # pylint: disable=E1136 details[u'sensor_scale'] = int( self.sensor_entity_map[entPhySensorScale + u'.' + index]) # pylint: disable=E1136 return details except Exception as e: raise e def _convert_scaled_celsius_value_to_units( self, x, scale): # TODO introducing bug when I make static """Convert the provided value in scale-units Celsius to Celsius. N.B. The indices for u'peta' and u'exa' appear to be flip-flopped""" if scale == 14: scale = 15 elif scale == 15: scale = 14 return x * (10**(scale - 9)) def _get_temperature_metrics(self): try: self._temp_metrics = dict() temperature_entity_indices = self._get_entity_indices( ent_physical_class=u'sensor', ent_strings=[u'temp', u'Temp']) for index in temperature_entity_indices: index = index.split(u'.')[-1] # TODO should be name, even though blank? -- Can't be; name must be non-empty self._temp_metrics[index] = { u'sensor': self.entity_physical_entries_map[entPhysicalDescrPrefix + u'.' + index] # pylint: disable=E1136 } self._temp_metrics[index][ u'sensor_details'] = self._get_sensor_details(index) # reported in deci-degrees C celsius_value = self._temp_metrics[index][u'sensor_details'][ u'sensor_value'] / 10.0 scale = self._temp_metrics[index][u'sensor_details'][ u'sensor_scale'] celsius_value_in_units = self._convert_scaled_celsius_value_to_units( celsius_value, scale) self._temp_metrics[index][ u'temp_f'] = convert_celsius_to_fahrenheit( celsius_value_in_units) except Exception as e: raise e def _power_is_on(self, index): # TODO better to base off of current AND voltage difference, or current alone will suffice? return True if int( self.sensor_entity_map[entPhySensorValue + u'.' + str(( int( # pylint: disable=E1136 index) + _INPUT_CURRENT_SENSOR_OFFSET))]) > 0 else False # pylint: disable=E1136 def _get_power_metrics(self): try: power_metrics = dict() power_metrics[u'power_module_map'] = dict() # won't work here alone b/c on/off not given power_entity_indices = self._get_entity_indices( ent_physical_class=u'powerSupply', ent_strings=[u'PowerSupply']) power_metrics[u'power_units_total'] = len(power_entity_indices) for index in power_entity_indices: index = index.split(u'.')[-1] entity_name_index = entPhysicalDescrPrefix + u'.' + index # Use Descr b/c name is empty entity_name = self.entity_physical_entries_map[ entity_name_index] # pylint: disable=E1136 power_on = self._power_is_on(index) power_metrics[u'power_module_map'][index] = { u'entity_name': entity_name, u'power_on': power_on } # Todo Collect current as well? -- Not yet # TODO is this a better pattern b/c if an Exception is raised, the property won't have yet been updated? 
self._power_metrics = power_metrics except Exception as e: raise e def _fan_is_ok(self, index): return 1 if int(self.sensor_entity_map[entPhySensorValue + u'.' + index]) > 0 else 0 # pylint: disable=E1136 # Maximum nominal fan speed is 27000 RPM def _get_fan_metrics(self): try: fan_metrics = dict() fan_entity_indices = self._get_entity_indices( ent_physical_class=u'sensor', ent_strings=[u'Fan 1 Sensor 1']) fan_metrics[u'fans_total'] = len(fan_entity_indices) fans_ok = 0 for index in fan_entity_indices: index = index.split(u'.')[-1] fans_ok += self._fan_is_ok(index) fan_metrics[u'fans_ok'] = fans_ok self._fan_metrics = fan_metrics except Exception as e: raise e def _get_environment_metrics(self): try: self._get_temperature_metrics() self._logger.debug( u'Found Temperature metrics "%s" for %s: %s' % (self._temp_metrics, self._polling_status.device_type, self._device_host)) except Exception as e: self._polling_status.handle_exception(u'environment', e) try: self._get_power_metrics() self._logger.debug( u'Found Power metrics "%s" for %s: %s' % (self._power_metrics, self._polling_status.device_type, self._device_host)) except Exception as e: self._polling_status.handle_exception(u'environment', e) try: self._get_fan_metrics() self._logger.debug( u'Found Fan metrics "%s" for %s: %s' % (self._fan_metrics, self._polling_status.device_type, self._device_host)) except Exception as e: self._polling_status.handle_exception(u'environment', e) try: if self._temp_metrics: for index in list(self._temp_metrics.keys()): environment_metrics_group = PanoptesMetricsGroup( self._device, u'environment', self._execute_frequency) environment_metrics_group.add_dimension( PanoptesMetricDimension( u'sensor', self._temp_metrics[index][u'sensor'])) environment_metrics_group.add_metric( PanoptesMetric(u'temperature_fahrenheit', self._temp_metrics[index][u'temp_f'], PanoptesMetricType.GAUGE)) self._arista_device_metrics.add(environment_metrics_group) self._polling_status.handle_success(u'environment') except Exception as e: self._polling_status.handle_exception(u'environment', e) # TODO Do we need to report sensor details as well? 
    # -- Not yet
    try:
        if self._power_metrics:
            environment_metrics_group = PanoptesMetricsGroup(self._device, u'environment', self._execute_frequency)
            num_power_units_on = 0
            for index in list(self._power_metrics[u'power_module_map'].keys()):
                if self._power_metrics[u'power_module_map'][index][u'power_on']:
                    num_power_units_on += 1
            environment_metrics_group.add_metric(PanoptesMetric(u'power_units_total',
                                                                self._power_metrics[u'power_units_total'],
                                                                PanoptesMetricType.GAUGE))
            environment_metrics_group.add_metric(PanoptesMetric(u'power_units_on',
                                                                num_power_units_on,
                                                                PanoptesMetricType.GAUGE))
            self._arista_device_metrics.add(environment_metrics_group)
            self._polling_status.handle_success(u'environment')
    except Exception as e:
        self._polling_status.handle_exception(u'environment', e)

    try:
        if self._fan_metrics:
            environment_metrics_group = PanoptesMetricsGroup(self._device, u'environment', self._execute_frequency)
            environment_metrics_group.add_metric(PanoptesMetric(u'fans_total',
                                                                self._fan_metrics[u'fans_total'],
                                                                PanoptesMetricType.GAUGE))
            environment_metrics_group.add_metric(PanoptesMetric(u'fans_ok',
                                                                self._fan_metrics[u'fans_ok'],
                                                                PanoptesMetricType.GAUGE))
            self._arista_device_metrics.add(environment_metrics_group)
            self._polling_status.handle_success(u'environment')
    except Exception as e:
        self._polling_status.handle_exception(u'environment', e)

def _get_memory_metrics(self):
    self._memory_metrics = dict()
    self._memory_metrics[u'dram'] = dict()

    try:
        allocation_units = int(self.host_resources_map[hrStorageAllocationUnits + u'.1'])  # pylint: disable=E1136
        memory_used = (int(self.host_resources_map[hrStorageUsed + u'.1']) -
                       int(self.host_resources_map[hrStorageUsed + u'.3'])) * allocation_units  # total - cached
        self._memory_metrics[u'dram'][u'memory_used'] = memory_used
        memory_total = int(self.host_resources_map[hrStorageSize + u'.1']) * allocation_units  # pylint: disable=E1136
        self._memory_metrics[u'dram'][u'memory_total'] = memory_total
    except Exception as e:
        self._polling_status.handle_exception(u'memory', e)
        self._memory_metrics.pop(u'dram')

    try:
        if len(self._memory_metrics) > 0:
            for memory_type in self._memory_metrics:
                memory_metrics_group = PanoptesMetricsGroup(self._device, u'memory', self._execute_frequency)
                memory_metrics_group.add_dimension(PanoptesMetricDimension(u'memory_type', memory_type))
                memory_metrics_group.add_metric(PanoptesMetric(u'memory_used',
                                                               self._memory_metrics[memory_type][u'memory_used'],
                                                               PanoptesMetricType.GAUGE))
                memory_metrics_group.add_metric(PanoptesMetric(u'memory_total',
                                                               self._memory_metrics[memory_type][u'memory_total'],
                                                               PanoptesMetricType.GAUGE))
                self._arista_device_metrics.add(memory_metrics_group)
            self._polling_status.handle_success(u'memory')
            self._logger.debug(u'Found Memory metrics "%s" for %s: %s' %
                               (self._memory_metrics, self._polling_status.device_type, self._device_host))
    except Exception as e:
        self._polling_status.handle_exception(u'memory', e)

def _get_system_cpu_metrics(self):
    self._cpu_metrics = dict()
    self._cpu_metrics[u'ctrl'] = dict()

    try:
        cpus = self._snmp_connection.bulk_walk(oid=hrProcessorLoad,
                                               non_repeaters=0,
                                               max_repetitions=_MAX_REPETITIONS)
        if len(cpus) == 0:
            raise PanoptesMetricsNullException

        for cpu in cpus:
            # The last int for each cpu is a temporary index we will append to hrDeviceDescription to get the name
            temp_id = int(cpu.index.rsplit(u'.', 1)[-1])  # last object
            if temp_id != 1:  # only include individual core info
                self._cpu_metrics[u'ctrl'][temp_id] = dict()
                self._cpu_metrics[u'ctrl'][temp_id][u'cpu_util'] = int(cpu.value)
                self._cpu_metrics[u'ctrl'][temp_id][u'cpu_name'] = self._get_cpu_name(temp_id)
    except Exception as e:
        self._polling_status.handle_exception(u'cpu', e)
        self._cpu_metrics.pop(u'ctrl')

    try:
        if len(self._cpu_metrics) > 0:
            for cpu_type in self._cpu_metrics:
                for cpu_id in list(self._cpu_metrics[cpu_type].keys()):
                    cpu_metrics_group = PanoptesMetricsGroup(self._device, u'cpu', self._execute_frequency)
                    cpu_metrics_group.add_dimension(PanoptesMetricDimension(u'cpu_type', cpu_type))
                    cpu_metrics_group.add_dimension(PanoptesMetricDimension(u'cpu_no', u'1.' + str(cpu_id)))
                    cpu_metrics_group.add_dimension(
                        PanoptesMetricDimension(u'cpu_name', self._cpu_metrics[cpu_type][cpu_id][u'cpu_name']))
                    cpu_metrics_group.add_metric(PanoptesMetric(u'cpu_utilization',
                                                                self._cpu_metrics[cpu_type][cpu_id][u'cpu_util'],
                                                                PanoptesMetricType.GAUGE))
                    self._arista_device_metrics.add(cpu_metrics_group)
            self._polling_status.handle_success(u'cpu')
            self._logger.debug(u'Found CPU metrics "%s" for %s: %s' %
                               (self._cpu_metrics, self._polling_status.device_type, self._device_host))
    except Exception as e:
        self._polling_status.handle_exception(u'cpu', e)

def _get_host_resource_indices(self, oid_filter=u'', host_resource_strings=list()):
    host_resource_indices = []

    # https://github.com/PyCQA/pylint/issues/1694
    for key, value in list(self.host_resources_map.items()):  # pylint: disable=E1101
        if oid_filter in key:
            for s in host_resource_strings:
                if s in value:
                    host_resource_indices.append(key.split(u'.')[-1])

    return host_resource_indices

def _get_storage_metrics(self):
    self._storage_metrics = dict()

    try:
        host_resource_storage_indices = self._get_host_resource_indices(
            oid_filter=hrStorageType,
            host_resource_strings=[hrStorageFlashMemory, hrStorageVirtualMemory])

        for index in host_resource_storage_indices:
            storage_descriptor = self.host_resources_map[hrStorageDescr + u'.' + index]  # pylint: disable=E1136
            self._storage_metrics[storage_descriptor] = dict()
            allocation_units = int(self.host_resources_map[hrStorageAllocationUnits + u'.' + index])  # pylint: disable=E1136
            self._storage_metrics[storage_descriptor][u'storage_used'] = \
                int(self.host_resources_map[hrStorageUsed + u'.' + index]) * allocation_units  # pylint: disable=E1136
            self._storage_metrics[storage_descriptor][u'storage_total'] = \
                int(self.host_resources_map[hrStorageSize + u'.' + index]) * allocation_units  # pylint: disable=E1136
            self._storage_metrics[storage_descriptor][u'storage_type'] = \
                STORAGE_TYPE_REVERSE_MAP[self.host_resources_map[hrStorageType + u'.' + index]]  # pylint: disable=E1136
    except Exception as e:
        self._polling_status.handle_exception(u'storage', e)
        # todo Do we need to pop the stats from self._storage_metrics?

    try:
        if len(self._storage_metrics) > 0:
            for storage_entity in self._storage_metrics:
                storage_metrics_group = PanoptesMetricsGroup(self._device, u'storage', self._execute_frequency)
                storage_metrics_group.add_dimension(
                    PanoptesMetricDimension(u'storage_type', self._storage_metrics[storage_entity][u'storage_type']))
                storage_metrics_group.add_dimension(PanoptesMetricDimension(u'storage_entity', storage_entity))
                storage_metrics_group.add_metric(PanoptesMetric(u'storage_used',
                                                                self._storage_metrics[storage_entity][u'storage_used'],
                                                                PanoptesMetricType.GAUGE))
                storage_metrics_group.add_metric(PanoptesMetric(u'storage_total',
                                                                self._storage_metrics[storage_entity][u'storage_total'],
                                                                PanoptesMetricType.GAUGE))
                self._arista_device_metrics.add(storage_metrics_group)
            self._polling_status.handle_success(u'storage')
            self._logger.debug(u'Found Storage metrics "%s" for %s: %s' %
                               (self._storage_metrics, self._polling_status.device_type, self._device_host))
    except Exception as e:
        self._polling_status.handle_exception(u'storage', e)

def get_device_metrics(self):
    start_time = time.time()

    try:
        start_time = time.time()
        self._snmp_connection = PanoptesSNMPConnectionFactory.get_snmp_connection(
            plugin_context=self._plugin_context, resource=self._device)
    except Exception as e:
        self._polling_status.handle_exception(u'device', e)
    finally:
        if self._polling_status.device_status != DEVICE_METRICS_STATES.SUCCESS:
            self._arista_device_metrics.add(self._polling_status.device_status_metrics_group)
            return self._arista_device_metrics

    self._get_system_cpu_metrics()
    self._get_memory_metrics()
    self._get_environment_metrics()
    self._get_storage_metrics()

    end_time = time.time()
    self._logger.info(u'SNMP calls for Arista %s completed in %.2f seconds' %
                      (self._device_host, end_time - start_time))

    self._arista_device_metrics.add(self._polling_status.device_status_metrics_group)

    return self._arista_device_metrics

def run(self, context):
    self._plugin_context = context
    self._logger = context.logger
    self._device = context.data
    self._device_host = self._device.resource_endpoint
    self._device_model = self._device.resource_metadata.get(u'model', u'unknown')
    self._execute_frequency = int(context.config[u'main'][u'execute_frequency'])
    self._snmp_connection = None
    self._arista_device_metrics = PanoptesMetricsGroupSet()
    self._polling_status = PanoptesPollingStatus(resource=self._device,
                                                 execute_frequency=self._execute_frequency,
                                                 logger=self._logger)
    self._max_repetitions = _MAX_REPETITIONS

    self._logger.info(u'Going to poll Arista device "%s" (model "%s") for device metrics' %
                      (self._device_host, self._device_model))

    start_time = time.time()
    device_results = self.get_device_metrics()
    end_time = time.time()

    if device_results:
        self._logger.info(u'Done polling Arista Device metrics for device "%s" in %.2f seconds, %s metrics' %
                          (self._device_host, end_time - start_time, len(device_results)))
    else:
        self._logger.warn(u'Error polling device metrics for Arista device %s' % self._device_host)

    return device_results
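# The CPU loop in _get_system_cpu_metrics() above calls self._get_cpu_name(), which is not
# included in this excerpt. The sketch below is a hypothetical illustration only, not the
# plugin's actual implementation: it assumes an hrDeviceDescr OID string constant defined
# alongside hrProcessorLoad and a single-OID get() method on the SNMP connection, and
# simply looks up the per-CPU description by the index derived in the loop above.
def _get_cpu_name(self, temp_id):
    # Assumption: hrDeviceDescr and the get() call shape are placeholders; adjust to
    # whatever the real plugin and SNMP connection class actually provide.
    varbind = self._snmp_connection.get(oid=hrDeviceDescr + u'.' + str(temp_id))
    return str(varbind.value)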
def test_panoptes_metrics_group_set(self):
    """Tests basic PanoptesMetricsGroupSet operations"""
    metrics_group_set = PanoptesMetricsGroupSet()
    metrics_group = PanoptesMetricsGroup(self.__panoptes_resource, u'test', 120)
    metrics_group_two = PanoptesMetricsGroup(self.__panoptes_resource, u'test', 120)
    metrics_group_set.add(metrics_group)
    metrics_group_set.add(metrics_group_two)
    assert len(metrics_group_set) == 1
    self.assertIn(metrics_group, metrics_group_set.metrics_groups)

    metrics_group_set.remove(metrics_group_two)
    assert len(metrics_group_set) == 0

    metrics_group_set.add(metrics_group)
    metrics_group_three = PanoptesMetricsGroup(self.__panoptes_resource, u'test3', 120)
    metrics_group_three.add_metric(PanoptesMetric(u"test3", 0.0, PanoptesMetricType.GAUGE))
    metrics_group_set.add(metrics_group_three)
    assert len(metrics_group_set) == 2

    metrics_group_set_two = PanoptesMetricsGroupSet()
    metrics_group_four = PanoptesMetricsGroup(self.__panoptes_resource, u'test', 120)
    metrics_group_four.add_metric(PanoptesMetric(u"test4", 0.0, PanoptesMetricType.GAUGE))
    metrics_group_set_two.add(metrics_group_four)
    assert len(metrics_group_set_two) == 1

    # Test PanoptesMetricsGroupSet.__add__
    metrics_group_set_union = metrics_group_set + metrics_group_set_two
    assert len(metrics_group_set_union) == 3

    with self.assertRaises(AssertionError):
        metrics_group_set.remove(self.__panoptes_resource)

    with self.assertRaises(TypeError):
        metrics_group_set + metrics_group

    # Test PanoptesMetricsGroupSet.__iter__ & 'next'
    metrics_group_count = 0
    metrics_group_set_union_iterator = iter(metrics_group_set_union)
    for _ in metrics_group_set_union:
        self.assertIn(next(metrics_group_set_union_iterator), metrics_group_set_union.metrics_groups)
        metrics_group_count += 1
    assert len(metrics_group_set_union) == metrics_group_count

    with self.assertRaises(Exception):
        next(metrics_group_set_union_iterator)

    # Test PanoptesMetricsGroupSet.__repr__
    _METRICS_GROUP_SET_REPR = u"PanoptesMetricsGroupSet[PanoptesMetricsGroup[resource:" \
                              u"plugin|test|site|test|class|test|subclass|test|type|test|id|test|endpoint|test," \
                              u"interval:120,schema_version:0.2,group_type:test,creation_timestamp:{}," \
                              u"dimensions:[],metrics:[]],PanoptesMetricsGroup[resource:" \
                              u"plugin|test|site|test|class|test|subclass|test|type|test|id|test|endpoint|test," \
                              u"interval:120,schema_version:0.2,group_type:test3,creation_timestamp:{}," \
                              u"dimensions:[],metrics:[" \
                              u"PanoptesMetric[test3|0.0|GAUGE|{}]]]]".format(mock_time.return_value,
                                                                              mock_time.return_value,
                                                                              mock_time.return_value)
    self.assertEqual(repr(metrics_group_set), _METRICS_GROUP_SET_REPR)
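# The first assertion in the test above (two equal groups added, length 1) relies on
# PanoptesMetricsGroupSet deduplicating equal metrics groups. Below is a minimal,
# standalone analogue of that set semantics in plain Python; the Group class is
# hypothetical and only illustrates equality/hash based deduplication, it is not one
# of the Panoptes classes.
class Group(object):
    def __init__(self, group_type, interval):
        self.group_type = group_type
        self.interval = interval

    def __eq__(self, other):
        return (self.group_type, self.interval) == (other.group_type, other.interval)

    def __hash__(self):
        return hash((self.group_type, self.interval))


# Two equal, hashable values collapse to a single entry, just as the two 'test' groups do.
assert len({Group(u'test', 120), Group(u'test', 120)}) == 1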
def _process_transforms(context, transforms, metrics_group_set):
    """
    {
        "member": [{"rate": ["real_bytes_in", "real_bytes_out", "real_packets_in", "real_packets_out",
                             "real_total_connections"]}]
    }

    Args:
        context (PanoptesContext): The PanoptesContext being used by the Plugin Agent
        transforms (dict): The transformations to apply
        metrics_group_set (PanoptesMetricsGroupSet): The metrics group set on which to apply the transformations

    Returns:
        PanoptesMetricsGroupSet: The processed/transformed metrics group set
    """
    callbacks = {u'rate': _transformation_rate}
    logger = context.logger
    lookup = dict()
    output_metrics_group_set = PanoptesMetricsGroupSet()

    logger.debug(u'Going to process transforms: %s' % transforms)

    for key in transforms:
        transform_type, transform_metrics_group_type, transform_inputs = transforms[key].split(':')
        transform_inputs = _split_and_strip(transform_inputs)
        if transform_metrics_group_type not in lookup:
            lookup[transform_metrics_group_type] = list()
        lookup[transform_metrics_group_type].append((transform_type, transform_inputs))

    logger.debug(u'Transform lookups: %s' % lookup)

    for metrics_group in metrics_group_set:
        if metrics_group.group_type not in lookup:
            output_metrics_group_set.add(metrics_group)
            continue

        resource_serialization_key = metrics_group.resource.serialization_key

        for transform in lookup[metrics_group.group_type]:
            logger.debug(u'For resource %s, trying to process transform %s' % (resource_serialization_key, transform))
            transform_type, transform_inputs = transform

            if transform_type not in callbacks:
                logger.warn(u'For resource %s, no implementation for transform type "%s" found, skipping' %
                            (resource_serialization_key, transform))
                continue

            try:
                output_metrics_group = callbacks[transform_type](context, metrics_group, transform_inputs)
                if output_metrics_group is not None:
                    output_metrics_group_set.add(output_metrics_group)
            except Exception as e:
                logger.error(u'For resource %s, error while trying to transform metrics group "%s": %s, skipping' %
                             (resource_serialization_key, metrics_group.group_type, repr(e)))

    return output_metrics_group_set
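# Note on _process_transforms(): the docstring shows the logical shape of a transform,
# but the parsing loop expects each value in the transforms dict to be a colon-separated
# string of the form "<transform_type>:<metrics_group_type>:<comma-separated inputs>".
# The dict below is a hypothetical illustration (key and metric names borrowed from the
# docstring example), showing the input the parser would accept:
example_transforms = {
    u'member': u'rate:member:real_bytes_in,real_bytes_out,real_packets_in,'
               u'real_packets_out,real_total_connections'
}
# After the parsing loop, lookup would be (assuming _split_and_strip splits on commas):
# {u'member': [(u'rate', [u'real_bytes_in', u'real_bytes_out', u'real_packets_in',
#                         u'real_packets_out', u'real_total_connections'])]}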
def test_panoptes_metrics_group_set(self):
    """Tests basic PanoptesMetricsGroupSet operations (legacy Python 2 variant of the test above)"""
    metrics_group_set = PanoptesMetricsGroupSet()
    metrics_group = PanoptesMetricsGroup(self.__panoptes_resource, 'test', 120)
    metrics_group_two = PanoptesMetricsGroup(self.__panoptes_resource, 'test', 120)
    metrics_group_set.add(metrics_group)
    metrics_group_set.add(metrics_group_two)
    assert len(metrics_group_set) == 1
    self.assertIn(metrics_group, metrics_group_set.metrics_groups)

    metrics_group_set.remove(metrics_group_two)
    assert len(metrics_group_set) == 0

    metrics_group_set.add(metrics_group)
    metrics_group_three = PanoptesMetricsGroup(self.__panoptes_resource, 'test3', 120)
    metrics_group_three.add_metric(PanoptesMetric("test3", 0.0, PanoptesMetricType.GAUGE))
    metrics_group_set.add(metrics_group_three)
    assert len(metrics_group_set) == 2

    metrics_group_set_two = PanoptesMetricsGroupSet()
    metrics_group_four = PanoptesMetricsGroup(self.__panoptes_resource, 'test', 120)
    metrics_group_four.add_metric(PanoptesMetric("test4", 0.0, PanoptesMetricType.GAUGE))
    metrics_group_set_two.add(metrics_group_four)
    assert len(metrics_group_set_two) == 1

    # Test PanoptesMetricsGroupSet.__add__
    metrics_group_set_union = metrics_group_set + metrics_group_set_two
    assert len(metrics_group_set_union) == 3

    with self.assertRaises(AssertionError):
        metrics_group_set.remove(self.__panoptes_resource)

    with self.assertRaises(TypeError):
        metrics_group_set + metrics_group

    # Test PanoptesMetricsGroupSet.__iter__ & 'next' (Python 2 iterator protocol)
    metrics_group_count = 0
    metrics_group_set_union_iterator = iter(metrics_group_set_union)
    for _ in metrics_group_set_union:
        self.assertIn(metrics_group_set_union_iterator.next(), metrics_group_set_union.metrics_groups)
        metrics_group_count += 1
    assert len(metrics_group_set_union) == metrics_group_count

    with self.assertRaises(Exception):
        metrics_group_set_union_iterator.next()

    # Test PanoptesMetricsGroupSet.__repr__
    _METRICS_GROUP_SET_REPR = "set([{{'metrics_group_interval': 120, " \
                              "'resource': plugin|test|site|test|class|test|subclass|test|type|test|id|test|" \
                              "endpoint|test, 'dimensions': set([]), 'metrics_group_type': 'test', " \
                              "'metrics': set([]), 'metrics_group_creation_timestamp': {}, " \
                              "'metrics_group_schema_version': '0.2'}}, {{'metrics_group_interval': 120, " \
                              "'resource': plugin|test|site|test|class|test|subclass|test|type|test|id|test|" \
                              "endpoint|test, 'dimensions': set([]), 'metrics_group_type': 'test3', " \
                              "'metrics': set([{{'metric_creation_timestamp': {}, " \
                              "'metric_type': 'gauge', " \
                              "'metric_name': 'test3', 'metric_value': 0.0}}]), " \
                              "'metrics_group_creation_timestamp': {}, " \
                              "'metrics_group_schema_version': '0.2'}}])".format(mock_time.return_value,
                                                                                 mock_time.return_value,
                                                                                 mock_time.return_value)
    self.assertEqual(repr(metrics_group_set), _METRICS_GROUP_SET_REPR)