Пример #1
0
    def report_cluster(self, config, cluster):
        """Emit cluster-wide gauges from a cluster summary dict.

        The nimbus uptime gauge is emitted only when ``cluster`` carries
        a non-None ``nimbusUptime`` entry. The remaining summary keys are
        read with plain indexing, so a missing one raises ``KeyError``.
        Every gauge is tagged with ``config.tags``.
        """
        nimbus_uptime = cluster.get('nimbusUptime')
        if nimbus_uptime is not None:
            self.gauge(
                self.metric(config, 'cluster.nimbus_uptime_seconds'),
                storm_utils.translate_timespec(nimbus_uptime),
                tags=config.tags)

        # (metric suffix, cluster summary key), listed in emission order.
        count_gauges = (
            ('cluster.slots_used_count', 'slotsUsed'),
            ('cluster.supervisor_count', 'supervisors'),
            ('cluster.slots_total_count', 'slotsTotal'),
            ('cluster.slots_free_count', 'slotsFree'),
            ('cluster.executors_total_count', 'executorsTotal'),
            ('cluster.tasks_total_count', 'tasksTotal'),
        )
        for suffix, summary_key in count_gauges:
            self.gauge(self.metric(config, suffix),
                       cluster[summary_key],
                       tags=config.tags)
Пример #2
0
 def test_timespec(self):
     """Check translate_timespec on well-formed and malformed inputs."""
     # (timespec string, value translate_timespec is expected to return)
     valid_cases = (
         ('5m 30s', 330),
         ('20d 5m 31s', 1728331),
         ('20w 5h 41s', 12114041),
         ('', 0),
     )
     for spec, expected in valid_cases:
         self.assertEqual(expected, storm_utils.translate_timespec(spec))

     # Malformed specs are expected to raise ValueError.
     for bad_spec in ('20--', '20Y 5m 3s'):
         with self.assertRaises(ValueError):
             storm_utils.translate_timespec(bad_spec)
Пример #3
0
 def test_timespec(self):
     """Check translate_timespec on well-formed and malformed inputs."""
     # (timespec string, value translate_timespec is expected to return)
     valid_cases = (
         ('5m 30s', 330),
         ('20d 5m 31s', 1728331),
         ('20w 5h 41s', 12114041),
         ('', 0),
     )
     for spec, expected in valid_cases:
         self.assertEqual(expected, storm_utils.translate_timespec(spec))

     # Malformed specs are expected to raise ValueError.
     for bad_spec in ('20--', '20Y 5m 3s'):
         with self.assertRaises(ValueError):
             storm_utils.translate_timespec(bad_spec)
Пример #4
0
 def report_supervisors(self, config, resp):
     """Report the supervisor count plus per-supervisor gauges.

     ``resp`` is a dict; its ``supervisors`` entry, when present, is
     iterated as a list of per-supervisor dicts. A missing or None
     ``supervisors`` entry is reported as zero supervisors.

     Per-supervisor gauges are tagged with ``storm_host:<host>`` followed
     by ``config.tags``. A supervisor without a ``host`` entry is tagged
     with ``config.tags`` only, and one without an ``uptime`` entry gets
     no uptime gauge, so a partial entry no longer aborts the report.
     """
     # Look the list up once; `or []` also covers an explicit None.
     supervisors = resp.get('supervisors') or []
     self.gauge(self.metric(config, 'supervisors_total'),
                len(supervisors),
                tags=config.tags)
     for supe in supervisors:
         host = supe.get('host')
         # 'storm_host:' + None raises TypeError, so only add the host
         # tag when the supervisor actually reports a host.
         host_tags = [] if host is None else ['storm_host:' + host]
         supe_tags = host_tags + config.tags
         self.gauge(self.metric(config, 'supervisor.slots_total'),
                    supe.get('slotsTotal'), tags=supe_tags)
         self.gauge(self.metric(config, 'supervisor.slots_used_total'),
                    supe.get('slotsUsed'), tags=supe_tags)

         # Only translate an uptime that is actually present.
         uptime = supe.get('uptime')
         if uptime is not None:
             self.gauge(self.metric(config, 'supervisor.uptime_seconds'),
                        storm_utils.translate_timespec(uptime),
                        tags=supe_tags)
Пример #5
0
 def report_supervisors(self, config, resp):
     """Report the supervisor count plus per-supervisor gauges.

     ``resp`` is a dict; its ``supervisors`` entry, when present, is
     iterated as a list of per-supervisor dicts. A missing or None
     ``supervisors`` entry is reported as zero supervisors.

     Per-supervisor gauges are tagged with ``storm_host:<host>`` followed
     by ``config.tags``. A supervisor without a ``host`` entry is tagged
     with ``config.tags`` only, and one without an ``uptime`` entry gets
     no uptime gauge, so a partial entry no longer aborts the report.
     """
     # Look the list up once; `or []` also covers an explicit None.
     supervisors = resp.get('supervisors') or []
     self.gauge(self.metric(config, 'supervisors_total'),
                len(supervisors),
                tags=config.tags)
     for supe in supervisors:
         host = supe.get('host')
         # 'storm_host:' + None raises TypeError, so only add the host
         # tag when the supervisor actually reports a host.
         host_tags = [] if host is None else ['storm_host:' + host]
         supe_tags = host_tags + config.tags
         self.gauge(self.metric(config, 'supervisor.slots_total'),
                    supe.get('slotsTotal'), tags=supe_tags)
         self.gauge(self.metric(config, 'supervisor.slots_used_total'),
                    supe.get('slotsUsed'), tags=supe_tags)

         # Only translate an uptime that is actually present.
         uptime = supe.get('uptime')
         if uptime is not None:
             self.gauge(self.metric(config, 'supervisor.uptime_seconds'),
                        storm_utils.translate_timespec(uptime),
                        tags=supe_tags)
Пример #6
0
 def report_topologies(self, config, topologies):
     """Report uptime, task, worker and executor gauges per topology.

     ``topologies`` maps a display name to a topology summary dict; one
     set of gauges is emitted per entry, tagged with ``config.tags`` plus
     ``storm_topology:<name>``. A summary without ``uptime`` is reported
     as ``'0s'``; ``tasksTotal``, ``workersTotal`` and ``executorsTotal``
     are required and raise ``KeyError`` when absent.

     NOTE(review): presumably the caller has already filtered
     ``topologies`` by the configured topology regex; no filtering
     happens here.
     """
     # (metric suffix, topology summary key), listed in emission order.
     total_gauges = (
         ('topologies.tasks_total', 'tasksTotal'),
         ('topologies.workers_total', 'workersTotal'),
         ('topologies.executors_total', 'executorsTotal'),
     )
     # dict.items() works on both Python 2 and 3, whereas iteritems()
     # only exists on Python 2.
     for pretty_name, topo in topologies.items():
         uptime = topo.get('uptime', '0s')
         tags = config.tags + [
             'storm_topology:' + pretty_name,
         ]
         self.gauge(self.metric(config, 'topologies.uptime_seconds'),
                    storm_utils.translate_timespec(uptime), tags=tags)
         for suffix, key in total_gauges:
             self.gauge(self.metric(config, suffix), topo[key], tags=tags)
Пример #7
0
 def report_topologies(self, config, topologies):
     """Report uptime, task, worker and executor gauges per topology.

     ``topologies`` maps a display name to a topology summary dict; one
     set of gauges is emitted per entry, tagged with ``config.tags`` plus
     ``storm_topology:<name>``. A summary without ``uptime`` is reported
     as ``'0s'``; ``tasksTotal``, ``workersTotal`` and ``executorsTotal``
     are required and raise ``KeyError`` when absent.

     NOTE(review): presumably the caller has already filtered
     ``topologies`` by the configured topology regex; no filtering
     happens here.
     """
     # (metric suffix, topology summary key), listed in emission order.
     total_gauges = (
         ('topologies.tasks_total', 'tasksTotal'),
         ('topologies.workers_total', 'workersTotal'),
         ('topologies.executors_total', 'executorsTotal'),
     )
     # dict.items() works on both Python 2 and 3, whereas iteritems()
     # only exists on Python 2.
     for pretty_name, topo in topologies.items():
         uptime = topo.get('uptime', '0s')
         tags = config.tags + [
             'storm_topology:' + pretty_name,
         ]
         self.gauge(self.metric(config, 'topologies.uptime_seconds'),
                    storm_utils.translate_timespec(uptime), tags=tags)
         for suffix, key in total_gauges:
             self.gauge(self.metric(config, suffix), topo[key], tags=tags)
Пример #8
0
    def report_executor_details(self, config, details):
        """
        Report statistics for a single topology's task ID's executors.

        ``details`` is a component detail dict. Its ``id``,
        ``componentType``, ``executors``, ``tasks`` and ``executorStats``
        entries are required (``KeyError`` otherwise). Component-level
        gauges are tagged with ``config.tags``, the tags returned by
        ``self.task_id_tags`` and topology/component/task-id tags; each
        entry of ``executorStats`` additionally gets executor id, host
        and port tags.
        """

        topology_name = self._topology_name(config, details)
        name = details['id']
        task_type = details['componentType']
        tags = config.tags + self.task_id_tags(config, task_type, name) + [
            'storm_topology:' + topology_name,
            'storm_component_type:' + task_type,
            'storm_task_id:' + name,
        ]
        self.gauge(self.metric(config, 'executor.executors_total'),
                   details['executors'], tags=tags)
        # The task count comes from 'tasks'; this gauge previously
        # re-reported details['executors'] under the tasks metric name.
        self.gauge(self.metric(config, 'executor.tasks_total'),
                   details['tasks'], tags=tags)

        # (metric suffix, executor stats key) for the monotonic counters,
        # listed in emission order. A missing key is reported as 0.
        counters = (
            ('executor.emitted_total', 'emitted'),
            ('executor.transferred_total', 'transferred'),
            ('executor.acked_total', 'acked'),
            ('executor.executed_total', 'executed'),
            ('executor.failed_total', 'failed'),
        )

        # Embarrassingly, executorStats are undocumented in the REST
        # API docs (so we might not be allowed to rely on them). But
        # they're the only way to get some SERIOUSLY useful metrics -
        # per-host metrics, in particular.
        for executor in details['executorStats']:
            executor_tags = tags + [
                'executor_id:' + executor['id'],
                'storm_host:' + executor['host'],
                'storm_port:' + str(executor['port']),
            ]
            for suffix, key in counters:
                self.monotonic_count(self.metric(config, suffix),
                                     executor.get(key, 0),
                                     tags=executor_tags)

            self.gauge(self.metric(config, 'executor.execute_latency_us'),
                       float(executor.get('executeLatency', 0)),
                       tags=executor_tags)
            self.gauge(self.metric(config, 'executor.process_latency_us'),
                       float(executor.get('processLatency', 0)),
                       tags=executor_tags)

            # 'capacity' is scaled by 100 to report a percentage.
            self.gauge(self.metric(config, 'executor.capacity_percent'),
                       float(executor.get('capacity', 0)) * 100,
                       tags=executor_tags)
            self.gauge(self.metric(config, 'executor.uptime_seconds'),
                       storm_utils.translate_timespec(
                           executor.get('uptime', '0s')),
                       tags=executor_tags)
Пример #9
0
    def report_executor_details(self, config, details):
        """
        Report statistics for a single topology's task ID's executors.

        ``details`` is a component detail dict. Its ``id``,
        ``componentType``, ``executors``, ``tasks`` and ``executorStats``
        entries are required (``KeyError`` otherwise). Component-level
        gauges are tagged with ``config.tags``, the tags returned by
        ``self.task_id_tags`` and topology/component/task-id tags; each
        entry of ``executorStats`` additionally gets executor id, host
        and port tags.
        """

        topology_name = self._topology_name(config, details)
        name = details['id']
        task_type = details['componentType']
        tags = config.tags + self.task_id_tags(config, task_type, name) + [
            'storm_topology:' + topology_name,
            'storm_component_type:' + task_type,
            'storm_task_id:' + name,
        ]
        self.gauge(self.metric(config, 'executor.executors_total'),
                   details['executors'], tags=tags)
        # The task count comes from 'tasks'; this gauge previously
        # re-reported details['executors'] under the tasks metric name.
        self.gauge(self.metric(config, 'executor.tasks_total'),
                   details['tasks'], tags=tags)

        # (metric suffix, executor stats key) for the monotonic counters,
        # listed in emission order. A missing key is reported as 0.
        counters = (
            ('executor.emitted_total', 'emitted'),
            ('executor.transferred_total', 'transferred'),
            ('executor.acked_total', 'acked'),
            ('executor.executed_total', 'executed'),
            ('executor.failed_total', 'failed'),
        )

        # Embarrassingly, executorStats are undocumented in the REST
        # API docs (so we might not be allowed to rely on them). But
        # they're the only way to get some SERIOUSLY useful metrics -
        # per-host metrics, in particular.
        for executor in details['executorStats']:
            executor_tags = tags + [
                'executor_id:' + executor['id'],
                'storm_host:' + executor['host'],
                'storm_port:' + str(executor['port']),
            ]
            for suffix, key in counters:
                self.monotonic_count(self.metric(config, suffix),
                                     executor.get(key, 0),
                                     tags=executor_tags)

            self.gauge(self.metric(config, 'executor.execute_latency_us'),
                       float(executor.get('executeLatency', 0)),
                       tags=executor_tags)
            self.gauge(self.metric(config, 'executor.process_latency_us'),
                       float(executor.get('processLatency', 0)),
                       tags=executor_tags)

            # 'capacity' is scaled by 100 to report a percentage.
            self.gauge(self.metric(config, 'executor.capacity_percent'),
                       float(executor.get('capacity', 0)) * 100,
                       tags=executor_tags)
            self.gauge(self.metric(config, 'executor.uptime_seconds'),
                       storm_utils.translate_timespec(
                           executor.get('uptime', '0s')),
                       tags=executor_tags)
Пример #10
0
 def report_cluster(self, config, cluster):
     """Emit cluster-wide gauges from a cluster summary dict.

     The nimbus uptime gauge is emitted only when ``cluster`` carries a
     non-None ``nimbusUptime`` entry, so a summary without it still
     yields the slot/supervisor/executor/task gauges instead of raising
     ``KeyError`` before anything is reported. The remaining keys are
     required and raise ``KeyError`` when absent. Every gauge is tagged
     with ``config.tags``.
     """
     # NOTE(review): presumably some Storm versions omit nimbusUptime
     # from the cluster summary - confirm against the targeted versions.
     uptime = cluster.get('nimbusUptime')
     if uptime is not None:
         self.gauge(self.metric(config, 'cluster.nimbus_uptime_seconds'),
                    storm_utils.translate_timespec(uptime),
                    tags=config.tags)

     # (metric suffix, cluster summary key), listed in emission order.
     count_gauges = (
         ('cluster.slots_used_count', 'slotsUsed'),
         ('cluster.supervisor_count', 'supervisors'),
         ('cluster.slots_total_count', 'slotsTotal'),
         ('cluster.slots_free_count', 'slotsFree'),
         ('cluster.executors_total_count', 'executorsTotal'),
         ('cluster.tasks_total_count', 'tasksTotal'),
     )
     for suffix, key in count_gauges:
         self.gauge(self.metric(config, suffix),
                    cluster[key],
                    tags=config.tags)