def test_get_job_metrics(self):
    self.plugin.metrics[self.key] = {}
    self.plugin._get_job_metrics(self.apps)

    exp_mr_1 = MetricRecord("spark.job.num_active_tasks", "gauge", 1, self.expected_dim)
    exp_mr_2 = MetricRecord("spark.job.num_active_stages", "gauge", 2, self.expected_dim)
    exp_mr_3 = MetricRecord("spark.num_running_jobs", "gauge", 1, self.expected_dim)
    expected_records = [exp_mr_1, exp_mr_2, exp_mr_3]

    actual_records = list(self.plugin.metrics[self.key].values())
    self._validate_metrics(expected_records, actual_records)
def test_get_stage_metrics(self):
    self.plugin.metrics[self.key] = {}
    self.plugin._get_stage_metrics(self.apps)

    exp_mr_1 = MetricRecord("spark.stage.input_bytes", "gauge", 1553, self.expected_dim)
    exp_mr_2 = MetricRecord("spark.stage.input_records", "gauge", 5, self.expected_dim)
    exp_mr_3 = MetricRecord("spark.num_active_stages", "gauge", 2, self.expected_dim)
    expected_records = [exp_mr_1, exp_mr_2, exp_mr_3]

    actual_records = list(self.plugin.metrics[self.key].values())
    self._validate_metrics(expected_records, actual_records)
def test_get_streaming_metrics(self):
    self.plugin.metrics[self.key] = {}
    self.plugin._get_streaming_metrics(self.apps)

    exp_mr_1 = MetricRecord("spark.streaming.avg_input_rate", "gauge", 0.0, self.expected_dim)
    exp_mr_2 = MetricRecord("spark.streaming.avg_scheduling_delay", "gauge", 4, self.expected_dim)
    exp_mr_3 = MetricRecord("spark.streaming.avg_processing_time", "gauge", 93, self.expected_dim)
    exp_mr_4 = MetricRecord("spark.streaming.avg_total_delay", "gauge", 97, self.expected_dim)
    expected_records = [exp_mr_1, exp_mr_2, exp_mr_3, exp_mr_4]

    actual_records = list(self.plugin.metrics[self.key].values())
    self._validate_metrics(expected_records, actual_records)
def test_get_executor_metrics(self):
    self.plugin.metrics[self.key] = {}
    self.plugin._get_executor_metrics(self.apps)

    exp_mr_1 = MetricRecord("spark.driver.memory_used", "counter", 30750, self.expected_dim)
    exp_mr_2 = MetricRecord("spark.driver.disk_used", "counter", 1155, self.expected_dim)
    exp_mr_3 = MetricRecord("spark.executor.memory_used", "counter", 34735, self.expected_dim)
    exp_mr_4 = MetricRecord("spark.executor.disk_used", "counter", 1973, self.expected_dim)
    exp_mr_5 = MetricRecord("spark.executor.count", "gauge", 3, self.expected_dim)
    expected_records = [exp_mr_1, exp_mr_2, exp_mr_3, exp_mr_4, exp_mr_5]

    actual_records = list(self.plugin.metrics[self.key].values())
    self._validate_metrics(expected_records, actual_records)
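# NOTE: _validate_metrics and MetricRecord are defined elsewhere in the test suite and are
# not part of this excerpt. The sketch below is only an illustration of what such a
# comparison helper could look like; the MetricRecord attribute names used here
# (name, type, value, dimensions) are assumptions, not the plugin's actual API.
def _validate_metrics(self, expected_records, actual_records):
    # Same number of records, then a field-by-field comparison keyed on metric name.
    self.assertEqual(len(expected_records), len(actual_records))
    actual_by_name = {record.name: record for record in actual_records}
    for expected in expected_records:
        actual = actual_by_name.get(expected.name)
        self.assertIsNotNone(actual, "missing metric record: %s" % expected.name)
        self.assertEqual(expected.type, actual.type)
        self.assertEqual(expected.value, actual.value)
        self.assertDictEqual(expected.dimensions, actual.dimensions)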
def test_emit_record(self, mock_collectd):
    mock_collectd.return_value = self.mock_values

    metric_value = 1234567890
    metric_dimensions = {"spark_process": "master"}

    expected_type = "counter"
    expected_values = [metric_value]
    expected_plugin_instance = "[spark_process=master]"
    expected_type_instance = "master.aliveWorkers"
    expected_meta = {"true": "true"}
    expected_plugin = "apache_spark"

    record = MetricRecord(expected_type_instance, expected_type, metric_value, metric_dimensions)
    self.sink.emit(record)

    self.assertEqual(1, len(self.mock_values.dispatch_collector))
    dispatched_value = self.mock_values.dispatch_collector[0]

    self.assertEqual(expected_plugin, dispatched_value.plugin)
    self.assertEqual(expected_values, dispatched_value.values)
    self.assertEqual(expected_type, dispatched_value.type)
    self.assertEqual(expected_type_instance, dispatched_value.type_instance)
    self.assertEqual(expected_plugin_instance, dispatched_value.plugin_instance)
    self.assertDictEqual(expected_meta, dispatched_value.meta)
def test_get_metrics(self):
    resp = {}
    process = ""
    expected_resp = []

    include = set()
    include.add("HiveExternalCatalog.fileCacheHits")
    self.plugin.include = include

    actual_resp = self.plugin.get_metrics(resp, process)
    self.assertListEqual(actual_resp, expected_resp)

    resp = {
        "gauges": {
            "fail_metric_name_1": {"value": 0},
            "master.workers": {"value": 100},
        },
        "counters": {
            "fail_metric_name_2": {"count": 0},
            "HiveExternalCatalog.fileCacheHits": {"count": 2},
        },
    }
    self.plugin.global_dimensions = {"foo": "bar"}
    process = "master"
    expected_dim = {"foo": "bar", "spark_process": process}
    expected_resp = [
        MetricRecord("master.workers", "gauge", 100, expected_dim),
        MetricRecord("HiveExternalCatalog.fileCacheHits", "counter", 2, expected_dim),
    ]

    actual_resp = self.plugin.get_metrics(resp, process)
    self.assertEqual(len(expected_resp), len(actual_resp))
    self._validate_metrics(expected_resp, actual_resp)
def test_read_and_post_metrics(self):
    self.plugin.metric_address = "http://host"
    self.plugin.master_port = "8080"
    self.plugin.worker_ports = ["8081"]

    include = set()
    include.add("HiveExternalCatalog.fileCacheHits")
    self.plugin.include = include

    self.plugin.read()

    exp_mr_1 = MetricRecord("jvm.heap.committed", "gauge", 0.0257, {"spark_process": "master"})
    exp_mr_2 = MetricRecord("jvm.heap.committed", "gauge", 0.0434,
                            {"spark_process": "worker", "worker_port": "8081"})
    exp_mr_3 = MetricRecord("jvm.heap.used", "gauge", 26716912, {"spark_process": "master"})
    exp_mr_4 = MetricRecord("jvm.heap.used", "gauge", 45102544,
                            {"spark_process": "worker", "worker_port": "8081"})
    exp_mr_5 = MetricRecord("HiveExternalCatalog.fileCacheHits", "counter", 2, {"spark_process": "master"})
    exp_mr_6 = MetricRecord("HiveExternalCatalog.fileCacheHits", "counter", 0,
                            {"spark_process": "worker", "worker_port": "8081"})
    expected_records = [exp_mr_1, exp_mr_2, exp_mr_3, exp_mr_4, exp_mr_5, exp_mr_6]

    self._verify_records_captured(expected_records)
def test_mesos_read_and_post_metrics(self):
    exp_mr_1 = MetricRecord("spark.streaming.avg_input_rate", "gauge", 0.0, self.expected_mesos_dim)
    exp_mr_2 = MetricRecord("spark.streaming.avg_scheduling_delay", "gauge", 4, self.expected_mesos_dim)
    exp_mr_3 = MetricRecord("spark.streaming.avg_processing_time", "gauge", 93, self.expected_mesos_dim)
    exp_mr_4 = MetricRecord("spark.streaming.avg_total_delay", "gauge", 97, self.expected_mesos_dim)
    exp_mr_5 = MetricRecord("spark.job.num_active_tasks", "gauge", 1, self.expected_mesos_dim)
    exp_mr_6 = MetricRecord("spark.job.num_active_stages", "gauge", 2, self.expected_mesos_dim)
    exp_mr_7 = MetricRecord("spark.num_running_jobs", "gauge", 1, self.expected_mesos_dim)
    exp_mr_8 = MetricRecord("spark.stage.input_bytes", "gauge", 1553, self.expected_mesos_dim)
    exp_mr_9 = MetricRecord("spark.stage.input_records", "gauge", 5, self.expected_mesos_dim)
    exp_mr_10 = MetricRecord("spark.num_active_stages", "gauge", 2, self.expected_mesos_dim)
    exp_mr_11 = MetricRecord("spark.driver.memory_used", "counter", 30750, self.expected_mesos_dim)
    exp_mr_12 = MetricRecord("spark.driver.disk_used", "counter", 1155, self.expected_mesos_dim)
    exp_mr_13 = MetricRecord("spark.executor.memory_used", "counter", 34735, self.expected_mesos_dim)
    exp_mr_14 = MetricRecord("spark.executor.disk_used", "counter", 1973, self.expected_mesos_dim)
    exp_mr_15 = MetricRecord("spark.executor.count", "gauge", 3, self.expected_mesos_dim)
    expected_records = [
        exp_mr_1, exp_mr_2, exp_mr_3, exp_mr_4, exp_mr_5,
        exp_mr_6, exp_mr_7, exp_mr_8, exp_mr_9, exp_mr_10,
        exp_mr_11, exp_mr_12, exp_mr_13, exp_mr_14, exp_mr_15,
    ]

    self.plugin.cluster_mode = "Mesos"
    self.plugin.read()
    self._verify_records_captured(expected_records)
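# NOTE: a hypothetical sketch of the MetricRecord value object constructed throughout
# these tests, assuming it is a plain container for (name, type, value, dimensions);
# the plugin's real class is not shown in this excerpt and may differ.
class MetricRecord(object):
    """Holds one metric observation: metric name, collectd type, value, and dimensions."""

    def __init__(self, name, metric_type, value, dimensions=None):
        self.name = name
        self.type = metric_type
        self.value = value
        self.dimensions = dimensions or {}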