def test_vcpus_by_all_second_stage(self,
                                   usage_manager,
                                   setter_manager,
                                   insert_manager,
                                   data_driven_specs_repo):

    # load components
    usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
    setter_manager.return_value = \
        MockComponentManager.get_setter_cmpt_mgr()
    insert_manager.return_value = \
        MockComponentManager.get_insert_pre_hourly_cmpt_mgr()

    # init mock driver tables
    data_driven_specs_repo.return_value = \
        MockDataDrivenSpecsRepo(
            self.spark_context,
            self.get_pre_transform_specs_json_by_all(),
            self.get_transform_specs_json_by_all())

    # Create an RDD out of the mocked Monasca metrics
    with open(SecondStageDataProvider.kafka_data_path_by_all) as f:
        raw_lines = f.read().splitlines()
    raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]

    vm_cpu_rdd = self.spark_context.parallelize(raw_tuple_list)
    sql_context = SQLContext(self.spark_context)
    vm_cpu_df = sql_context.read.json(vm_cpu_rdd)
    PreHourlyProcessor.do_transform(vm_cpu_df)

    metrics = DummyAdapter.adapter_impl.metric_list
    vcpus_agg_metric = [
        value for value in metrics
        if value.get('metric').get('name') == 'vcpus_agg' and
        value.get('metric').get('dimensions').get('project_id') ==
        'all'][0]

    self.assertTrue(vcpus_agg_metric is not None)
    self.assertEqual(7.0, vcpus_agg_metric.get('metric').get('value'))
    self.assertEqual('useast',
                     vcpus_agg_metric.get('meta').get('region'))
    self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                     vcpus_agg_metric.get('meta').get('tenantId'))
    self.assertEqual(
        'all',
        vcpus_agg_metric.get('metric').get('dimensions').get('host'))
    self.assertEqual(
        'prehourly',
        vcpus_agg_metric.get('metric').get('dimensions').get(
            'aggregation_period'))
    self.assertEqual(
        14.0,
        vcpus_agg_metric.get('metric').get('value_meta').get(
            'record_count'))
    self.assertEqual(
        '2016-01-20 16:40:00',
        vcpus_agg_metric.get('metric').get('value_meta').get(
            'firstrecord_timestamp_string'))
    self.assertEqual(
        '2016-01-20 16:40:46',
        vcpus_agg_metric.get('metric').get('value_meta').get(
            'lastrecord_timestamp_string'))
def invoke():
    # load application configuration
    ConfigInitializer.basic_config()

    # app name
    application_name = "mon_metrics_kafka"

    my_spark_conf = SparkConf().setAppName(application_name)
    spark_context = SparkContext(conf=my_spark_conf)

    # read at the configured interval
    spark_streaming_context = \
        StreamingContext(spark_context, cfg.CONF.service.stream_interval)

    kafka_stream = MonMetricsKafkaProcessor.get_kafka_stream(
        cfg.CONF.messaging.topic,
        spark_streaming_context)

    # transform to recordstore
    MonMetricsKafkaProcessor.transform_to_recordstore(kafka_stream)

    # catch interrupt, stop streaming context gracefully
    # signal.signal(signal.SIGINT, signal_handler)

    # start processing
    spark_streaming_context.start()

    # FIXME: stop spark context to relinquish resources
    # FIXME: specify cores, so as not to use all the resources on the
    #        cluster.
    # FIXME: HA deploy multiple masters, may be one on each control node

    try:
        # Wait for the Spark driver to "finish"
        spark_streaming_context.awaitTermination()
    except Exception as e:
        MonMetricsKafkaProcessor.log_debug(
            "Exception raised during Spark execution : " + str(e))
        # One exception that can occur here is the result of the saved
        # kafka offsets being obsolete/out of range. Delete the saved
        # offsets to improve the chance of success on the next execution.

        # TODO(someone) prevent deleting all offsets for an application,
        # but just the latest revision
        MonMetricsKafkaProcessor.log_debug(
            "Deleting saved offsets for chance of success on next execution")
        MonMetricsKafkaProcessor.reset_kafka_offsets(application_name)

        # delete pre hourly processor offsets
        if cfg.CONF.stage_processors.pre_hourly_processor_enabled:
            PreHourlyProcessor.reset_kafka_offsets()
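# Hedged sketch, not part of the original source: the commented-out
# signal.signal(...) call in invoke() refers to a signal_handler that is not
# defined in this snippet. One minimal way to wire it up might look like the
# helper below; the name install_signal_handler and the idea of passing the
# streaming context in are assumptions, not the project's actual code.
import signal


def install_signal_handler(spark_streaming_context):
    def signal_handler(signal_number, frame):
        # finish the in-flight batch, then tear down the streaming
        # context together with the underlying spark context
        spark_streaming_context.stop(stopSparkContext=True,
                                     stopGraceFully=True)
    signal.signal(signal.SIGINT, signal_handler)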
def rdd_to_recordstore(rdd_transform_context_rdd):

    if rdd_transform_context_rdd.isEmpty():
        MonMetricsKafkaProcessor.log_debug(
            "rdd_to_recordstore: nothing to process...")
    else:
        sql_context = SQLContext.getOrCreate(
            rdd_transform_context_rdd.context)
        data_driven_specs_repo = DataDrivenSpecsRepoFactory.\
            get_data_driven_specs_repo()
        pre_transform_specs_df = data_driven_specs_repo.\
            get_data_driven_specs(
                sql_context=sql_context,
                data_driven_spec_type=DataDrivenSpecsRepo.
                pre_transform_specs_type)

        #
        # extract second column containing raw metric data
        #
        raw_mon_metrics = rdd_transform_context_rdd.map(
            lambda nt: nt.rdd_info[1])

        #
        # convert raw metric data rdd to dataframe rdd
        #
        raw_mon_metrics_df = \
            MonMetricUtils.create_mon_metrics_df_from_json_rdd(
                sql_context,
                raw_mon_metrics)

        #
        # filter out unwanted metrics and keep metrics we are interested in
        #
        cond = [
            raw_mon_metrics_df.metric["name"] ==
            pre_transform_specs_df.event_type]
        filtered_metrics_df = raw_mon_metrics_df.join(
            pre_transform_specs_df, cond)

        #
        # validate filtered metrics to check if required fields
        # are present and not empty
        # In order to be able to apply filter function had to convert
        # data frame rdd to normal rdd. After validation the rdd is
        # converted back to dataframe rdd
        #
        # FIXME: find a way to apply filter function on dataframe rdd data
        validated_mon_metrics_rdd = filtered_metrics_df.rdd.filter(
            MonMetricsKafkaProcessor._validate_raw_mon_metrics)
        validated_mon_metrics_df = sql_context.createDataFrame(
            validated_mon_metrics_rdd, filtered_metrics_df.schema)

        #
        # record generator
        # generate a new intermediate metric record if a given metric
        # metric_id_list, in pre_transform_specs table has several
        # intermediate metrics defined.
        # intermediate metrics are used as a convenient way to
        # process (aggregated) metric in multiple ways by making a copy
        # of the source data for each processing
        #
        gen_mon_metrics_df = validated_mon_metrics_df.select(
            validated_mon_metrics_df.meta,
            validated_mon_metrics_df.metric,
            validated_mon_metrics_df.event_processing_params,
            validated_mon_metrics_df.event_type,
            explode(validated_mon_metrics_df.metric_id_list).alias(
                "this_metric_id"),
            validated_mon_metrics_df.service_id)

        #
        # transform metrics data to record_store format
        # record store format is the common format which will serve as
        # source to aggregation processing.
        # converting the metric to common standard format helps in writing
        # generic aggregation routines driven by configuration parameters
        # and can be reused
        #
        record_store_df = gen_mon_metrics_df.select(
            (gen_mon_metrics_df.metric.timestamp / 1000).alias(
                "event_timestamp_unix"),
            from_unixtime(
                gen_mon_metrics_df.metric.timestamp / 1000).alias(
                "event_timestamp_string"),
            gen_mon_metrics_df.event_type.alias("event_type"),
            gen_mon_metrics_df.event_type.alias("event_quantity_name"),
            (gen_mon_metrics_df.metric.value / 1.0).alias(
                "event_quantity"),
            when(gen_mon_metrics_df.metric.dimensions.state != '',
                 gen_mon_metrics_df.metric.dimensions.state).otherwise(
                'NA').alias("event_status"),
            lit('1.0').alias('event_version'),
            lit('metrics').alias("record_type"),

            # resource_uuid
            when(gen_mon_metrics_df.metric.dimensions.instanceId != '',
                 gen_mon_metrics_df.metric.dimensions.instanceId).when(
                gen_mon_metrics_df.metric.dimensions.resource_id != '',
                gen_mon_metrics_df.metric.dimensions.resource_id).otherwise(
                'NA').alias("resource_uuid"),

            when(gen_mon_metrics_df.metric.dimensions.tenantId != '',
                 gen_mon_metrics_df.metric.dimensions.tenantId).when(
                gen_mon_metrics_df.metric.dimensions.tenant_id != '',
                gen_mon_metrics_df.metric.dimensions.tenant_id).when(
                gen_mon_metrics_df.metric.dimensions.project_id != '',
                gen_mon_metrics_df.metric.dimensions.project_id).otherwise(
                'NA').alias("tenant_id"),

            when(gen_mon_metrics_df.metric.dimensions.mount != '',
                 gen_mon_metrics_df.metric.dimensions.mount).otherwise(
                'NA').alias("mount"),

            when(gen_mon_metrics_df.metric.dimensions.device != '',
                 gen_mon_metrics_df.metric.dimensions.device).otherwise(
                'NA').alias("device"),

            when(gen_mon_metrics_df.metric.dimensions.namespace != '',
                 gen_mon_metrics_df.metric.dimensions.namespace).otherwise(
                'NA').alias("namespace"),

            when(gen_mon_metrics_df.metric.dimensions.pod_name != '',
                 gen_mon_metrics_df.metric.dimensions.pod_name).otherwise(
                'NA').alias("pod_name"),

            when(gen_mon_metrics_df.metric.dimensions.container_name != '',
                 gen_mon_metrics_df.metric.dimensions
                 .container_name).otherwise('NA').alias("container_name"),

            when(gen_mon_metrics_df.metric.dimensions.app != '',
                 gen_mon_metrics_df.metric.dimensions.app).otherwise(
                'NA').alias("app"),

            when(gen_mon_metrics_df.metric.dimensions.interface != '',
                 gen_mon_metrics_df.metric.dimensions.interface).otherwise(
                'NA').alias("interface"),

            when(gen_mon_metrics_df.metric.dimensions.deployment != '',
                 gen_mon_metrics_df.metric.dimensions
                 .deployment).otherwise('NA').alias("deployment"),

            when(gen_mon_metrics_df.metric.dimensions.daemon_set != '',
                 gen_mon_metrics_df.metric.dimensions
                 .daemon_set).otherwise('NA').alias("daemon_set"),

            when(gen_mon_metrics_df.meta.userId != '',
                 gen_mon_metrics_df.meta.userId).otherwise('NA').alias(
                "user_id"),

            when(gen_mon_metrics_df.meta.region != '',
                 gen_mon_metrics_df.meta.region).when(
                gen_mon_metrics_df.event_processing_params
                .set_default_region_to != '',
                gen_mon_metrics_df.event_processing_params
                .set_default_region_to).otherwise(
                'NA').alias("region"),

            when(gen_mon_metrics_df.meta.zone != '',
                 gen_mon_metrics_df.meta.zone).when(
                gen_mon_metrics_df.event_processing_params
                .set_default_zone_to != '',
                gen_mon_metrics_df.event_processing_params
                .set_default_zone_to).otherwise(
                'NA').alias("zone"),

            when(gen_mon_metrics_df.metric.dimensions.hostname != '',
                 gen_mon_metrics_df.metric.dimensions.hostname).when(
                gen_mon_metrics_df.metric.value_meta.host != '',
                gen_mon_metrics_df.metric.value_meta.host).otherwise(
                'NA').alias("host"),

            when(gen_mon_metrics_df.service_id != '',
                 gen_mon_metrics_df.service_id).otherwise(
                'NA').alias("service_group"),

            when(gen_mon_metrics_df.service_id != '',
                 gen_mon_metrics_df.service_id).otherwise(
                'NA').alias("service_id"),

            from_unixtime(gen_mon_metrics_df.metric.timestamp / 1000,
                          'yyyy-MM-dd').alias("event_date"),
            from_unixtime(gen_mon_metrics_df.metric.timestamp / 1000,
                          'HH').alias("event_hour"),
            from_unixtime(gen_mon_metrics_df.metric.timestamp / 1000,
                          'mm').alias("event_minute"),
            from_unixtime(gen_mon_metrics_df.metric.timestamp / 1000,
                          'ss').alias("event_second"),
            gen_mon_metrics_df.this_metric_id.alias("metric_group"),
            gen_mon_metrics_df.this_metric_id.alias("metric_id"))

        #
        # get transform context
        #
        rdd_transform_context = rdd_transform_context_rdd.first()
        transform_context = rdd_transform_context.transform_context_info

        #
        # cache record store rdd
        #
        if cfg.CONF.service.enable_record_store_df_cache:
            storage_level_prop = \
                cfg.CONF.service.record_store_df_cache_storage_level
            try:
                storage_level = StorageUtils.get_storage_level(
                    storage_level_prop)
            except InvalidCacheStorageLevelException as storage_error:
                storage_error.value += \
                    " (as specified in " \
                    "service.record_store_df_cache_storage_level)"
                raise
            record_store_df.persist(storage_level)

        #
        # start processing metrics available in record_store data
        #
        MonMetricsKafkaProcessor.process_metrics(transform_context,
                                                 record_store_df)

        # remove df from cache
        if cfg.CONF.service.enable_record_store_df_cache:
            record_store_df.unpersist()

        #
        # extract kafka offsets and batch processing time
        # stored in transform_context and save offsets
        #
        offsets = transform_context.offset_info

        # batch time
        batch_time_info = \
            transform_context.batch_time_info

        MonMetricsKafkaProcessor.save_kafka_offsets(
            offsets, rdd_transform_context_rdd.context.appName,
            batch_time_info)

        # call pre hourly processor, if it's time to run
        if (cfg.CONF.stage_processors.pre_hourly_processor_enabled and
                PreHourlyProcessor.is_time_to_run(batch_time_info)):
            PreHourlyProcessor.run_processor(
                record_store_df.rdd.context,
                batch_time_info)
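# Hedged sketch, not part of the original source: a minimal, standalone
# demonstration of the when().when().otherwise('NA') chaining that
# rdd_to_recordstore uses above to coalesce alternate dimension fields into a
# single record_store column. The toy rows and demo names are illustrative
# only; the real code runs against the full Monasca metric schema.
from pyspark import SparkContext
from pyspark.sql import SQLContext
from pyspark.sql.functions import when


def demo_dimension_coalesce():
    spark_context = SparkContext(master="local[1]",
                                 appName="when_otherwise_demo")
    sql_context = SQLContext(spark_context)
    df = sql_context.createDataFrame(
        [("t1", ""), ("", "p2"), ("", "")],
        ["tenantId", "project_id"])
    # pick the first non-empty of tenantId/project_id, defaulting to 'NA',
    # mirroring the tenant_id expression in record_store_df
    df.select(
        when(df.tenantId != '', df.tenantId)
        .when(df.project_id != '', df.project_id)
        .otherwise('NA').alias("tenant_id")).show()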
def test_fetch_quantity_sum(self,
                            usage_manager,
                            setter_manager,
                            insert_manager,
                            data_driven_specs_repo):

    # test operation
    test_operation = "sum"

    # load components
    usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
    setter_manager.return_value = \
        MockComponentManager.get_setter_cmpt_mgr()
    insert_manager.return_value = \
        MockComponentManager.get_insert_pre_hourly_cmpt_mgr()

    # init mock driver tables
    data_driven_specs_repo.return_value = \
        MockDataDrivenSpecsRepo(
            self.spark_context,
            self.get_pre_transform_specs_json(),
            self.get_transform_specs_json_by_operation(test_operation))

    # Create an emulated set of Kafka messages (these were gathered
    # by extracting Monasca messages from the Metrics queue on mini-mon).

    # Create an RDD out of the mocked Monasca metrics
    with open(DataProvider.fetch_quantity_data_path) as f:
        raw_lines = f.read().splitlines()
    raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]

    rdd_monasca = self.spark_context.parallelize(raw_tuple_list)

    # decorate mocked RDD with dummy kafka offsets
    myOffsetRanges = [
        OffsetRange("metrics", 1, 10, 20)]  # mimic rdd.offsetRanges()

    transform_context = TransformContextUtils.get_context(
        offset_info=myOffsetRanges,
        batch_time_info=self.get_dummy_batch_time())

    rdd_monasca_with_offsets = rdd_monasca.map(
        lambda x: RddTransformContext(x, transform_context))

    # Call the primary method in mon_metrics_kafka
    MonMetricsKafkaProcessor.rdd_to_recordstore(
        rdd_monasca_with_offsets)

    # get the metrics that have been submitted to the dummy message adapter
    instance_usage_list = DummyAdapter.adapter_impl.metric_list
    instance_usage_list = map(dump_as_ascii_string,
                              instance_usage_list)
    DummyAdapter.adapter_impl.metric_list = []

    instance_usage_rdd = self.spark_context.parallelize(
        instance_usage_list)
    sql_context = SQLContext(self.spark_context)
    instance_usage_df = sql_context.read.json(instance_usage_rdd)
    PreHourlyProcessor.do_transform(instance_usage_df)

    metrics = DummyAdapter.adapter_impl.metric_list
    mem_total_mb_agg_metric = [
        value for value in metrics
        if value.get('metric').get('name') == 'mem.total_mb_agg' and
        value.get('metric').get('dimensions').get('host') ==
        'mini-mon'][0]

    self.assertTrue(mem_total_mb_agg_metric is not None)
    self.assertEqual('mem.total_mb_agg',
                     mem_total_mb_agg_metric
                     .get('metric').get('name'))
    self.assertEqual(15360.0,
                     mem_total_mb_agg_metric
                     .get('metric').get('value'))
    self.assertEqual('useast',
                     mem_total_mb_agg_metric
                     .get('meta').get('region'))
    self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                     mem_total_mb_agg_metric
                     .get('meta').get('tenantId'))
    self.assertEqual('mini-mon',
                     mem_total_mb_agg_metric
                     .get('metric').get('dimensions').get('host'))
    self.assertEqual('all',
                     mem_total_mb_agg_metric
                     .get('metric').get('dimensions').get('project_id'))
    self.assertEqual('hourly',
                     mem_total_mb_agg_metric
                     .get('metric').get('dimensions')
                     .get('aggregation_period'))
    self.assertEqual(4.0,
                     mem_total_mb_agg_metric
                     .get('metric').get('value_meta').get('record_count'))
    self.assertEqual('2016-01-20 16:40:00',
                     mem_total_mb_agg_metric
                     .get('metric').get('value_meta')
                     .get('firstrecord_timestamp_string'))
    self.assertEqual('2016-01-20 16:40:46',
                     mem_total_mb_agg_metric
                     .get('metric').get('value_meta')
                     .get('lastrecord_timestamp_string'))
def test_fetch_quantity_sum_second_stage(self,
                                         usage_manager,
                                         setter_manager,
                                         insert_manager,
                                         data_driven_specs_repo):

    # test operation
    test_operation = "sum"

    # load components
    usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
    setter_manager.return_value = \
        MockComponentManager.get_setter_cmpt_mgr()
    insert_manager.return_value = \
        MockComponentManager.get_insert_pre_hourly_cmpt_mgr()

    # init mock driver tables
    data_driven_specs_repo.return_value = \
        MockDataDrivenSpecsRepo(
            self.spark_context,
            self.get_pre_transform_specs_json(),
            self.get_transform_specs_json_by_operation(test_operation))

    # Create an emulated set of Kafka messages (these were gathered
    # by extracting Monasca messages from the Metrics queue on mini-mon).

    # Create an RDD out of the mocked Monasca metrics
    with open(SecondStageDataProvider.fetch_quantity_data_path) as f:
        raw_lines = f.read().splitlines()
    raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]
    instance_usage_list = map(dump_as_ascii_string, raw_tuple_list)

    # create a json RDD from instance_usage_list
    instance_usage_rdd = self.spark_context.parallelize(
        instance_usage_list)
    sql_context = SQLContext(self.spark_context)
    instance_usage_df = sql_context.read.json(
        instance_usage_rdd)

    # call pre hourly processor
    PreHourlyProcessor.do_transform(instance_usage_df)

    metrics = DummyAdapter.adapter_impl.metric_list
    mem_total_mb_agg_metric = [
        value for value in metrics
        if value.get('metric').get('name') == 'mem.total_mb_agg' and
        value.get('metric').get('dimensions').get('host') ==
        'mini-mon'][0]

    self.assertTrue(mem_total_mb_agg_metric is not None)
    self.assertEqual('mem.total_mb_agg',
                     mem_total_mb_agg_metric
                     .get('metric').get('name'))
    self.assertEqual(8679.0,
                     mem_total_mb_agg_metric
                     .get('metric').get('value'))
    self.assertEqual('useast',
                     mem_total_mb_agg_metric
                     .get('meta').get('region'))
    self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                     mem_total_mb_agg_metric
                     .get('meta').get('tenantId'))
    self.assertEqual('mini-mon',
                     mem_total_mb_agg_metric
                     .get('metric').get('dimensions').get('host'))
    self.assertEqual('all',
                     mem_total_mb_agg_metric
                     .get('metric').get('dimensions').get('project_id'))
    self.assertEqual('prehourly',
                     mem_total_mb_agg_metric
                     .get('metric').get('dimensions')
                     .get('aggregation_period'))
    self.assertEqual(39.0,
                     mem_total_mb_agg_metric
                     .get('metric').get('value_meta').get('record_count'))
    self.assertEqual('2016-01-20 16:40:00',
                     mem_total_mb_agg_metric
                     .get('metric').get('value_meta')
                     .get('firstrecord_timestamp_string'))
    self.assertEqual('2016-01-20 17:40:00',
                     mem_total_mb_agg_metric
                     .get('metric').get('value_meta')
                     .get('lastrecord_timestamp_string'))
def test_rdd_to_recordstore(self,
                            usage_manager,
                            setter_manager,
                            insert_manager):

    usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
    setter_manager.return_value = \
        MockComponentManager.get_setter_cmpt_mgr()
    insert_manager.return_value = \
        MockComponentManager.get_insert_pre_hourly_cmpt_mgr()

    # Create an RDD out of the mocked Monasca metrics
    with open(DataProvider.kafka_data_path) as f:
        raw_lines = f.read().splitlines()
    raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]

    rdd_monasca = self.spark_context.parallelize(raw_tuple_list)

    # decorate mocked RDD with dummy kafka offsets
    myOffsetRanges = [
        OffsetRange("metrics", 1, 10, 20)]  # mimic rdd.offsetRanges()

    transform_context = TransformContextUtils.get_context(
        offset_info=myOffsetRanges,
        batch_time_info=self.get_dummy_batch_time())

    rdd_monasca_with_offsets = rdd_monasca.map(
        lambda x: RddTransformContext(x, transform_context))

    # Call the primary method in mon_metrics_kafka
    MonMetricsKafkaProcessor.rdd_to_recordstore(
        rdd_monasca_with_offsets)

    host_usage_list = DummyAdapter.adapter_impl.metric_list
    host_usage_list = map(dump_as_ascii_string, host_usage_list)
    DummyAdapter.adapter_impl.metric_list = []

    host_usage_rdd = self.spark_context.parallelize(host_usage_list)
    sql_context = SQLContext(self.spark_context)
    host_usage_df = sql_context.read.json(host_usage_rdd)
    PreHourlyProcessor.do_transform(host_usage_df)

    # get the metrics that have been submitted to the dummy message adapter
    metrics = DummyAdapter.adapter_impl.metric_list

    # Verify cpu.total_logical_cores_agg for all hosts
    total_cpu_logical_agg_metric = [
        value for value in metrics
        if value.get('metric').get('name') ==
        'cpu.total_logical_cores_agg' and
        value.get('metric').get('dimensions').get('host') ==
        'all'][0]

    self.assertEqual(15.0,
                     total_cpu_logical_agg_metric
                     .get('metric').get('value'))
    self.assertEqual('useast',
                     total_cpu_logical_agg_metric
                     .get('meta').get('region'))
    self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                     total_cpu_logical_agg_metric
                     .get('meta').get('tenantId'))
    self.assertEqual('all',
                     total_cpu_logical_agg_metric
                     .get('metric').get('dimensions')
                     .get('project_id'))
    self.assertEqual('hourly',
                     total_cpu_logical_agg_metric
                     .get('metric').get('dimensions')
                     .get('aggregation_period'))
    self.assertEqual(13.0,
                     total_cpu_logical_agg_metric
                     .get('metric').get('value_meta')
                     .get('record_count'))
    self.assertEqual('2016-03-07 16:09:23',
                     total_cpu_logical_agg_metric
                     .get('metric').get('value_meta')
                     .get('firstrecord_timestamp_string'))
    self.assertEqual('2016-03-07 16:10:38',
                     total_cpu_logical_agg_metric
                     .get('metric').get('value_meta')
                     .get('lastrecord_timestamp_string'))

    # Verify cpu.total_logical_cores_agg for mini-mon host
    total_cpu_logical_agg_metric = [
        value for value in metrics
        if value.get('metric').get('name') ==
        'cpu.total_logical_cores_agg' and
        value.get('metric').get('dimensions').get('host') ==
        'test-cp1-comp0333-mgmt'][0]

    self.assertEqual(9.0,
                     total_cpu_logical_agg_metric
                     .get('metric').get('value'))
    self.assertEqual('useast',
                     total_cpu_logical_agg_metric
                     .get('meta').get('region'))
    self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                     total_cpu_logical_agg_metric
                     .get('meta').get('tenantId'))
    self.assertEqual('all',
                     total_cpu_logical_agg_metric
                     .get('metric').get('dimensions')
                     .get('project_id'))
    self.assertEqual('hourly',
                     total_cpu_logical_agg_metric
                     .get('metric').get('dimensions')
                     .get('aggregation_period'))
    self.assertEqual(6.0,
                     total_cpu_logical_agg_metric
                     .get('metric').get('value_meta')
                     .get('record_count'))
    self.assertEqual('2016-03-07 16:09:23',
                     total_cpu_logical_agg_metric
                     .get('metric').get('value_meta')
                     .get('firstrecord_timestamp_string'))
    self.assertEqual('2016-03-07 16:10:38',
                     total_cpu_logical_agg_metric
                     .get('metric').get('value_meta')
                     .get('lastrecord_timestamp_string'))

    # Verify cpu.total_logical_cores_agg for devstack host
    total_cpu_logical_agg_metric = [
        value for value in metrics
        if value.get('metric').get('name') ==
        'cpu.total_logical_cores_agg' and
        value.get('metric').get('dimensions').get('host') ==
        'test-cp1-comp0027-mgmt'][0]

    self.assertEqual(6.0,
                     total_cpu_logical_agg_metric
                     .get('metric').get('value'))
    self.assertEqual('useast',
                     total_cpu_logical_agg_metric
                     .get('meta').get('region'))
    self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                     total_cpu_logical_agg_metric
                     .get('meta').get('tenantId'))
    self.assertEqual('all',
                     total_cpu_logical_agg_metric
                     .get('metric').get('dimensions')
                     .get('project_id'))
    self.assertEqual('hourly',
                     total_cpu_logical_agg_metric
                     .get('metric').get('dimensions')
                     .get('aggregation_period'))
    self.assertEqual(7.0,
                     total_cpu_logical_agg_metric
                     .get('metric').get('value_meta')
                     .get('record_count'))
    self.assertEqual('2016-03-07 16:09:23',
                     total_cpu_logical_agg_metric
                     .get('metric').get('value_meta')
                     .get('firstrecord_timestamp_string'))
    self.assertEqual('2016-03-07 16:10:38',
                     total_cpu_logical_agg_metric
                     .get('metric').get('value_meta')
                     .get('lastrecord_timestamp_string'))

    # Verify cpu.utilized_logical_cores_agg for all hosts
    utilized_cpu_logical_agg_metric = [
        value for value in metrics
        if value.get('metric').get('name') ==
        'cpu.utilized_logical_cores_agg' and
        value.get('metric').get('dimensions').get('host') ==
        'all'][0]

    self.assertEqual(7.134214285714285,
                     utilized_cpu_logical_agg_metric
                     .get('metric').get('value'))
    self.assertEqual('useast',
                     utilized_cpu_logical_agg_metric
                     .get('meta').get('region'))
    self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                     utilized_cpu_logical_agg_metric
                     .get('meta').get('tenantId'))
    self.assertEqual('all',
                     utilized_cpu_logical_agg_metric
                     .get('metric').get('dimensions')
                     .get('project_id'))
    self.assertEqual('hourly',
                     utilized_cpu_logical_agg_metric
                     .get('metric').get('dimensions')
                     .get('aggregation_period'))
    self.assertEqual(13.0,
                     utilized_cpu_logical_agg_metric
                     .get('metric').get('value_meta')
                     .get('record_count'))
    self.assertEqual('2016-03-07 16:09:23',
                     utilized_cpu_logical_agg_metric
                     .get('metric').get('value_meta')
                     .get('firstrecord_timestamp_string'))
    self.assertEqual('2016-03-07 16:10:38',
                     utilized_cpu_logical_agg_metric
                     .get('metric').get('value_meta')
                     .get('lastrecord_timestamp_string'))

    # Verify cpu.utilized_logical_cores_agg for the mini-mon host
    utilized_cpu_logical_agg_metric = [
        value for value in metrics
        if value.get('metric').get('name') ==
        'cpu.utilized_logical_cores_agg' and
        value.get('metric').get('dimensions').get('host') ==
        'test-cp1-comp0333-mgmt'][0]

    self.assertEqual(4.9665,
                     utilized_cpu_logical_agg_metric
                     .get('metric').get('value'))
    self.assertEqual('useast',
                     utilized_cpu_logical_agg_metric
                     .get('meta').get('region'))
    self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                     utilized_cpu_logical_agg_metric
                     .get('meta').get('tenantId'))
    self.assertEqual('all',
                     utilized_cpu_logical_agg_metric
                     .get('metric').get('dimensions')
                     .get('project_id'))
    self.assertEqual('hourly',
                     utilized_cpu_logical_agg_metric
                     .get('metric').get('dimensions')
                     .get('aggregation_period'))
    self.assertEqual(6.0,
                     utilized_cpu_logical_agg_metric
                     .get('metric').get('value_meta')
                     .get('record_count'))
    self.assertEqual('2016-03-07 16:09:23',
                     utilized_cpu_logical_agg_metric
                     .get('metric').get('value_meta')
                     .get('firstrecord_timestamp_string'))
    self.assertEqual('2016-03-07 16:10:38',
                     utilized_cpu_logical_agg_metric
                     .get('metric').get('value_meta')
                     .get('lastrecord_timestamp_string'))

    # Verify cpu.utilized_logical_cores_agg for the devstack host
    utilized_cpu_logical_agg_metric = [
        value for value in metrics
        if value.get('metric').get('name') ==
        'cpu.utilized_logical_cores_agg' and
        value.get('metric').get('dimensions').get('host') ==
        'test-cp1-comp0027-mgmt'][0]

    self.assertEqual(2.1677142857142853,
                     utilized_cpu_logical_agg_metric
                     .get('metric').get('value'))
    self.assertEqual('useast',
                     utilized_cpu_logical_agg_metric
                     .get('meta').get('region'))
    self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                     utilized_cpu_logical_agg_metric
                     .get('meta').get('tenantId'))
    self.assertEqual('all',
                     utilized_cpu_logical_agg_metric
                     .get('metric').get('dimensions')
                     .get('project_id'))
    self.assertEqual('hourly',
                     utilized_cpu_logical_agg_metric
                     .get('metric').get('dimensions')
                     .get('aggregation_period'))
    self.assertEqual(7.0,
                     utilized_cpu_logical_agg_metric
                     .get('metric').get('value_meta')
                     .get('record_count'))
    self.assertEqual('2016-03-07 16:09:23',
                     utilized_cpu_logical_agg_metric
                     .get('metric').get('value_meta')
                     .get('firstrecord_timestamp_string'))
    self.assertEqual('2016-03-07 16:10:38',
                     utilized_cpu_logical_agg_metric
                     .get('metric').get('value_meta')
                     .get('lastrecord_timestamp_string'))
def rdd_to_recordstore(rdd_transform_context_rdd):

    if rdd_transform_context_rdd.isEmpty():
        MonMetricsKafkaProcessor.log_debug(
            "rdd_to_recordstore: nothing to process...")
    else:
        sql_context = SQLContext(rdd_transform_context_rdd.context)
        data_driven_specs_repo = DataDrivenSpecsRepoFactory.\
            get_data_driven_specs_repo()
        pre_transform_specs_df = data_driven_specs_repo.\
            get_data_driven_specs(
                sql_context=sql_context,
                data_driven_spec_type=DataDrivenSpecsRepo.
                pre_transform_specs_type)

        #
        # extract second column containing raw metric data
        #
        raw_mon_metrics = rdd_transform_context_rdd.map(
            lambda nt: nt.rdd_info[1])

        #
        # convert raw metric data rdd to dataframe rdd
        #
        raw_mon_metrics_df = \
            MonMetricUtils.create_mon_metrics_df_from_json_rdd(
                sql_context,
                raw_mon_metrics)

        #
        # filter out unwanted metrics and keep metrics we are interested in
        #
        cond = [
            raw_mon_metrics_df.metric.name ==
            pre_transform_specs_df.event_type]
        filtered_metrics_df = raw_mon_metrics_df.join(
            pre_transform_specs_df, cond)

        #
        # validate filtered metrics to check if required fields
        # are present and not empty
        # In order to be able to apply filter function had to convert
        # data frame rdd to normal rdd. After validation the rdd is
        # converted back to dataframe rdd
        #
        # FIXME: find a way to apply filter function on dataframe rdd data
        validated_mon_metrics_rdd = filtered_metrics_df.rdd.filter(
            MonMetricsKafkaProcessor._validate_raw_mon_metrics)
        validated_mon_metrics_df = sql_context.createDataFrame(
            validated_mon_metrics_rdd, filtered_metrics_df.schema)

        #
        # record generator
        # generate a new intermediate metric record if a given metric
        # metric_id_list, in pre_transform_specs table has several
        # intermediate metrics defined.
        # intermediate metrics are used as a convenient way to
        # process (aggregated) metric in multiple ways by making a copy
        # of the source data for each processing
        #
        gen_mon_metrics_df = validated_mon_metrics_df.select(
            validated_mon_metrics_df.meta,
            validated_mon_metrics_df.metric,
            validated_mon_metrics_df.event_processing_params,
            validated_mon_metrics_df.event_type,
            explode(validated_mon_metrics_df.metric_id_list).alias(
                "this_metric_id"),
            validated_mon_metrics_df.service_id)

        #
        # transform metrics data to record_store format
        # record store format is the common format which will serve as
        # source to aggregation processing.
        # converting the metric to common standard format helps in writing
        # generic aggregation routines driven by configuration parameters
        # and can be reused
        #
        record_store_df = gen_mon_metrics_df.select(
            (gen_mon_metrics_df.metric.timestamp / 1000).alias(
                "event_timestamp_unix"),
            from_unixtime(
                gen_mon_metrics_df.metric.timestamp / 1000).alias(
                "event_timestamp_string"),
            gen_mon_metrics_df.event_type.alias("event_type"),
            gen_mon_metrics_df.event_type.alias("event_quantity_name"),
            (gen_mon_metrics_df.metric.value / 1.0).alias(
                "event_quantity"),
            when(gen_mon_metrics_df.metric.dimensions.state != '',
                 gen_mon_metrics_df.metric.dimensions.state).otherwise(
                'NA').alias("event_status"),
            lit('1.0').alias('event_version'),
            lit('metrics').alias("record_type"),

            # resource_uuid
            when(gen_mon_metrics_df.metric.dimensions.instanceId != '',
                 gen_mon_metrics_df.metric.dimensions.instanceId).when(
                gen_mon_metrics_df.metric.dimensions.resource_id != '',
                gen_mon_metrics_df.metric.dimensions.resource_id).otherwise(
                'NA').alias("resource_uuid"),

            when(gen_mon_metrics_df.metric.dimensions.tenantId != '',
                 gen_mon_metrics_df.metric.dimensions.tenantId).when(
                gen_mon_metrics_df.metric.dimensions.tenant_id != '',
                gen_mon_metrics_df.metric.dimensions.tenant_id).when(
                gen_mon_metrics_df.metric.dimensions.project_id != '',
                gen_mon_metrics_df.metric.dimensions.project_id).otherwise(
                'NA').alias("tenant_id"),

            when(gen_mon_metrics_df.metric.dimensions.mount != '',
                 gen_mon_metrics_df.metric.dimensions.mount).otherwise(
                'NA').alias("mount"),

            when(gen_mon_metrics_df.metric.dimensions.device != '',
                 gen_mon_metrics_df.metric.dimensions.device).otherwise(
                'NA').alias("device"),

            when(gen_mon_metrics_df.meta.userId != '',
                 gen_mon_metrics_df.meta.userId).otherwise('NA').alias(
                "user_id"),

            when(gen_mon_metrics_df.meta.region != '',
                 gen_mon_metrics_df.meta.region).when(
                gen_mon_metrics_df.event_processing_params
                .set_default_region_to != '',
                gen_mon_metrics_df.event_processing_params
                .set_default_region_to).otherwise(
                'NA').alias("region"),

            when(gen_mon_metrics_df.meta.zone != '',
                 gen_mon_metrics_df.meta.zone).when(
                gen_mon_metrics_df.event_processing_params
                .set_default_zone_to != '',
                gen_mon_metrics_df.event_processing_params
                .set_default_zone_to).otherwise(
                'NA').alias("zone"),

            when(gen_mon_metrics_df.metric.dimensions.hostname != '',
                 gen_mon_metrics_df.metric.dimensions.hostname).when(
                gen_mon_metrics_df.metric.value_meta.host != '',
                gen_mon_metrics_df.metric.value_meta.host).otherwise(
                'NA').alias("host"),

            when(gen_mon_metrics_df.service_id != '',
                 gen_mon_metrics_df.service_id).otherwise(
                'NA').alias("service_group"),

            when(gen_mon_metrics_df.service_id != '',
                 gen_mon_metrics_df.service_id).otherwise(
                'NA').alias("service_id"),

            from_unixtime(gen_mon_metrics_df.metric.timestamp / 1000,
                          'yyyy-MM-dd').alias("event_date"),
            from_unixtime(gen_mon_metrics_df.metric.timestamp / 1000,
                          'HH').alias("event_hour"),
            from_unixtime(gen_mon_metrics_df.metric.timestamp / 1000,
                          'mm').alias("event_minute"),
            from_unixtime(gen_mon_metrics_df.metric.timestamp / 1000,
                          'ss').alias("event_second"),
            gen_mon_metrics_df.this_metric_id.alias("metric_group"),
            gen_mon_metrics_df.this_metric_id.alias("metric_id"))

        #
        # get transform context
        #
        rdd_transform_context = rdd_transform_context_rdd.first()
        transform_context = rdd_transform_context.transform_context_info

        #
        # cache record store rdd
        #
        if cfg.CONF.service.enable_record_store_df_cache:
            storage_level_prop = \
                cfg.CONF.service.record_store_df_cache_storage_level
            storage_level = StorageUtils.get_storage_level(
                storage_level_prop)
            record_store_df.persist(storage_level)

        #
        # start processing metrics available in record_store data
        #
        MonMetricsKafkaProcessor.process_metrics(transform_context,
                                                 record_store_df)

        # remove df from cache
        if cfg.CONF.service.enable_record_store_df_cache:
            record_store_df.unpersist()

        #
        # extract kafka offsets and batch processing time
        # stored in transform_context and save offsets
        #
        offsets = transform_context.offset_info

        # batch time
        batch_time_info = \
            transform_context.batch_time_info

        MonMetricsKafkaProcessor.save_kafka_offsets(
            offsets, rdd_transform_context_rdd.context.appName,
            batch_time_info)

        # call pre hourly processor, if it's time to run
        if (cfg.CONF.stage_processors.pre_hourly_processor_enabled and
                PreHourlyProcessor.is_time_to_run(batch_time_info)):
            PreHourlyProcessor.run_processor(
                record_store_df.rdd.context,
                batch_time_info)
def test_pre_hourly_processor(self,
                              offset_range_list,
                              pre_hourly_data):

    # load components
    myOffsetRanges = [
        OffsetRange("metrics_pre_hourly", 1, 10, 20)]
    offset_range_list.return_value = myOffsetRanges

    # Create an RDD out of the mocked instance usage data
    with open(DataProvider.metrics_pre_hourly_data_path) as f:
        raw_lines = f.read().splitlines()
    raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]

    pre_hourly_rdd_data = self.spark_context.parallelize(raw_tuple_list)
    pre_hourly_data.return_value = pre_hourly_rdd_data

    # Do something simple with the RDD
    result = self.simple_count_transform(pre_hourly_rdd_data)

    # run pre hourly processor
    PreHourlyProcessor.run_processor(
        self.spark_context, self.get_dummy_batch_time())

    # get the metrics that have been submitted to the dummy message adapter
    metrics = DummyAdapter.adapter_impl.metric_list

    # Verify count of instance usage data
    self.assertEqual(result, 6)

    # check aggregation result
    mem_total_mb_agg_metric = [
        value for value in metrics
        if value.get('metric').get('name') == 'mem.total_mb_agg' and
        value.get('metric').get('dimensions').get('host') == 'all'][0]
    self.assertTrue(mem_total_mb_agg_metric is not None)
    self.assertEqual(16049.0,
                     mem_total_mb_agg_metric
                     .get('metric').get('value'))
    # agg meta
    self.assertEqual("2016-06-20 11:49:44",
                     mem_total_mb_agg_metric
                     .get("metric")
                     .get('value_meta').get('lastrecord_timestamp'))
    self.assertEqual("2016-06-20 11:24:59",
                     mem_total_mb_agg_metric
                     .get("metric")
                     .get('value_meta').get('firstrecord_timestamp'))
    self.assertEqual(60.0,
                     mem_total_mb_agg_metric
                     .get("metric")
                     .get('value_meta').get('record_count'))

    mem_usable_mb_agg_metric = [
        value for value in metrics
        if value.get('metric').get('name') == 'mem.usable_mb_agg' and
        value.get('metric').get('dimensions').get('host') == 'all'][0]
    self.assertTrue(mem_usable_mb_agg_metric is not None)
    self.assertEqual(10283.1,
                     mem_usable_mb_agg_metric
                     .get('metric').get('value'))
    # agg meta
    self.assertEqual("2016-06-20 11:49:44",
                     mem_usable_mb_agg_metric
                     .get("metric")
                     .get('value_meta').get('lastrecord_timestamp'))
    self.assertEqual("2016-06-20 11:24:59",
                     mem_usable_mb_agg_metric
                     .get("metric")
                     .get('value_meta').get('firstrecord_timestamp'))
    self.assertEqual(60.0,
                     mem_usable_mb_agg_metric
                     .get("metric")
                     .get('value_meta').get('record_count'))
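# Hedged note, not part of the original source: OffsetRange's positional
# arguments are (topic, partition, fromOffset, untilOffset), so the mock in
# test_pre_hourly_processor stands in for offsets 10..20 of partition 1 of
# the "metrics_pre_hourly" topic, mimicking what rdd.offsetRanges() would
# report for a real Kafka batch.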
def test_fetch_quantity_avg(self,
                            usage_manager,
                            setter_manager,
                            insert_manager,
                            data_driven_specs_repo):

    # test operation
    test_operation = "avg"

    # load components
    usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
    setter_manager.return_value = \
        MockComponentManager.get_setter_cmpt_mgr()
    insert_manager.return_value = \
        MockComponentManager.get_insert_pre_hourly_cmpt_mgr()

    # init mock driver tables
    data_driven_specs_repo.return_value = \
        MockDataDrivenSpecsRepo(
            self.spark_context,
            self.get_pre_transform_specs_json(),
            self.get_transform_specs_json_by_operation(
                test_operation, 'hourly'))

    # Create an RDD out of the mocked Monasca metrics
    with open(DataProvider.kafka_data_path) as f:
        raw_lines = f.read().splitlines()
    raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]

    rdd_monasca = self.spark_context.parallelize(raw_tuple_list)

    # decorate mocked RDD with dummy kafka offsets
    myOffsetRanges = [
        OffsetRange("metrics", 1, 10, 20)]  # mimic rdd.offsetRanges()

    transform_context = TransformContextUtils.get_context(
        offset_info=myOffsetRanges,
        batch_time_info=self.get_dummy_batch_time())

    rdd_monasca_with_offsets = rdd_monasca.map(
        lambda x: RddTransformContext(x, transform_context))

    # Call the primary method in mon_metrics_kafka
    MonMetricsKafkaProcessor.rdd_to_recordstore(
        rdd_monasca_with_offsets)

    # get the metrics that have been submitted to the dummy message adapter
    metrics = DummyAdapter.adapter_impl.metric_list
    quantity_util_list = map(dump_as_ascii_string, metrics)
    DummyAdapter.adapter_impl.metric_list = []

    quantity_util_rdd = self.spark_context.parallelize(quantity_util_list)
    sql_context = SQLContext(self.spark_context)
    quantity_util_df = sql_context.read.json(quantity_util_rdd)
    PreHourlyProcessor.do_transform(quantity_util_df)

    metrics = DummyAdapter.adapter_impl.metric_list
    utilized_cpu_logical_agg_metric = [
        value for value in metrics
        if value.get('metric').get('name') ==
        'cpu.utilized_logical_cores_agg'][0]

    self.assertEqual(7.134214285714285,
                     utilized_cpu_logical_agg_metric
                     .get('metric').get('value'))
    self.assertEqual('useast',
                     utilized_cpu_logical_agg_metric
                     .get('meta').get('region'))
    self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                     utilized_cpu_logical_agg_metric
                     .get('meta').get('tenantId'))
    self.assertEqual('all',
                     utilized_cpu_logical_agg_metric
                     .get('metric').get('dimensions')
                     .get('host'))
    self.assertEqual('all',
                     utilized_cpu_logical_agg_metric
                     .get('metric').get('dimensions')
                     .get('project_id'))
    self.assertEqual('hourly',
                     utilized_cpu_logical_agg_metric
                     .get('metric').get('dimensions')
                     .get('aggregation_period'))
    self.assertEqual(13.0,
                     utilized_cpu_logical_agg_metric
                     .get('metric').get('value_meta')
                     .get('record_count'))
    self.assertEqual('2016-03-07 16:09:23',
                     utilized_cpu_logical_agg_metric
                     .get('metric').get('value_meta')
                     .get('firstrecord_timestamp_string'))
    self.assertEqual('2016-03-07 16:10:38',
                     utilized_cpu_logical_agg_metric
                     .get('metric').get('value_meta')
                     .get('lastrecord_timestamp_string'))
def test_fetch_quantity_avg_second_stage(self,
                                         usage_manager,
                                         setter_manager,
                                         insert_manager,
                                         data_driven_specs_repo):

    # test operation
    test_operation = "avg"

    # load components
    usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
    setter_manager.return_value = \
        MockComponentManager.get_setter_cmpt_mgr()
    insert_manager.return_value = \
        MockComponentManager.get_insert_pre_hourly_cmpt_mgr()

    # init mock driver tables
    data_driven_specs_repo.return_value = \
        MockDataDrivenSpecsRepo(
            self.spark_context,
            self.get_pre_transform_specs_json(),
            self.get_transform_specs_json_by_operation(
                test_operation, 'prehourly'))

    # Create an RDD out of the mocked Monasca metrics
    with open(SecondStageDataProvider.kafka_data_path) as f:
        raw_lines = f.read().splitlines()
    raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]
    util_list = map(dump_as_ascii_string, raw_tuple_list)

    quantity_util_rdd = self.spark_context.parallelize(util_list)
    sql_context = SQLContext(self.spark_context)
    quantity_util_df = sql_context.read.json(quantity_util_rdd)
    PreHourlyProcessor.do_transform(quantity_util_df)

    metrics = DummyAdapter.adapter_impl.metric_list
    utilized_cpu_logical_agg_metric = [
        value for value in metrics
        if value.get('metric').get('name') ==
        'cpu.utilized_logical_cores_agg'][0]

    self.assertEqual(7.134214285714285,
                     utilized_cpu_logical_agg_metric
                     .get('metric').get('value'))
    self.assertEqual('useast',
                     utilized_cpu_logical_agg_metric
                     .get('meta').get('region'))
    self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                     utilized_cpu_logical_agg_metric
                     .get('meta').get('tenantId'))
    self.assertEqual('all',
                     utilized_cpu_logical_agg_metric
                     .get('metric').get('dimensions')
                     .get('host'))
    self.assertEqual('all',
                     utilized_cpu_logical_agg_metric
                     .get('metric').get('dimensions')
                     .get('project_id'))
    self.assertEqual('hourly',
                     utilized_cpu_logical_agg_metric
                     .get('metric').get('dimensions')
                     .get('aggregation_period'))
    self.assertEqual(13.0,
                     utilized_cpu_logical_agg_metric
                     .get('metric').get('value_meta')
                     .get('record_count'))
    self.assertEqual('2016-03-07 16:09:23',
                     utilized_cpu_logical_agg_metric
                     .get('metric').get('value_meta')
                     .get('firstrecord_timestamp_string'))
    self.assertEqual('2016-03-07 16:10:38',
                     utilized_cpu_logical_agg_metric
                     .get('metric').get('value_meta')
                     .get('lastrecord_timestamp_string'))
def test_rdd_to_recordstore_second_stage(self,
                                         usage_manager,
                                         setter_manager,
                                         insert_manager):

    usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
    setter_manager.return_value = \
        MockComponentManager.get_setter_cmpt_mgr()
    insert_manager.return_value = \
        MockComponentManager.get_insert_pre_hourly_cmpt_mgr()

    # Create an RDD out of the mocked Monasca metrics
    with open(SecondStageDataProvider.kafka_data_path) as f:
        raw_lines = f.read().splitlines()
    raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]
    host_usage_list = map(dump_as_ascii_string, raw_tuple_list)

    sql_context = SQLContext(self.spark_context)
    host_usage_rdd = self.spark_context.parallelize(host_usage_list)
    host_usage_df = sql_context.read.json(host_usage_rdd)

    PreHourlyProcessor.do_transform(host_usage_df)

    # get the metrics that have been submitted to the dummy message adapter
    metrics = DummyAdapter.adapter_impl.metric_list

    # Verify cpu.total_logical_cores_agg for all hosts
    total_cpu_logical_agg_metric = [
        value for value in metrics
        if value.get('metric').get('name') ==
        'cpu.total_logical_cores_agg' and
        value.get('metric').get('dimensions').get('host') == 'all'][0]

    self.assertEqual(
        15.0, total_cpu_logical_agg_metric.get('metric').get('value'))
    self.assertEqual(
        'useast', total_cpu_logical_agg_metric.get('meta').get('region'))
    self.assertEqual(
        cfg.CONF.messaging.publish_kafka_project_id,
        total_cpu_logical_agg_metric.get('meta').get('tenantId'))
    self.assertEqual(
        'all',
        total_cpu_logical_agg_metric.get('metric').get('dimensions').get(
            'project_id'))
    self.assertEqual(
        'prehourly',
        total_cpu_logical_agg_metric.get('metric').get('dimensions').get(
            'aggregation_period'))
    self.assertEqual(
        13.0,
        total_cpu_logical_agg_metric.get('metric').get('value_meta').get(
            'record_count'))
    self.assertEqual(
        '2016-03-07 16:09:23',
        total_cpu_logical_agg_metric.get('metric').get('value_meta').get(
            'firstrecord_timestamp_string'))
    self.assertEqual(
        '2016-03-07 16:10:38',
        total_cpu_logical_agg_metric.get('metric').get('value_meta').get(
            'lastrecord_timestamp_string'))

    # Verify cpu.total_logical_cores_agg for mini-mon host
    total_cpu_logical_agg_metric = [
        value for value in metrics
        if value.get('metric').get('name') ==
        'cpu.total_logical_cores_agg' and
        value.get('metric').get('dimensions').get('host') == 'mini-mon'][0]

    self.assertEqual(
        9.0, total_cpu_logical_agg_metric.get('metric').get('value'))
    self.assertEqual(
        'useast', total_cpu_logical_agg_metric.get('meta').get('region'))
    self.assertEqual(
        cfg.CONF.messaging.publish_kafka_project_id,
        total_cpu_logical_agg_metric.get('meta').get('tenantId'))
    self.assertEqual(
        'all',
        total_cpu_logical_agg_metric.get('metric').get('dimensions').get(
            'project_id'))
    self.assertEqual(
        'prehourly',
        total_cpu_logical_agg_metric.get('metric').get('dimensions').get(
            'aggregation_period'))
    self.assertEqual(
        6.0,
        total_cpu_logical_agg_metric.get('metric').get('value_meta').get(
            'record_count'))
    self.assertEqual(
        '2016-03-07 16:09:23',
        total_cpu_logical_agg_metric.get('metric').get('value_meta').get(
            'firstrecord_timestamp_string'))
    self.assertEqual(
        '2016-03-07 16:10:38',
        total_cpu_logical_agg_metric.get('metric').get('value_meta').get(
            'lastrecord_timestamp_string'))

    # Verify cpu.total_logical_cores_agg for devstack host
    total_cpu_logical_agg_metric = [
        value for value in metrics
        if value.get('metric').get('name') ==
        'cpu.total_logical_cores_agg' and
        value.get('metric').get('dimensions').get('host') == 'devstack'][0]

    self.assertEqual(
        6.0, total_cpu_logical_agg_metric.get('metric').get('value'))
    self.assertEqual(
        'useast', total_cpu_logical_agg_metric.get('meta').get('region'))
    self.assertEqual(
        cfg.CONF.messaging.publish_kafka_project_id,
        total_cpu_logical_agg_metric.get('meta').get('tenantId'))
    self.assertEqual(
        'all',
        total_cpu_logical_agg_metric.get('metric').get('dimensions').get(
            'project_id'))
    self.assertEqual(
        'prehourly',
        total_cpu_logical_agg_metric.get('metric').get('dimensions').get(
            'aggregation_period'))
    self.assertEqual(
        7.0,
        total_cpu_logical_agg_metric.get('metric').get('value_meta').get(
            'record_count'))
    self.assertEqual(
        '2016-03-07 16:09:23',
        total_cpu_logical_agg_metric.get('metric').get('value_meta').get(
            'firstrecord_timestamp_string'))
    self.assertEqual(
        '2016-03-07 16:10:38',
        total_cpu_logical_agg_metric.get('metric').get('value_meta').get(
            'lastrecord_timestamp_string'))

    # Verify cpu.utilized_logical_cores_agg for all hosts
    utilized_cpu_logical_agg_metric = [
        value for value in metrics
        if value.get('metric').get('name') ==
        'cpu.utilized_logical_cores_agg' and
        value.get('metric').get('dimensions').get('host') == 'all'][0]

    self.assertEqual(
        8.0, utilized_cpu_logical_agg_metric.get('metric').get('value'))
    self.assertEqual(
        'useast', utilized_cpu_logical_agg_metric.get('meta').get('region'))
    self.assertEqual(
        cfg.CONF.messaging.publish_kafka_project_id,
        utilized_cpu_logical_agg_metric.get('meta').get('tenantId'))
    self.assertEqual(
        'all',
        utilized_cpu_logical_agg_metric.get('metric').get(
            'dimensions').get('project_id'))
    self.assertEqual(
        'prehourly',
        utilized_cpu_logical_agg_metric.get('metric').get(
            'dimensions').get('aggregation_period'))
    self.assertEqual(
        13.0,
        utilized_cpu_logical_agg_metric.get('metric').get(
            'value_meta').get('record_count'))
    self.assertEqual(
        '2016-03-07 16:09:23',
        utilized_cpu_logical_agg_metric.get('metric').get(
            'value_meta').get('firstrecord_timestamp_string'))
    self.assertEqual(
        '2016-03-07 16:10:38',
        utilized_cpu_logical_agg_metric.get('metric').get(
            'value_meta').get('lastrecord_timestamp_string'))

    # Verify cpu.utilized_logical_cores_agg for the mini-mon host
    utilized_cpu_logical_agg_metric = [
        value for value in metrics
        if value.get('metric').get('name') ==
        'cpu.utilized_logical_cores_agg' and
        value.get('metric').get('dimensions').get('host') == 'mini-mon'][0]

    self.assertEqual(
        5.0, utilized_cpu_logical_agg_metric.get('metric').get('value'))
    self.assertEqual(
        'useast', utilized_cpu_logical_agg_metric.get('meta').get('region'))
    self.assertEqual(
        cfg.CONF.messaging.publish_kafka_project_id,
        utilized_cpu_logical_agg_metric.get('meta').get('tenantId'))
    self.assertEqual(
        'all',
        utilized_cpu_logical_agg_metric.get('metric').get(
            'dimensions').get('project_id'))
    self.assertEqual(
        'prehourly',
        utilized_cpu_logical_agg_metric.get('metric').get(
            'dimensions').get('aggregation_period'))
    self.assertEqual(
        6.0,
        utilized_cpu_logical_agg_metric.get('metric').get(
            'value_meta').get('record_count'))
    self.assertEqual(
        '2016-03-07 16:09:23',
        utilized_cpu_logical_agg_metric.get('metric').get(
            'value_meta').get('firstrecord_timestamp_string'))
    self.assertEqual(
        '2016-03-07 16:10:38',
        utilized_cpu_logical_agg_metric.get('metric').get(
            'value_meta').get('lastrecord_timestamp_string'))

    # Verify cpu.utilized_logical_cores_agg for the devstack host
    utilized_cpu_logical_agg_metric = [
        value for value in metrics
        if value.get('metric').get('name') ==
        'cpu.utilized_logical_cores_agg' and
        value.get('metric').get('dimensions').get('host') == 'devstack'][0]

    self.assertEqual(
        3.0, utilized_cpu_logical_agg_metric.get('metric').get('value'))
    self.assertEqual(
        'useast', utilized_cpu_logical_agg_metric.get('meta').get('region'))
    self.assertEqual(
        cfg.CONF.messaging.publish_kafka_project_id,
        utilized_cpu_logical_agg_metric.get('meta').get('tenantId'))
    self.assertEqual(
        'all',
        utilized_cpu_logical_agg_metric.get('metric').get(
            'dimensions').get('project_id'))
    self.assertEqual(
        'prehourly',
        utilized_cpu_logical_agg_metric.get('metric').get(
            'dimensions').get('aggregation_period'))
    self.assertEqual(
        7.0,
        utilized_cpu_logical_agg_metric.get('metric').get(
            'value_meta').get('record_count'))
    self.assertEqual(
        '2016-03-07 16:09:23',
        utilized_cpu_logical_agg_metric.get('metric').get(
            'value_meta').get('firstrecord_timestamp_string'))
    self.assertEqual(
        '2016-03-07 16:10:38',
        utilized_cpu_logical_agg_metric.get('metric').get(
            'value_meta').get('lastrecord_timestamp_string'))
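# NOTE: The six per-host verification blocks above are identical except for
# the metric name, host, expected value and expected record count. A minimal
# refactoring sketch, assuming it is added as a method of this test class;
# the helper name and argument layout are illustrative only, not part of the
# original suite:

def _assert_host_aggregate(self, metrics, name, host, value, record_count):
    # look up the single aggregate emitted for this (metric name, host) pair
    agg = [m for m in metrics
           if m.get('metric').get('name') == name and
           m.get('metric').get('dimensions').get('host') == host][0]
    # assert the per-host expectations
    self.assertEqual(value, agg.get('metric').get('value'))
    self.assertEqual(record_count,
                     agg.get('metric').get('value_meta').get(
                         'record_count'))
    # assert the fields shared by every aggregate in this fixture
    self.assertEqual('useast', agg.get('meta').get('region'))
    self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                     agg.get('meta').get('tenantId'))
    self.assertEqual('all',
                     agg.get('metric').get('dimensions').get('project_id'))
    self.assertEqual('prehourly',
                     agg.get('metric').get('dimensions').get(
                         'aggregation_period'))

# Each per-host block would then collapse to a single call, e.g.:
#     self._assert_host_aggregate(metrics, 'cpu.total_logical_cores_agg',
#                                 'all', 15.0, 13.0)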
def test_fetch_quantity_avg_second_stage(self,
                                         usage_manager,
                                         setter_manager,
                                         insert_manager,
                                         data_driven_specs_repo):

    # test operation
    test_operation = "avg"

    # load components
    usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
    setter_manager.return_value = \
        MockComponentManager.get_setter_cmpt_mgr()
    insert_manager.return_value = \
        MockComponentManager.get_insert_pre_hourly_cmpt_mgr()

    # init mock driver tables
    data_driven_specs_repo.return_value = \
        MockDataDrivenSpecsRepo(
            self.spark_context,
            self.get_pre_transform_specs_json(),
            self.get_transform_specs_json_by_operation(
                test_operation, 'prehourly'))

    # Create an RDD out of the mocked Monasca metrics
    with open(SecondStageDataProvider.kafka_data_path) as f:
        raw_lines = f.read().splitlines()
    raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]
    util_list = map(dump_as_ascii_string, raw_tuple_list)

    quantity_util_rdd = self.spark_context.parallelize(util_list)
    sql_context = SQLContext(self.spark_context)
    quantity_util_df = sql_context.read.json(quantity_util_rdd)

    PreHourlyProcessor.do_transform(quantity_util_df)

    metrics = DummyAdapter.adapter_impl.metric_list
    utilized_cpu_logical_agg_metric = [
        value for value in metrics
        if value.get('metric').get('name') ==
        'cpu.utilized_logical_cores_agg'][0]

    self.assertEqual(
        7.134214285714285,
        utilized_cpu_logical_agg_metric.get('metric').get('value'))
    self.assertEqual(
        'useast',
        utilized_cpu_logical_agg_metric.get('meta').get('region'))
    self.assertEqual(
        cfg.CONF.messaging.publish_kafka_project_id,
        utilized_cpu_logical_agg_metric.get('meta').get('tenantId'))
    self.assertEqual(
        'all',
        utilized_cpu_logical_agg_metric.get('metric').get(
            'dimensions').get('host'))
    self.assertEqual(
        'all',
        utilized_cpu_logical_agg_metric.get('metric').get(
            'dimensions').get('project_id'))
    self.assertEqual(
        'hourly',
        utilized_cpu_logical_agg_metric.get('metric').get(
            'dimensions').get('aggregation_period'))
    self.assertEqual(
        13.0,
        utilized_cpu_logical_agg_metric.get('metric').get(
            'value_meta').get('record_count'))
    self.assertEqual(
        '2016-03-07 16:09:23',
        utilized_cpu_logical_agg_metric.get('metric').get(
            'value_meta').get('firstrecord_timestamp_string'))
    self.assertEqual(
        '2016-03-07 16:10:38',
        utilized_cpu_logical_agg_metric.get('metric').get(
            'value_meta').get('lastrecord_timestamp_string'))
def test_vcpus_by_all(self,
                      usage_manager,
                      setter_manager,
                      insert_manager,
                      data_driven_specs_repo):

    # load components
    usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
    setter_manager.return_value = \
        MockComponentManager.get_setter_cmpt_mgr()
    insert_manager.return_value = \
        MockComponentManager.get_insert_pre_hourly_cmpt_mgr()

    # init mock driver tables
    data_driven_specs_repo.return_value = \
        MockDataDrivenSpecsRepo(
            self.spark_context,
            self.get_pre_transform_specs_json_by_all(),
            self.get_transform_specs_json_by_all())

    # Create an RDD out of the mocked Monasca metrics
    with open(DataProvider.kafka_data_path) as f:
        raw_lines = f.read().splitlines()
    raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]
    rdd_monasca = self.spark_context.parallelize(raw_tuple_list)

    # decorate mocked RDD with dummy kafka offsets
    myOffsetRanges = [
        OffsetRange("metrics", 1, 10, 20)]  # mimic rdd.offsetRanges()

    transform_context = TransformContextUtils.get_context(
        offset_info=myOffsetRanges,
        batch_time_info=self.get_dummy_batch_time())

    rdd_monasca_with_offsets = rdd_monasca.map(
        lambda x: RddTransformContext(x, transform_context))

    # Call the primary method in mon_metrics_kafka
    MonMetricsKafkaProcessor.rdd_to_recordstore(rdd_monasca_with_offsets)

    # get the metrics that have been submitted to the dummy message adapter
    metrics = DummyAdapter.adapter_impl.metric_list
    vm_cpu_list = map(dump_as_ascii_string, metrics)
    DummyAdapter.adapter_impl.metric_list = []

    vm_cpu_rdd = self.spark_context.parallelize(vm_cpu_list)
    sql_context = SQLContext(self.spark_context)
    vm_cpu_df = sql_context.read.json(vm_cpu_rdd)
    PreHourlyProcessor.do_transform(vm_cpu_df)

    metrics = DummyAdapter.adapter_impl.metric_list
    vcpus_agg_metric = [
        value for value in metrics
        if value.get('metric').get('name') == 'vcpus_agg' and
        value.get('metric').get('dimensions').get('project_id') ==
        'all'][0]

    self.assertIsNotNone(vcpus_agg_metric)
    self.assertEqual(7.0, vcpus_agg_metric.get('metric').get('value'))
    self.assertEqual('useast', vcpus_agg_metric.get('meta').get('region'))
    self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                     vcpus_agg_metric.get('meta').get('tenantId'))
    self.assertEqual(
        'all',
        vcpus_agg_metric.get('metric').get('dimensions').get('host'))
    self.assertEqual(
        'hourly',
        vcpus_agg_metric.get('metric').get('dimensions').get(
            'aggregation_period'))
    self.assertEqual(
        14.0,
        vcpus_agg_metric.get('metric').get('value_meta').get(
            'record_count'))
    self.assertEqual(
        '2016-01-20 16:40:00',
        vcpus_agg_metric.get('metric').get('value_meta').get(
            'firstrecord_timestamp_string'))
    self.assertEqual(
        '2016-01-20 16:40:46',
        vcpus_agg_metric.get('metric').get('value_meta').get(
            'lastrecord_timestamp_string'))
def test_fetch_quantity_sum_second_stage(self,
                                         usage_manager,
                                         setter_manager,
                                         insert_manager,
                                         data_driven_specs_repo):

    # test operation
    test_operation = "sum"

    # load components
    usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
    setter_manager.return_value = \
        MockComponentManager.get_setter_cmpt_mgr()
    insert_manager.return_value = \
        MockComponentManager.get_insert_pre_hourly_cmpt_mgr()

    # init mock driver tables
    data_driven_specs_repo.return_value = \
        MockDataDrivenSpecsRepo(
            self.spark_context,
            self.get_pre_transform_specs_json(),
            self.get_transform_specs_json_by_operation(test_operation))

    # Create an emulated set of Kafka messages (these were gathered
    # by extracting Monasca messages from the Metrics queue on mini-mon).

    # Create an RDD out of the mocked Monasca metrics
    with open(SecondStageDataProvider.fetch_quantity_data_path) as f:
        raw_lines = f.read().splitlines()
    raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]
    instance_usage_list = map(dump_as_ascii_string, raw_tuple_list)

    # create a json RDD from instance_usage_list
    instance_usage_rdd = self.spark_context.parallelize(
        instance_usage_list)
    sql_context = SQLContext(self.spark_context)
    instance_usage_df = sql_context.read.json(instance_usage_rdd)

    # call pre hourly processor
    PreHourlyProcessor.do_transform(instance_usage_df)

    metrics = DummyAdapter.adapter_impl.metric_list
    mem_total_mb_agg_metric = [
        value for value in metrics
        if value.get('metric').get('name') == 'mem.total_mb_agg' and
        value.get('metric').get('dimensions').get('host') ==
        'mini-mon'][0]

    self.assertIsNotNone(mem_total_mb_agg_metric)
    self.assertEqual('mem.total_mb_agg',
                     mem_total_mb_agg_metric.get('metric').get('name'))
    self.assertEqual(8679.0,
                     mem_total_mb_agg_metric.get('metric').get('value'))
    self.assertEqual('useast',
                     mem_total_mb_agg_metric.get('meta').get('region'))
    self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                     mem_total_mb_agg_metric.get('meta').get('tenantId'))
    self.assertEqual(
        'mini-mon',
        mem_total_mb_agg_metric.get('metric').get('dimensions').get(
            'host'))
    self.assertEqual(
        'all',
        mem_total_mb_agg_metric.get('metric').get('dimensions').get(
            'project_id'))
    self.assertEqual(
        'prehourly',
        mem_total_mb_agg_metric.get('metric').get('dimensions').get(
            'aggregation_period'))
    self.assertEqual(
        39.0,
        mem_total_mb_agg_metric.get('metric').get('value_meta').get(
            'record_count'))
    self.assertEqual(
        '2016-01-20 16:40:00',
        mem_total_mb_agg_metric.get('metric').get('value_meta').get(
            'firstrecord_timestamp_string'))
    self.assertEqual(
        '2016-01-20 17:40:00',
        mem_total_mb_agg_metric.get('metric').get('value_meta').get(
            'lastrecord_timestamp_string'))
def test_vcpus_by_project(self,
                          usage_manager,
                          setter_manager,
                          insert_manager,
                          data_driven_specs_repo):

    # load components
    usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
    setter_manager.return_value = \
        MockComponentManager.get_setter_cmpt_mgr()
    insert_manager.return_value = \
        MockComponentManager.get_insert_pre_hourly_cmpt_mgr()

    # init mock driver tables
    data_driven_specs_repo.return_value = \
        MockDataDrivenSpecsRepo(
            self.spark_context,
            self.get_pre_transform_specs_json_by_project(),
            self.get_transform_specs_json_by_project())

    # Create an RDD out of the mocked Monasca metrics
    with open(DataProvider.kafka_data_path) as f:
        raw_lines = f.read().splitlines()
    raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]
    rdd_monasca = self.spark_context.parallelize(raw_tuple_list)

    # decorate mocked RDD with dummy kafka offsets
    myOffsetRanges = [
        OffsetRange("metrics", 1, 10, 20)]  # mimic rdd.offsetRanges()

    transform_context = TransformContextUtils.get_context(
        offset_info=myOffsetRanges,
        batch_time_info=self.get_dummy_batch_time())

    rdd_monasca_with_offsets = rdd_monasca.map(
        lambda x: RddTransformContext(x, transform_context))

    # Call the primary method in mon_metrics_kafka
    MonMetricsKafkaProcessor.rdd_to_recordstore(rdd_monasca_with_offsets)

    # get the metrics that have been submitted to the dummy message adapter
    vm_cpu_list = DummyAdapter.adapter_impl.metric_list
    vm_cpu_list = map(dump_as_ascii_string, vm_cpu_list)
    DummyAdapter.adapter_impl.metric_list = []

    vm_cpu_rdd = self.spark_context.parallelize(vm_cpu_list)
    sql_context = SQLContext(self.spark_context)
    vm_cpu_df = sql_context.read.json(vm_cpu_rdd)
    PreHourlyProcessor.do_transform(vm_cpu_df)

    metrics = DummyAdapter.adapter_impl.metric_list

    # Verify vcpus_agg for the first project
    vcpus_agg_metric = [
        value for value in metrics
        if value.get('metric').get('name') == 'vcpus_agg' and
        value.get('metric').get('dimensions').get('project_id') ==
        '9647fd5030b04a799b0411cc38c4102d'][0]

    self.assertIsNotNone(vcpus_agg_metric)
    self.assertEqual(6.0, vcpus_agg_metric.get('metric').get('value'))
    self.assertEqual('useast', vcpus_agg_metric.get('meta').get('region'))
    self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                     vcpus_agg_metric.get('meta').get('tenantId'))
    self.assertEqual(
        'all',
        vcpus_agg_metric.get('metric').get('dimensions').get('host'))
    self.assertEqual(
        'hourly',
        vcpus_agg_metric.get('metric').get('dimensions').get(
            'aggregation_period'))
    self.assertEqual(
        8.0,
        vcpus_agg_metric.get('metric').get('value_meta').get(
            'record_count'))
    self.assertEqual(
        '2016-01-20 16:40:05',
        vcpus_agg_metric.get('metric').get('value_meta').get(
            'firstrecord_timestamp_string'))
    self.assertEqual(
        '2016-01-20 16:40:46',
        vcpus_agg_metric.get('metric').get('value_meta').get(
            'lastrecord_timestamp_string'))

    # Verify vcpus_agg for the second project
    vcpus_agg_metric = [
        value for value in metrics
        if value.get('metric').get('name') == 'vcpus_agg' and
        value.get('metric').get('dimensions').get('project_id') ==
        '8647fd5030b04a799b0411cc38c4102d'][0]

    self.assertIsNotNone(vcpus_agg_metric)
    self.assertEqual(1.0, vcpus_agg_metric.get('metric').get('value'))
    self.assertEqual('useast', vcpus_agg_metric.get('meta').get('region'))
    self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                     vcpus_agg_metric.get('meta').get('tenantId'))
    self.assertEqual(
        'all',
        vcpus_agg_metric.get('metric').get('dimensions').get('host'))
    self.assertEqual(
        'hourly',
        vcpus_agg_metric.get('metric').get('dimensions').get(
            'aggregation_period'))
    self.assertEqual(
        6.0,
        vcpus_agg_metric.get('metric').get('value_meta').get(
            'record_count'))
    self.assertEqual(
        '2016-01-20 16:40:00',
        vcpus_agg_metric.get('metric').get('value_meta').get(
            'firstrecord_timestamp_string'))
    self.assertEqual(
        '2016-01-20 16:40:42',
        vcpus_agg_metric.get('metric').get('value_meta').get(
            'lastrecord_timestamp_string'))
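# NOTE: All of the tests above parse fixture lines with eval(), which will
# execute arbitrary Python expressions found in the fixture files. If those
# files contain only plain Python literals (tuples, dicts, strings) -- an
# assumption, since the fixture contents are not shown here --
# ast.literal_eval is a safer drop-in. A minimal sketch; the helper name is
# illustrative, not part of the original suite:

import ast


def load_fixture_literals(path):
    # parse each line as a Python literal; raises ValueError on anything
    # that is not a literal, instead of executing it
    with open(path) as f:
        return [ast.literal_eval(line) for line in f.read().splitlines()]

# usage, mirroring the pattern in the tests above:
#     raw_tuple_list = load_fixture_literals(
#         SecondStageDataProvider.kafka_data_path)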