def process_metrics(transform_context, record_store_df):
        """start processing (aggregating) metrics
        """
        #
        # look in record_store_df for list of metrics to be processed
        #
        metric_ids_df = record_store_df.select("metric_id").distinct()
        metric_ids_to_process = [row.metric_id
                                 for row in metric_ids_df.collect()]

        data_driven_specs_repo = DataDrivenSpecsRepoFactory.\
            get_data_driven_specs_repo()
        sqlc = SQLContext.getOrCreate(record_store_df.rdd.context)
        transform_specs_df = data_driven_specs_repo.get_data_driven_specs(
            sql_context=sqlc,
            data_driven_spec_type=DataDrivenSpecsRepo.transform_specs_type)
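
        # for each metric id: pick out its transform spec and its matching
        # record store rows, attach the spec to the transform context, then
        # hand off to process_metric for aggregation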

        for metric_id in metric_ids_to_process:
            transform_spec_df = transform_specs_df.select(
                ["aggregation_params_map", "metric_id"]
            ).where(transform_specs_df.metric_id == metric_id)
            source_record_store_df = record_store_df.select("*").where(
                record_store_df.metric_id == metric_id)

            # set transform_spec_df in TransformContext
            transform_context = \
                TransformContextUtils.get_context(
                    transform_context_info=transform_context,
                    transform_spec_df_info=transform_spec_df)

            MonMetricsKafkaProcessor.process_metric(
                transform_context, source_record_store_df)
    def test_set_aggregated_metric_name(self):

        record_store_df = RecordStoreUtils.create_df_from_json(
            self.sql_context,
            DataProvider.record_store_path)

        transform_spec_df = TransformSpecsUtils.create_df_from_json(
            self.sql_context,
            DataProvider.transform_spec_path)

        transform_context = TransformContextUtils.get_context(
            transform_spec_df_info=transform_spec_df,
            batch_time_info=self.get_dummy_batch_time())

        instance_usage_df = FetchQuantity.usage(
            transform_context, record_store_df)

        instance_usage_df_1 = RollupQuantity.setter(
            transform_context, instance_usage_df)

        instance_usage_df_2 = SetAggregatedMetricName.setter(
            transform_context, instance_usage_df_1)

        result_list = [(row.usage_date, row.usage_hour,
                        row.tenant_id, row.host, row.quantity,
                        row.aggregated_metric_name)
                       for row in instance_usage_df_2.rdd.collect()]

        expected_result = [
            ('2016-02-08', '18', 'all', 'all', 12946.0, 'mem.total_mb_agg')]

        self.assertItemsEqual(result_list, expected_result)
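        # note: assertItemsEqual is the Python 2 name of this assertion; the
        # equivalent in Python 3's unittest is assertCountEqual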
Example no. 3
    def test_set_aggregated_metric_name(self):

        record_store_df = RecordStoreUtils.create_df_from_json(
            self.sql_context, DataProvider.record_store_path)

        transform_spec_df = TransformSpecsUtils.create_df_from_json(
            self.sql_context, DataProvider.transform_spec_path)

        transform_context = TransformContextUtils.get_context(
            transform_spec_df_info=transform_spec_df,
            batch_time_info=self.get_dummy_batch_time())

        instance_usage_df = FetchQuantity.usage(transform_context,
                                                record_store_df)

        instance_usage_df_1 = RollupQuantity.setter(transform_context,
                                                    instance_usage_df)

        instance_usage_df_2 = SetAggregatedMetricName.setter(
            transform_context, instance_usage_df_1)

        result_list = [(row.usage_date, row.usage_hour, row.tenant_id,
                        row.host, row.quantity, row.aggregated_metric_name)
                       for row in instance_usage_df_2.rdd.collect()]

        expected_result = [('2016-02-08', '18', 'all', 'all', 12946.0,
                            'mem.total_mb_agg')]

        self.assertItemsEqual(result_list, expected_result)
    def test_latest_quantity_all_hosts_component(self):

        record_store_df = RecordStoreUtils.create_df_from_json(
            self.sql_context,
            DataProvider.record_store_path)

        transform_spec_df = TransformSpecsUtils.create_df_from_json(
            self.sql_context,
            DataProvider.transform_spec_path)

        transform_context = TransformContextUtils.get_context(
            transform_spec_df_info=transform_spec_df,
            batch_time_info=self.get_dummy_batch_time())

        instance_usage_df = FetchQuantity.usage(
            transform_context, record_store_df)

        result_list = [(row.usage_date, row.usage_hour,
                        row.tenant_id, row.host, row.quantity)
                       for row in instance_usage_df.rdd.collect()]

        expected_result = [
            ('2016-02-08', '18', 'NA', 'mini-mon', 5969.0),
            ('2016-02-08', '18', 'NA', 'devstack', 6977.0)]

        self.assertItemsEqual(result_list, expected_result)
Example no. 5
    def process_metrics(transform_context, record_store_df):
        """start processing (aggregating) metrics"""
        #
        # look in record_store_df for list of metrics to be processed
        #
        metric_ids_df = record_store_df.select("metric_id").distinct()
        metric_ids_to_process = [row.metric_id
                                 for row in metric_ids_df.collect()]

        data_driven_specs_repo = DataDrivenSpecsRepoFactory.\
            get_data_driven_specs_repo()
        sqlc = SQLContext.getOrCreate(record_store_df.rdd.context)
        transform_specs_df = data_driven_specs_repo.get_data_driven_specs(
            sql_context=sqlc,
            data_driven_spec_type=DataDrivenSpecsRepo.transform_specs_type)

        for metric_id in metric_ids_to_process:
            transform_spec_df = transform_specs_df.select(
                ["aggregation_params_map", "metric_id"]
            ).where(transform_specs_df.metric_id == metric_id)
            source_record_store_df = record_store_df.select("*").where(
                record_store_df.metric_id == metric_id)

            # set transform_spec_df in TransformContext
            transform_context = \
                TransformContextUtils.get_context(
                    transform_context_info=transform_context,
                    transform_spec_df_info=transform_spec_df)

            try:
                agg_inst_usage_df = (
                    MonMetricsKafkaProcessor.process_metric(
                        transform_context, source_record_store_df))

                # if running in debug mode, write out the aggregated metric
                # name just processed (along with the count of how many of
                # these were aggregated) to the application log.
                if log.isEnabledFor(logging.DEBUG):
                    agg_inst_usage_collection = agg_inst_usage_df.collect()
                    collection_len = len(agg_inst_usage_collection)
                    if collection_len > 0:
                        agg_inst_usage_dict = (
                            agg_inst_usage_collection[0].asDict())
                        log.debug("Submitted pre-hourly aggregated metric: "
                                  "%s (%s)",
                                  agg_inst_usage_dict[
                                      "aggregated_metric_name"],
                                  str(collection_len))
            except FetchQuantityException:
                raise
            except FetchQuantityUtilException:
                raise
            except Exception as e:
                MonMetricsKafkaProcessor.log_debug(
                    "Exception raised in metric processing for metric: " +
                    str(metric_id) + ".  Error: " + str(e))
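            # note on the error handling above: fetch-quantity errors are
            # re-raised so they surface to the caller, while any other
            # per-metric exception is only logged, letting the loop continue
            # with the remaining metric ids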
    def test_fetch_quantity_sum(self,
                                usage_manager,
                                setter_manager,
                                insert_manager,
                                data_driven_specs_repo):

        # test operation
        test_operation = "sum"

        # load components
        usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
        setter_manager.return_value = \
            MockComponentManager.get_setter_cmpt_mgr()
        insert_manager.return_value = \
            MockComponentManager.get_insert_cmpt_mgr()

        # init mock driver tables
        data_driven_specs_repo.return_value = \
            MockDataDrivenSpecsRepo(self.spark_context,
                                    self.get_pre_transform_specs_json(),
                                    self.get_transform_specs_json_by_operation(
                                        test_operation))

        # Create an emulated set of Kafka messages (these were gathered
        # by extracting Monasca messages from the Metrics queue on mini-mon).

        # Create an RDD out of the mocked Monasca metrics
        with open(DataProvider.kafka_data_path) as f:
            raw_lines = f.read().splitlines()
        raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]

        rdd_monasca = self.spark_context.parallelize(raw_tuple_list)

        # decorate mocked RDD with dummy kafka offsets
        myOffsetRanges = [
            OffsetRange("metrics", 1, 10, 20)]  # mimic rdd.offsetRanges()
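        # OffsetRange arguments are (topic, partition, fromOffset, untilOffset)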

        transform_context = TransformContextUtils.get_context(
            offset_info=myOffsetRanges,
            batch_time_info=self.get_dummy_batch_time())

        rdd_monasca_with_offsets = rdd_monasca.map(
            lambda x: RddTransformContext(x, transform_context))

        try:
            # Call the primary method in mon_metrics_kafka
            MonMetricsKafkaProcessor.rdd_to_recordstore(
                rdd_monasca_with_offsets)
            self.assertTrue(False)
        except FetchQuantityUtilException as e:
            self.assertTrue("Operation sum is not supported" in
                            e.value)
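        # aside (an assumption about the fixture format, not stated in this
        # test): each line of the kafka data file appears to be a Python
        # literal tuple, so ast.literal_eval could replace eval to parse it
        # without executing arbitrary code, e.g.:
        #   import ast
        #   raw_tuple_list = [ast.literal_eval(line) for line in raw_lines]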
    def test_missing_field_to_filter(self,
                                     usage_manager,
                                     setter_manager,
                                     insert_manager,
                                     data_driven_specs_repo):
        # load components
        usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
        setter_manager.return_value = \
            MockComponentManager.get_setter_cmpt_mgr()
        insert_manager.return_value = \
            MockComponentManager.get_insert_cmpt_mgr()

        # init mock driver tables
        data_driven_specs_repo.return_value = \
            MockDataDrivenSpecsRepo(
                self.spark_context,
                self.get_pre_transform_specs_json(),
                self.get_invalid_filter_transform_specs_json("",
                                                             "-mgmt$",
                                                             "exclude"))

        # Create an emulated set of Kafka messages (these were gathered
        # by extracting Monasca messages from the Metrics queue on mini-mon).

        # Create an RDD out of the mocked Monasca metrics
        with open(DataProvider.fetch_quantity_data_path) as f:
            raw_lines = f.read().splitlines()
        raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]

        rdd_monasca = self.spark_context.parallelize(raw_tuple_list)

        # decorate mocked RDD with dummy kafka offsets
        myOffsetRanges = [
            OffsetRange("metrics", 1, 10, 20)]  # mimic rdd.offsetRanges()

        transform_context = TransformContextUtils.get_context(
            offset_info=myOffsetRanges,
            batch_time_info=self.get_dummy_batch_time())

        rdd_monasca_with_offsets = rdd_monasca.map(
            lambda x: RddTransformContext(x, transform_context))

        try:
            # Call the primary method in mon_metrics_kafka
            MonMetricsKafkaProcessor.rdd_to_recordstore(
                rdd_monasca_with_offsets)
            # In this case, it's an error if no exception is raised
            self.assertTrue(False)
        except FetchQuantityException as e:
            self.assertTrue("Encountered invalid filter details:" in e.value)
            self.assertTrue("field to filter = ," in e.value)
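        # a tighter equivalent (a sketch, assuming the same exception type
        # and its .value attribute) would use the assertRaises context
        # manager, which fails automatically when nothing is raised:
        #   with self.assertRaises(FetchQuantityException) as ctx:
        #       MonMetricsKafkaProcessor.rdd_to_recordstore(
        #           rdd_monasca_with_offsets)
        #   self.assertIn("Encountered invalid filter details:",
        #                 ctx.exception.value)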
    def test_invalid_aggregated_metric_name(self,
                                            usage_manager,
                                            setter_manager,
                                            insert_manager,
                                            data_driven_specs_repo):

        # load components
        usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
        setter_manager.return_value = \
            MockComponentManager.get_setter_cmpt_mgr()
        insert_manager.return_value = \
            MockComponentManager.get_insert_cmpt_mgr()

        # init mock driver tables
        data_driven_specs_repo.return_value = \
            MockDataDrivenSpecsRepo(
                self.spark_context,
                self.get_pre_transform_specs_json(),
                self.get_transform_specs_json_invalid_name())

        # Create an emulated set of Kafka messages (these were gathered
        # by extracting Monasca messages from the Metrics queue on mini-mon).

        # Create an RDD out of the mocked Monasca metrics
        with open(DataProvider.fetch_quantity_data_path) as f:
            raw_lines = f.read().splitlines()
        raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]

        rdd_monasca = self.spark_context.parallelize(raw_tuple_list)

        # decorate mocked RDD with dummy kafka offsets
        myOffsetRanges = [
            OffsetRange("metrics", 1, 10, 20)]  # mimic rdd.offsetRanges()

        transform_context = TransformContextUtils.get_context(
            offset_info=myOffsetRanges,
            batch_time_info=self.get_dummy_batch_time())

        rdd_monasca_with_offsets = rdd_monasca.map(
            lambda x: RddTransformContext(x, transform_context))

        # Call the primary method in mon_metrics_kafka
        MonMetricsKafkaProcessor.rdd_to_recordstore(
            rdd_monasca_with_offsets)

        # get the metrics that have been submitted to the dummy message adapter
        metrics = DummyAdapter.adapter_impl.metric_list

        # metrics should be empty
        self.assertFalse(metrics)
Example no. 10
 def store_offset_ranges(batch_time, rdd):
     if rdd.isEmpty():
         MonMetricsKafkaProcessor.log_debug(
             "storeOffsetRanges: nothing to process...")
         return rdd
     else:
         my_offset_ranges = rdd.offsetRanges()
         transform_context = \
             TransformContextUtils.get_context(offset_info=my_offset_ranges,
                                               batch_time_info=batch_time
                                               )
         rdd_transform_context = \
             rdd.map(lambda x: RddTransformContext(x, transform_context))
         return rdd_transform_context
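     # hedged usage sketch (the stream wiring is assumed; it is not part of
     # this excerpt): a Kafka direct DStream would typically attach the
     # per-batch offsets by passing this function to transform, which
     # supplies (batch_time, rdd) to two-argument callables:
     #   kafka_stream.transform(store_offset_ranges)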
    def test_fetch_quantity_sum(self, usage_manager, setter_manager,
                                insert_manager, data_driven_specs_repo):

        # test operation
        test_operation = "sum"

        # load components
        usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
        setter_manager.return_value = \
            MockComponentManager.get_setter_cmpt_mgr()
        insert_manager.return_value = \
            MockComponentManager.get_insert_cmpt_mgr()

        # init mock driver tables
        data_driven_specs_repo.return_value = \
            MockDataDrivenSpecsRepo(self.spark_context,
                                    self.get_pre_transform_specs_json(),
                                    self.get_transform_specs_json_by_operation(
                                        test_operation))

        # Create an emulated set of Kafka messages (these were gathered
        # by extracting Monasca messages from the Metrics queue on mini-mon).

        # Create an RDD out of the mocked Monasca metrics
        with open(DataProvider.kafka_data_path) as f:
            raw_lines = f.read().splitlines()
        raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]

        rdd_monasca = self.spark_context.parallelize(raw_tuple_list)

        # decorate mocked RDD with dummy kafka offsets
        myOffsetRanges = [OffsetRange("metrics", 1, 10,
                                      20)]  # mimic rdd.offsetRanges()

        transform_context = TransformContextUtils.get_context(
            offset_info=myOffsetRanges,
            batch_time_info=self.get_dummy_batch_time())

        rdd_monasca_with_offsets = rdd_monasca.map(
            lambda x: RddTransformContext(x, transform_context))

        try:
            # Call the primary method in mon_metrics_kafka
            MonMetricsKafkaProcessor.rdd_to_recordstore(
                rdd_monasca_with_offsets)
            self.assertTrue(False)
        except FetchQuantityUtilException as e:
            self.assertTrue("Operation sum is not supported" in e.value)
Example no. 13
    def test_missing_field_to_filter(self, usage_manager, setter_manager,
                                     insert_manager, data_driven_specs_repo):
        # load components
        usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
        setter_manager.return_value = \
            MockComponentManager.get_setter_cmpt_mgr()
        insert_manager.return_value = \
            MockComponentManager.get_insert_cmpt_mgr()

        # init mock driver tables
        data_driven_specs_repo.return_value = \
            MockDataDrivenSpecsRepo(
                self.spark_context,
                self.get_pre_transform_specs_json(),
                self.get_invalid_filter_transform_specs_json("",
                                                             "-mgmt$",
                                                             "exclude"))

        # Create an emulated set of Kafka messages (these were gathered
        # by extracting Monasca messages from the Metrics queue on mini-mon).

        # Create an RDD out of the mocked Monasca metrics
        with open(DataProvider.fetch_quantity_data_path) as f:
            raw_lines = f.read().splitlines()
        raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]

        rdd_monasca = self.spark_context.parallelize(raw_tuple_list)

        # decorate mocked RDD with dummy kafka offsets
        myOffsetRanges = [OffsetRange("metrics", 1, 10,
                                      20)]  # mimic rdd.offsetRanges()

        transform_context = TransformContextUtils.get_context(
            offset_info=myOffsetRanges,
            batch_time_info=self.get_dummy_batch_time())

        rdd_monasca_with_offsets = rdd_monasca.map(
            lambda x: RddTransformContext(x, transform_context))

        try:
            # Call the primary method in mon_metrics_kafka
            MonMetricsKafkaProcessor.rdd_to_recordstore(
                rdd_monasca_with_offsets)
            # In this case, it's an error if no exception is raised
            self.assertTrue(False)
        except FetchQuantityException as e:
            self.assertTrue("Encountered invalid filter details:" in e.value)
            self.assertTrue("field to filter = ," in e.value)
Example no. 14
    def test_invalid_aggregated_metric_name(self, usage_manager,
                                            setter_manager, insert_manager,
                                            data_driven_specs_repo):

        # load components
        usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
        setter_manager.return_value = \
            MockComponentManager.get_setter_cmpt_mgr()
        insert_manager.return_value = \
            MockComponentManager.get_insert_cmpt_mgr()

        # init mock driver tables
        data_driven_specs_repo.return_value = \
            MockDataDrivenSpecsRepo(
                self.spark_context,
                self.get_pre_transform_specs_json(),
                self.get_transform_specs_json_invalid_name())

        # Create an emulated set of Kafka messages (these were gathered
        # by extracting Monasca messages from the Metrics queue on mini-mon).

        # Create an RDD out of the mocked Monasca metrics
        with open(DataProvider.fetch_quantity_data_path) as f:
            raw_lines = f.read().splitlines()
        raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]

        rdd_monasca = self.spark_context.parallelize(raw_tuple_list)

        # decorate mocked RDD with dummy kafka offsets
        myOffsetRanges = [OffsetRange("metrics", 1, 10,
                                      20)]  # mimic rdd.offsetRanges()

        transform_context = TransformContextUtils.get_context(
            offset_info=myOffsetRanges,
            batch_time_info=self.get_dummy_batch_time())

        rdd_monasca_with_offsets = rdd_monasca.map(
            lambda x: RddTransformContext(x, transform_context))

        # Call the primary method in mon_metrics_kafka
        MonMetricsKafkaProcessor.rdd_to_recordstore(rdd_monasca_with_offsets)

        # get the metrics that have been submitted to the dummy message adapter
        metrics = DummyAdapter.adapter_impl.metric_list

        # metrics should be empty
        self.assertFalse(metrics)
    def do_transform(instance_usage_df):
        """start processing (aggregating) metrics"""
        #
        # look in instance_usage_df for list of metrics to be processed
        #
        metric_ids_df = instance_usage_df.select(
            "processing_meta.metric_id").distinct()

        metric_ids_to_process = [row.metric_id
                                 for row in metric_ids_df.collect()]

        data_driven_specs_repo = (
            DataDrivenSpecsRepoFactory.get_data_driven_specs_repo())
        sqlc = SQLContext.getOrCreate(instance_usage_df.rdd.context)
        transform_specs_df = data_driven_specs_repo.get_data_driven_specs(
            sql_context=sqlc,
            data_driven_spec_type=DataDrivenSpecsRepo.transform_specs_type)

        for metric_id in metric_ids_to_process:
            transform_spec_df = transform_specs_df.select(
                ["aggregation_params_map", "metric_id"]
            ).where(transform_specs_df.metric_id == metric_id)
            source_instance_usage_df = instance_usage_df.select("*").where(
                instance_usage_df.processing_meta.metric_id == metric_id)

            # set transform_spec_df in TransformContext
            transform_context = TransformContextUtils.get_context(
                transform_spec_df_info=transform_spec_df)

            agg_inst_usage_df = PreHourlyProcessor.process_instance_usage(
                transform_context, source_instance_usage_df)

            # if running in debug mode, write out the aggregated metric
            # name just processed (along with the count of how many of these
            # were aggregated) to the application log.
            if log.isEnabledFor(logging.DEBUG):
                agg_inst_usage_collection = agg_inst_usage_df.collect()
                collection_len = len(agg_inst_usage_collection)
                if collection_len > 0:
                    agg_inst_usage_dict = agg_inst_usage_collection[0].asDict()
                    log.debug("Submitted hourly aggregated metric: %s (%s)",
                              agg_inst_usage_dict["aggregated_metric_name"],
                              str(collection_len))
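            # a lighter alternative for this debug branch (a sketch; it
            # trades a second Spark job for not collecting the whole
            # DataFrame onto the driver):
            #   first_rows = agg_inst_usage_df.take(1)
            #   if first_rows:
            #       log.debug("Submitted hourly aggregated metric: %s (%s)",
            #                 first_rows[0].asDict()[
            #                     "aggregated_metric_name"],
            #                 str(agg_inst_usage_df.count()))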
    def test_transform_builder(self,
                               usage_manager,
                               setter_manager,
                               insert_manager):

        usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
        setter_manager.return_value = \
            MockComponentManager.get_setter_cmpt_mgr()
        insert_manager.return_value = \
            MockComponentManager.get_insert_cmpt_mgr()

        record_store_json_path = DataProvider.record_store_path

        metric_proc_json_path = DataProvider.transform_spec_path

        sql_context = SQLContext.getOrCreate(self.spark_context)
        record_store_df = \
            RecordStoreUtils.create_df_from_json(sql_context,
                                                 record_store_json_path)

        transform_spec_df = TransformSpecsUtils.create_df_from_json(
            sql_context, metric_proc_json_path)

        transform_context = TransformContextUtils.get_context(
            transform_spec_df_info=transform_spec_df,
            batch_time_info=self.get_dummy_batch_time())

        # invoke the generic transformation builder
        instance_usage_df = GenericTransformBuilder.do_transform(
            transform_context, record_store_df)

        result_list = [(row.usage_date, row.usage_hour,
                        row.tenant_id, row.host, row.quantity,
                        row.aggregated_metric_name)
                       for row in instance_usage_df.rdd.collect()]

        expected_result = [('2016-02-08', '18', 'all',
                            'all', 12946.0,
                            'mem.total_mb_agg')]

        self.assertItemsEqual(result_list, expected_result)
Example no. 17
    def test_transform_builder(self, usage_manager, setter_manager,
                               insert_manager):

        usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
        setter_manager.return_value = \
            MockComponentManager.get_setter_cmpt_mgr()
        insert_manager.return_value = \
            MockComponentManager.get_insert_cmpt_mgr()

        record_store_json_path = DataProvider.record_store_path

        metric_proc_json_path = DataProvider.transform_spec_path

        sql_context = SQLContext.getOrCreate(self.spark_context)
        record_store_df = \
            RecordStoreUtils.create_df_from_json(sql_context,
                                                 record_store_json_path)

        transform_spec_df = TransformSpecsUtils.create_df_from_json(
            sql_context, metric_proc_json_path)

        transform_context = TransformContextUtils.get_context(
            transform_spec_df_info=transform_spec_df,
            batch_time_info=self.get_dummy_batch_time())

        # invoke the generic transformation builder
        instance_usage_df = GenericTransformBuilder.do_transform(
            transform_context, record_store_df)

        result_list = [(row.usage_date, row.usage_hour, row.tenant_id,
                        row.host, row.quantity, row.aggregated_metric_name)
                       for row in instance_usage_df.rdd.collect()]

        expected_result = [('2016-02-08', '18', 'all', 'all', 12946.0,
                            'mem.total_mb_agg')]

        self.assertItemsEqual(result_list, expected_result)
    def test_vcpus_by_all(self, usage_manager, setter_manager, insert_manager,
                          data_driven_specs_repo):

        # load components
        usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
        setter_manager.return_value = \
            MockComponentManager.get_setter_cmpt_mgr()
        insert_manager.return_value = \
            MockComponentManager.get_insert_pre_hourly_cmpt_mgr()

        # init mock driver tables
        data_driven_specs_repo.return_value = \
            MockDataDrivenSpecsRepo(
                self.spark_context,
                self.get_pre_transform_specs_json_by_all(),
                self.get_transform_specs_json_by_all())

        # Create an RDD out of the mocked Monasca metrics
        with open(DataProvider.kafka_data_path) as f:
            raw_lines = f.read().splitlines()
        raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]

        rdd_monasca = self.spark_context.parallelize(raw_tuple_list)

        # decorate mocked RDD with dummy kafka offsets
        myOffsetRanges = [OffsetRange("metrics", 1, 10,
                                      20)]  # mimic rdd.offsetRanges()

        transform_context = TransformContextUtils.get_context(
            offset_info=myOffsetRanges,
            batch_time_info=self.get_dummy_batch_time())

        rdd_monasca_with_offsets = rdd_monasca.map(
            lambda x: RddTransformContext(x, transform_context))

        # Call the primary method in mon_metrics_kafka
        MonMetricsKafkaProcessor.rdd_to_recordstore(rdd_monasca_with_offsets)

        # get the metrics that have been submitted to the dummy message adapter
        metrics = DummyAdapter.adapter_impl.metric_list
        vm_cpu_list = map(dump_as_ascii_string, metrics)
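        # under Python 2 map() returns a list here; on Python 3 it returns a
        # lazy iterator, so list(map(...)) would keep this portable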
        DummyAdapter.adapter_impl.metric_list = []

        vm_cpu_rdd = self.spark_context.parallelize(vm_cpu_list)
        sql_context = SQLContext(self.spark_context)
        vm_cpu_df = sql_context.read.json(vm_cpu_rdd)
        PreHourlyProcessor.do_transform(vm_cpu_df)
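        # at this point the pre-hourly records published by
        # rdd_to_recordstore have been read back from the dummy adapter and
        # run through PreHourlyProcessor, so the adapter's metric_list now
        # holds the final hourly aggregates asserted below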

        metrics = DummyAdapter.adapter_impl.metric_list
        vcpus_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') == 'vcpus_agg' and value.get(
                'metric').get('dimensions').get('project_id') == 'all'
        ][0]

        self.assertTrue(vcpus_agg_metric is not None)

        self.assertEqual(7.0, vcpus_agg_metric.get('metric').get('value'))
        self.assertEqual('useast', vcpus_agg_metric.get('meta').get('region'))

        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         vcpus_agg_metric.get('meta').get('tenantId'))
        self.assertEqual(
            'all',
            vcpus_agg_metric.get('metric').get('dimensions').get('host'))
        self.assertEqual(
            'hourly',
            vcpus_agg_metric.get('metric').get('dimensions').get(
                'aggregation_period'))

        self.assertEqual(
            14.0,
            vcpus_agg_metric.get('metric').get('value_meta').get(
                'record_count'))
        self.assertEqual(
            '2016-01-20 16:40:00',
            vcpus_agg_metric.get('metric').get('value_meta').get(
                'firstrecord_timestamp_string'))
        self.assertEqual(
            '2016-01-20 16:40:46',
            vcpus_agg_metric.get('metric').get('value_meta').get(
                'lastrecord_timestamp_string'))
    def test_rdd_to_recordstore(self,
                                usage_manager,
                                setter_manager,
                                insert_manager):

        usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
        setter_manager.return_value = \
            MockComponentManager.get_setter_cmpt_mgr()
        insert_manager.return_value = \
            MockComponentManager.get_insert_cmpt_mgr()

        # Create an emulated set of Kafka messages (these were gathered
        # by extracting Monasca messages from the Metrics queue on mini-mon).

        # Create an RDD out of the mocked Monasca metrics
        with open(DataProvider.kafka_data_path) as f:
            raw_lines = f.read().splitlines()
        raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]

        rdd_monasca = self.spark_context.parallelize(raw_tuple_list)

        # decorate mocked RDD with dummy kafka offsets
        myOffsetRanges = [
            OffsetRange("metrics", 1, 10, 20)]  # mimic rdd.offsetRanges()

        transform_context = TransformContextUtils.get_context(
            offset_info=myOffsetRanges,
            batch_time_info=self.get_dummy_batch_time())

        rdd_monasca_with_offsets = rdd_monasca.map(
            lambda x: RddTransformContext(x, transform_context))

        # Call the primary method in mon_metrics_kafka
        MonMetricsKafkaProcessor.rdd_to_recordstore(
            rdd_monasca_with_offsets)

        # get the metrics that have been submitted to the dummy message adapter
        metrics = DummyAdapter.adapter_impl.metric_list

        # Verify cpu.total_logical_cores_agg for all hosts
        total_cpu_logical_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') ==
            'cpu.total_logical_cores_agg' and
            value.get('metric').get('dimensions').get('host') ==
            'all'][0]

        self.assertEqual(15.0,
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value'))
        self.assertEqual('useast',
                         total_cpu_logical_agg_metric.get(
                             'meta').get('region'))
        self.assertEqual(cfg.CONF.messaging.publish_kafka_tenant_id,
                         total_cpu_logical_agg_metric.get(
                             'meta').get('tenantId'))
        self.assertEqual('all',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('project_id'))
        self.assertEqual('hourly',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('aggregation_period'))

        self.assertEqual(13.0,
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('record_count'))
        self.assertEqual('2016-03-07 16:09:23',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('firstrecord_timestamp'))
        self.assertEqual('2016-03-07 16:10:38',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('lastrecord_timestamp'))

        # Verify cpu.total_logical_cores_agg for mini-mon host
        total_cpu_logical_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') ==
            'cpu.total_logical_cores_agg' and
            value.get('metric').get('dimensions').get('host') ==
            'mini-mon'][0]

        self.assertEqual(9.0,
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value'))
        self.assertEqual('useast',
                         total_cpu_logical_agg_metric.get(
                             'meta').get('region'))
        self.assertEqual(cfg.CONF.messaging.publish_kafka_tenant_id,
                         total_cpu_logical_agg_metric.get(
                             'meta').get('tenantId'))
        self.assertEqual('all',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('project_id'))
        self.assertEqual('hourly',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('aggregation_period'))

        self.assertEqual(6.0,
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('record_count'))
        self.assertEqual('2016-03-07 16:09:23',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('firstrecord_timestamp'))
        self.assertEqual('2016-03-07 16:10:38',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('lastrecord_timestamp'))

        # Verify cpu.total_logical_cores_agg for devstack host
        total_cpu_logical_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') ==
            'cpu.total_logical_cores_agg' and
            value.get('metric').get('dimensions').get('host') ==
            'devstack'][0]

        self.assertEqual(6.0,
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value'))
        self.assertEqual('useast',
                         total_cpu_logical_agg_metric.get(
                             'meta').get('region'))
        self.assertEqual(cfg.CONF.messaging.publish_kafka_tenant_id,
                         total_cpu_logical_agg_metric.get(
                             'meta').get('tenantId'))
        self.assertEqual('all',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('project_id'))
        self.assertEqual('hourly',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('aggregation_period'))

        self.assertEqual(7.0,
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('record_count'))
        self.assertEqual('2016-03-07 16:09:23',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('firstrecord_timestamp'))
        self.assertEqual('2016-03-07 16:10:38',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('lastrecord_timestamp'))

        # Verify cpu.utilized_logical_cores_agg for all hosts
        utilized_cpu_logical_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') ==
            'cpu.utilized_logical_cores_agg' and
            value.get('metric').get('dimensions').get('host') ==
            'all'][0]

        self.assertEqual(8.0,
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value'))
        self.assertEqual('useast',
                         utilized_cpu_logical_agg_metric.get(
                             'meta').get('region'))

        self.assertEqual(cfg.CONF.messaging.publish_kafka_tenant_id,
                         utilized_cpu_logical_agg_metric.get(
                             'meta').get('tenantId'))
        self.assertEqual('all',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('project_id'))
        self.assertEqual('hourly',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('aggregation_period'))

        self.assertEqual(13.0,
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('record_count'))
        self.assertEqual('2016-03-07 16:09:23',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('firstrecord_timestamp'))
        self.assertEqual('2016-03-07 16:10:38',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('lastrecord_timestamp'))

        # Verify cpu.utilized_logical_cores_agg for the mini-mon host
        utilized_cpu_logical_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') ==
            'cpu.utilized_logical_cores_agg' and
            value.get('metric').get('dimensions').get('host') ==
            'mini-mon'][0]

        self.assertEqual(5.0,
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value'))
        self.assertEqual('useast',
                         utilized_cpu_logical_agg_metric.get(
                             'meta').get('region'))

        self.assertEqual(cfg.CONF.messaging.publish_kafka_tenant_id,
                         utilized_cpu_logical_agg_metric.get(
                             'meta').get('tenantId'))
        self.assertEqual('all',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('project_id'))
        self.assertEqual('hourly',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('aggregation_period'))

        self.assertEqual(6.0,
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('record_count'))
        self.assertEqual('2016-03-07 16:09:23',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('firstrecord_timestamp'))
        self.assertEqual('2016-03-07 16:10:38',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('lastrecord_timestamp'))

        # Verify cpu.utilized_logical_cores_agg for the devstack host
        utilized_cpu_logical_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') ==
            'cpu.utilized_logical_cores_agg' and
            value.get('metric').get('dimensions').get('host') ==
            'devstack'][0]

        self.assertEqual(3.0,
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value'))
        self.assertEqual('useast',
                         utilized_cpu_logical_agg_metric.get(
                             'meta').get('region'))

        self.assertEqual(cfg.CONF.messaging.publish_kafka_tenant_id,
                         utilized_cpu_logical_agg_metric.get(
                             'meta').get('tenantId'))
        self.assertEqual('all',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('project_id'))
        self.assertEqual('hourly',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('aggregation_period'))

        self.assertEqual(7.0,
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('record_count'))
        self.assertEqual('2016-03-07 16:09:23',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('firstrecord_timestamp'))
        self.assertEqual('2016-03-07 16:10:38',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('lastrecord_timestamp'))
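        # the repeated assertion blocks above could be condensed with a small
        # helper (a sketch; the helper name and argument list are assumptions,
        # not part of the original test):
        #   def assert_agg_metric(self, metric, value, record_count):
        #       self.assertEqual(value, metric.get('metric').get('value'))
        #       self.assertEqual('useast', metric.get('meta').get('region'))
        #       value_meta = metric.get('metric').get('value_meta')
        #       self.assertEqual(record_count,
        #                        value_meta.get('record_count'))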
    def test_vcpus_by_project(self,
                              usage_manager,
                              setter_manager,
                              insert_manager,
                              data_driven_specs_repo):

        # load components
        usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
        setter_manager.return_value = \
            MockComponentManager.get_setter_cmpt_mgr()
        insert_manager.return_value = \
            MockComponentManager.get_insert_pre_hourly_cmpt_mgr()

        # init mock driver tables
        data_driven_specs_repo.return_value = \
            MockDataDrivenSpecsRepo(
                self.spark_context,
                self.get_pre_transform_specs_json_by_project(),
                self.get_transform_specs_json_by_project())

        # Create an RDD out of the mocked Monasca metrics
        with open(DataProvider.kafka_data_path) as f:
            raw_lines = f.read().splitlines()
        raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]

        rdd_monasca = self.spark_context.parallelize(raw_tuple_list)

        # decorate mocked RDD with dummy kafka offsets
        myOffsetRanges = [
            OffsetRange("metrics", 1, 10, 20)]  # mimic rdd.offsetRanges()

        transform_context = TransformContextUtils.get_context(
            offset_info=myOffsetRanges,
            batch_time_info=self.get_dummy_batch_time())

        rdd_monasca_with_offsets = rdd_monasca.map(
            lambda x: RddTransformContext(x, transform_context))

        # Call the primary method in mon_metrics_kafka
        MonMetricsKafkaProcessor.rdd_to_recordstore(
            rdd_monasca_with_offsets)

        # get the metrics that have been submitted to the dummy message adapter
        vm_cpu_list = DummyAdapter.adapter_impl.metric_list
        vm_cpu_list = map(dump_as_ascii_string, vm_cpu_list)
        DummyAdapter.adapter_impl.metric_list = []

        vm_cpu_rdd = self.spark_context.parallelize(vm_cpu_list)
        sql_context = SQLContext(self.spark_context)
        vm_cpu_df = sql_context.read.json(vm_cpu_rdd)
        PreHourlyProcessor.do_transform(vm_cpu_df)

        metrics = DummyAdapter.adapter_impl.metric_list

        vcpus_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') ==
            'vcpus_agg' and
            value.get('metric').get('dimensions').get('project_id') ==
            '9647fd5030b04a799b0411cc38c4102d'][0]

        self.assertTrue(vcpus_agg_metric is not None)

        self.assertEqual(6.0,
                         vcpus_agg_metric
                         .get('metric').get('value'))
        self.assertEqual('useast',
                         vcpus_agg_metric
                         .get('meta').get('region'))

        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         vcpus_agg_metric
                         .get('meta').get('tenantId'))
        self.assertEqual('all',
                         vcpus_agg_metric
                         .get('metric').get('dimensions').get('host'))
        self.assertEqual('hourly',
                         vcpus_agg_metric
                         .get('metric').get('dimensions')
                         .get('aggregation_period'))

        self.assertEqual(8.0,
                         vcpus_agg_metric
                         .get('metric').get('value_meta').get('record_count'))
        self.assertEqual('2016-01-20 16:40:05',
                         vcpus_agg_metric
                         .get('metric').get('value_meta')
                         .get('firstrecord_timestamp_string'))
        self.assertEqual('2016-01-20 16:40:46',
                         vcpus_agg_metric
                         .get('metric').get('value_meta')
                         .get('lastrecord_timestamp_string'))

        vcpus_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') ==
            'vcpus_agg' and
            value.get('metric').get('dimensions').get('project_id') ==
            '8647fd5030b04a799b0411cc38c4102d'][0]

        self.assertTrue(vcpus_agg_metric is not None)

        self.assertEqual(1.0,
                         vcpus_agg_metric
                         .get('metric').get('value'))
        self.assertEqual('useast',
                         vcpus_agg_metric
                         .get('meta').get('region'))

        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         vcpus_agg_metric
                         .get('meta').get('tenantId'))
        self.assertEqual('all',
                         vcpus_agg_metric
                         .get('metric').get('dimensions').get('host'))
        self.assertEqual('hourly',
                         vcpus_agg_metric
                         .get('metric').get('dimensions')
                         .get('aggregation_period'))

        self.assertEqual(6.0,
                         vcpus_agg_metric
                         .get('metric').get('value_meta').get('record_count'))
        self.assertEqual('2016-01-20 16:40:00',
                         vcpus_agg_metric
                         .get('metric').get('value_meta')
                         .get('firstrecord_timestamp_string'))
        self.assertEqual('2016-01-20 16:40:42',
                         vcpus_agg_metric
                         .get('metric').get('value_meta')
                         .get('lastrecord_timestamp_string'))
    def test_fetch_quantity_avg(self, usage_manager, setter_manager,
                                insert_manager, data_driven_specs_repo):

        # test operation
        test_operation = "avg"

        # load components
        usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
        setter_manager.return_value = \
            MockComponentManager.get_setter_cmpt_mgr()
        insert_manager.return_value = \
            MockComponentManager.get_insert_cmpt_mgr()

        # init mock driver tables
        data_driven_specs_repo.return_value = \
            MockDataDrivenSpecsRepo(self.spark_context,
                                    self.get_pre_transform_specs_json(),
                                    self.get_transform_specs_json_by_operation(
                                        test_operation))

        # Create an emulated set of Kafka messages (these were gathered
        # by extracting Monasca messages from the Metrics queue on mini-mon).

        # Create an RDD out of the mocked Monasca metrics
        with open(DataProvider.kafka_data_path) as f:
            raw_lines = f.read().splitlines()
        raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]

        rdd_monasca = self.spark_context.parallelize(raw_tuple_list)

        # decorate mocked RDD with dummy kafka offsets
        myOffsetRanges = [OffsetRange("metrics", 1, 10,
                                      20)]  # mimic rdd.offsetRanges()

        transform_context = TransformContextUtils.get_context(
            offset_info=myOffsetRanges,
            batch_time_info=self.get_dummy_batch_time())

        rdd_monasca_with_offsets = rdd_monasca.map(
            lambda x: RddTransformContext(x, transform_context))

        # Call the primary method in mon_metrics_kafka
        MonMetricsKafkaProcessor.rdd_to_recordstore(rdd_monasca_with_offsets)

        # get the metrics that have been submitted to the dummy message adapter
        metrics = DummyAdapter.adapter_impl.metric_list

        utilized_cpu_logical_agg_metric = [
            value for value in metrics if value.get('metric').get('name') ==
            'cpu.utilized_logical_cores_agg'
        ][0]

        self.assertEqual(
            7.134214285714285,
            utilized_cpu_logical_agg_metric.get('metric').get('value'))
        self.assertEqual(
            'useast',
            utilized_cpu_logical_agg_metric.get('meta').get('region'))

        self.assertEqual(
            cfg.CONF.messaging.publish_kafka_project_id,
            utilized_cpu_logical_agg_metric.get('meta').get('tenantId'))
        self.assertEqual(
            'all',
            utilized_cpu_logical_agg_metric.get('metric').get(
                'dimensions').get('host'))
        self.assertEqual(
            'all',
            utilized_cpu_logical_agg_metric.get('metric').get(
                'dimensions').get('project_id'))
        self.assertEqual(
            'hourly',
            utilized_cpu_logical_agg_metric.get('metric').get(
                'dimensions').get('aggregation_period'))

        self.assertEqual(
            13.0,
            utilized_cpu_logical_agg_metric.get('metric').get(
                'value_meta').get('record_count'))
        self.assertEqual(
            '2016-03-07 16:09:23',
            utilized_cpu_logical_agg_metric.get('metric').get(
                'value_meta').get('firstrecord_timestamp_string'))
        self.assertEqual(
            '2016-03-07 16:10:38',
            utilized_cpu_logical_agg_metric.get('metric').get(
                'value_meta').get('lastrecord_timestamp_string'))
    def test_pod_net_in_usage_app(self,
                                  usage_manager,
                                  setter_manager,
                                  insert_manager,
                                  data_driven_specs_repo):

        # load components
        usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
        setter_manager.return_value = \
            MockComponentManager.get_setter_cmpt_mgr()
        insert_manager.return_value = \
            MockComponentManager.get_insert_cmpt_mgr()

        # init mock driver tables
        data_driven_specs_repo.return_value = \
            MockDataDrivenSpecsRepo(self.spark_context,
                                    self.get_pre_transform_specs_json_app(),
                                    self.get_transform_specs_json_app())

        # Create an emulated set of Kafka messages (these were gathered
        # by extracting Monasca messages from the Metrics queue on mini-mon).

        # Create an RDD out of the mocked Monasca metrics
        with open(DataProvider.fetch_quantity_data_path) as f:
            raw_lines = f.read().splitlines()
        raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]

        rdd_monasca = self.spark_context.parallelize(raw_tuple_list)

        # decorate mocked RDD with dummy kafka offsets
        myOffsetRanges = [
            OffsetRange("metrics", 1, 10, 20)]  # mimic rdd.offsetRanges()

        transform_context = TransformContextUtils.get_context(
            offset_info=myOffsetRanges,
            batch_time_info=self.get_dummy_batch_time())

        rdd_monasca_with_offsets = rdd_monasca.map(
            lambda x: RddTransformContext(x, transform_context))

        # Call the primary method in mon_metrics_kafka
        MonMetricsKafkaProcessor.rdd_to_recordstore(
            rdd_monasca_with_offsets)

        # get the metrics that have been submitted to the dummy message
        # adapter
        metrics = DummyAdapter.adapter_impl.metric_list

        pod_net_usage_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') ==
            'pod.net.in_bytes_sec_agg' and
            value.get('metric').get('dimensions').get('app') ==
            'junk' and
            value.get('metric').get('dimensions').get('namespace') ==
            'all' and
            value.get('metric').get('dimensions').get('pod_name') ==
            'all'][0]

        self.assertTrue(pod_net_usage_agg_metric is not None)

        self.assertEqual('pod.net.in_bytes_sec_agg',
                         pod_net_usage_agg_metric
                         .get('metric').get('name'))

        self.assertEqual('junk',
                         pod_net_usage_agg_metric
                         .get("metric").get('dimensions').get('app'))

        self.assertEqual('all',
                         pod_net_usage_agg_metric
                         .get("metric").get('dimensions').get('namespace'))

        self.assertEqual('all',
                         pod_net_usage_agg_metric
                         .get("metric").get('dimensions').get('pod_name'))

        self.assertEqual(122.94,
                         pod_net_usage_agg_metric
                         .get('metric').get('value'))
        self.assertEqual('useast',
                         pod_net_usage_agg_metric
                         .get('meta').get('region'))

        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         pod_net_usage_agg_metric
                         .get('meta').get('tenantId'))

        self.assertEqual('hourly',
                         pod_net_usage_agg_metric
                         .get('metric').get('dimensions')
                         .get('aggregation_period'))

        self.assertEqual(3.0,
                         pod_net_usage_agg_metric
                         .get('metric').get('value_meta').get('record_count'))
        self.assertEqual('2017-01-24 20:14:47',
                         pod_net_usage_agg_metric
                         .get('metric').get('value_meta')
                         .get('firstrecord_timestamp_string'))
        self.assertEqual('2017-01-24 20:15:47',
                         pod_net_usage_agg_metric
                         .get('metric').get('value_meta')
                         .get('lastrecord_timestamp_string'))
Example No. 23
    def test_rdd_to_recordstore(self, usage_manager, setter_manager,
                                insert_manager):

        usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
        setter_manager.return_value = \
            MockComponentManager.get_setter_cmpt_mgr()
        insert_manager.return_value = \
            MockComponentManager.get_insert_pre_hourly_cmpt_mgr()

        # Create an RDD out of the mocked Monasca metrics
        with open(DataProvider.kafka_data_path) as f:
            raw_lines = f.read().splitlines()
        raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]

        rdd_monasca = self.spark_context.parallelize(raw_tuple_list)

        # decorate mocked RDD with dummy kafka offsets
        myOffsetRanges = [OffsetRange("metrics", 1, 10,
                                      20)]  # mimic rdd.offsetRanges()

        transform_context = TransformContextUtils.get_context(
            offset_info=myOffsetRanges,
            batch_time_info=self.get_dummy_batch_time())

        rdd_monasca_with_offsets = rdd_monasca.map(
            lambda x: RddTransformContext(x, transform_context))

        # Call the primary method in mon_metrics_kafka
        MonMetricsKafkaProcessor.rdd_to_recordstore(rdd_monasca_with_offsets)

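        # Feed the intermediate records published by rdd_to_recordstore back
        # through the pre-hourly processor to produce the final hourly
        # aggregates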
        host_usage_list = DummyAdapter.adapter_impl.metric_list
        host_usage_list = list(map(dump_as_ascii_string, host_usage_list))
        DummyAdapter.adapter_impl.metric_list = []
        host_usage_rdd = self.spark_context.parallelize(host_usage_list)
        sql_context = SQLContext(self.spark_context)
        host_usage_df = sql_context.read.json(host_usage_rdd)
        PreHourlyProcessor.do_transform(host_usage_df)

        # get the metrics that have been submitted to the dummy message
        # adapter
        metrics = DummyAdapter.adapter_impl.metric_list

        # Verify cpu.total_logical_cores_agg for all hosts
        total_cpu_logical_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') == 'cpu.total_logical_cores_agg'
            and value.get('metric').get('dimensions').get('host') == 'all'
        ][0]

        self.assertEqual(
            15.0,
            total_cpu_logical_agg_metric.get('metric').get('value'))
        self.assertEqual(
            'useast',
            total_cpu_logical_agg_metric.get('meta').get('region'))
        self.assertEqual(
            cfg.CONF.messaging.publish_kafka_project_id,
            total_cpu_logical_agg_metric.get('meta').get('tenantId'))
        self.assertEqual(
            'all',
            total_cpu_logical_agg_metric.get('metric').get('dimensions').get(
                'project_id'))
        self.assertEqual(
            'hourly',
            total_cpu_logical_agg_metric.get('metric').get('dimensions').get(
                'aggregation_period'))

        self.assertEqual(
            13.0,
            total_cpu_logical_agg_metric.get('metric').get('value_meta').get(
                'record_count'))
        self.assertEqual(
            '2016-03-07 16:09:23',
            total_cpu_logical_agg_metric.get('metric').get('value_meta').get(
                'firstrecord_timestamp_string'))
        self.assertEqual(
            '2016-03-07 16:10:38',
            total_cpu_logical_agg_metric.get('metric').get('value_meta').get(
                'lastrecord_timestamp_string'))

        # Verify cpu.total_logical_cores_agg for mini-mon host
        total_cpu_logical_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') == 'cpu.total_logical_cores_agg'
            and value.get('metric').get('dimensions').get(
                'host') == 'test-cp1-comp0333-mgmt'
        ][0]

        self.assertEqual(
            9.0,
            total_cpu_logical_agg_metric.get('metric').get('value'))
        self.assertEqual(
            'useast',
            total_cpu_logical_agg_metric.get('meta').get('region'))
        self.assertEqual(
            cfg.CONF.messaging.publish_kafka_project_id,
            total_cpu_logical_agg_metric.get('meta').get('tenantId'))
        self.assertEqual(
            'all',
            total_cpu_logical_agg_metric.get('metric').get('dimensions').get(
                'project_id'))
        self.assertEqual(
            'hourly',
            total_cpu_logical_agg_metric.get('metric').get('dimensions').get(
                'aggregation_period'))

        self.assertEqual(
            6.0,
            total_cpu_logical_agg_metric.get('metric').get('value_meta').get(
                'record_count'))
        self.assertEqual(
            '2016-03-07 16:09:23',
            total_cpu_logical_agg_metric.get('metric').get('value_meta').get(
                'firstrecord_timestamp_string'))
        self.assertEqual(
            '2016-03-07 16:10:38',
            total_cpu_logical_agg_metric.get('metric').get('value_meta').get(
                'lastrecord_timestamp_string'))

        # Verify cpu.total_logical_cores_agg for devstack host
        total_cpu_logical_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') == 'cpu.total_logical_cores_agg'
            and value.get('metric').get('dimensions').get(
                'host') == 'test-cp1-comp0027-mgmt'
        ][0]

        self.assertEqual(
            6.0,
            total_cpu_logical_agg_metric.get('metric').get('value'))
        self.assertEqual(
            'useast',
            total_cpu_logical_agg_metric.get('meta').get('region'))
        self.assertEqual(
            cfg.CONF.messaging.publish_kafka_project_id,
            total_cpu_logical_agg_metric.get('meta').get('tenantId'))
        self.assertEqual(
            'all',
            total_cpu_logical_agg_metric.get('metric').get('dimensions').get(
                'project_id'))
        self.assertEqual(
            'hourly',
            total_cpu_logical_agg_metric.get('metric').get('dimensions').get(
                'aggregation_period'))

        self.assertEqual(
            7.0,
            total_cpu_logical_agg_metric.get('metric').get('value_meta').get(
                'record_count'))
        self.assertEqual(
            '2016-03-07 16:09:23',
            total_cpu_logical_agg_metric.get('metric').get('value_meta').get(
                'firstrecord_timestamp_string'))
        self.assertEqual(
            '2016-03-07 16:10:38',
            total_cpu_logical_agg_metric.get('metric').get('value_meta').get(
                'lastrecord_timestamp_string'))

        # Verify cpu.utilized_logical_cores_agg for all hosts
        utilized_cpu_logical_agg_metric = [
            value for value in metrics if
            value.get('metric').get('name') == 'cpu.utilized_logical_cores_agg'
            and value.get('metric').get('dimensions').get('host') == 'all'
        ][0]

        self.assertEqual(
            7.134214285714285,
            utilized_cpu_logical_agg_metric.get('metric').get('value'))
        self.assertEqual(
            'useast',
            utilized_cpu_logical_agg_metric.get('meta').get('region'))

        self.assertEqual(
            cfg.CONF.messaging.publish_kafka_project_id,
            utilized_cpu_logical_agg_metric.get('meta').get('tenantId'))
        self.assertEqual(
            'all',
            utilized_cpu_logical_agg_metric.get('metric').get(
                'dimensions').get('project_id'))
        self.assertEqual(
            'hourly',
            utilized_cpu_logical_agg_metric.get('metric').get(
                'dimensions').get('aggregation_period'))

        self.assertEqual(
            13.0,
            utilized_cpu_logical_agg_metric.get('metric').get(
                'value_meta').get('record_count'))
        self.assertEqual(
            '2016-03-07 16:09:23',
            utilized_cpu_logical_agg_metric.get('metric').get(
                'value_meta').get('firstrecord_timestamp_string'))
        self.assertEqual(
            '2016-03-07 16:10:38',
            utilized_cpu_logical_agg_metric.get('metric').get(
                'value_meta').get('lastrecord_timestamp_string'))

        # Verify cpu.utilized_logical_cores_agg for the mini-mon host
        utilized_cpu_logical_agg_metric = [
            value for value in metrics if value.get('metric').get('name') ==
            'cpu.utilized_logical_cores_agg' and value.get('metric').get(
                'dimensions').get('host') == 'test-cp1-comp0333-mgmt'
        ][0]

        self.assertEqual(
            4.9665,
            utilized_cpu_logical_agg_metric.get('metric').get('value'))
        self.assertEqual(
            'useast',
            utilized_cpu_logical_agg_metric.get('meta').get('region'))

        self.assertEqual(
            cfg.CONF.messaging.publish_kafka_project_id,
            utilized_cpu_logical_agg_metric.get('meta').get('tenantId'))
        self.assertEqual(
            'all',
            utilized_cpu_logical_agg_metric.get('metric').get(
                'dimensions').get('project_id'))
        self.assertEqual(
            'hourly',
            utilized_cpu_logical_agg_metric.get('metric').get(
                'dimensions').get('aggregation_period'))

        self.assertEqual(
            6.0,
            utilized_cpu_logical_agg_metric.get('metric').get(
                'value_meta').get('record_count'))
        self.assertEqual(
            '2016-03-07 16:09:23',
            utilized_cpu_logical_agg_metric.get('metric').get(
                'value_meta').get('firstrecord_timestamp_string'))
        self.assertEqual(
            '2016-03-07 16:10:38',
            utilized_cpu_logical_agg_metric.get('metric').get(
                'value_meta').get('lastrecord_timestamp_string'))

        # Verify cpu.utilized_logical_cores_agg for the devstack host
        utilized_cpu_logical_agg_metric = [
            value for value in metrics if value.get('metric').get('name') ==
            'cpu.utilized_logical_cores_agg' and value.get('metric').get(
                'dimensions').get('host') == 'test-cp1-comp0027-mgmt'
        ][0]

        self.assertEqual(
            2.1677142857142853,
            utilized_cpu_logical_agg_metric.get('metric').get('value'))
        self.assertEqual(
            'useast',
            utilized_cpu_logical_agg_metric.get('meta').get('region'))

        self.assertEqual(
            cfg.CONF.messaging.publish_kafka_project_id,
            utilized_cpu_logical_agg_metric.get('meta').get('tenantId'))
        self.assertEqual(
            'all',
            utilized_cpu_logical_agg_metric.get('metric').get(
                'dimensions').get('project_id'))
        self.assertEqual(
            'hourly',
            utilized_cpu_logical_agg_metric.get('metric').get(
                'dimensions').get('aggregation_period'))

        self.assertEqual(
            7.0,
            utilized_cpu_logical_agg_metric.get('metric').get(
                'value_meta').get('record_count'))
        self.assertEqual(
            '2016-03-07 16:09:23',
            utilized_cpu_logical_agg_metric.get('metric').get(
                'value_meta').get('firstrecord_timestamp_string'))
        self.assertEqual(
            '2016-03-07 16:10:38',
            utilized_cpu_logical_agg_metric.get('metric').get(
                'value_meta').get('lastrecord_timestamp_string'))

    def test_fetch_quantity_sum(self,
                                usage_manager,
                                setter_manager,
                                insert_manager,
                                data_driven_specs_repo):

        # test operation
        test_operation = "sum"

        # load components
        usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
        setter_manager.return_value = \
            MockComponentManager.get_setter_cmpt_mgr()
        insert_manager.return_value = \
            MockComponentManager.get_insert_pre_hourly_cmpt_mgr()

        # init mock driver tables
        data_driven_specs_repo.return_value = \
            MockDataDrivenSpecsRepo(self.spark_context,
                                    self.get_pre_transform_specs_json(),
                                    self.get_transform_specs_json_by_operation(
                                        test_operation))

        # Create an emulated set of Kafka messages (these were gathered
        # by extracting Monasca messages from the Metrics queue on mini-mon).

        # Create an RDD out of the mocked Monasca metrics
        with open(DataProvider.fetch_quantity_data_path) as f:
            raw_lines = f.read().splitlines()
        raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]

        rdd_monasca = self.spark_context.parallelize(raw_tuple_list)

        # decorate mocked RDD with dummy kafka offsets
        myOffsetRanges = [
            OffsetRange("metrics", 1, 10, 20)]  # mimic rdd.offsetRanges()

        transform_context = TransformContextUtils.get_context(
            offset_info=myOffsetRanges,
            batch_time_info=self.get_dummy_batch_time())

        rdd_monasca_with_offsets = rdd_monasca.map(
            lambda x: RddTransformContext(x, transform_context))

        # Call the primary method in mon_metrics_kafka
        MonMetricsKafkaProcessor.rdd_to_recordstore(
            rdd_monasca_with_offsets)

        # fetch the instance usage records submitted to the dummy message
        # adapter and run them through the pre-hourly processor
        instance_usage_list = DummyAdapter.adapter_impl.metric_list
        instance_usage_list = list(map(dump_as_ascii_string,
                                       instance_usage_list))
        DummyAdapter.adapter_impl.metric_list = []
        instance_usage_rdd = self.spark_context.parallelize(
            instance_usage_list)
        sql_context = SQLContext(self.spark_context)
        instance_usage_df = sql_context.read.json(instance_usage_rdd)
        PreHourlyProcessor.do_transform(instance_usage_df)

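        # read back the final aggregates published by the pre-hourly processor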
        metrics = DummyAdapter.adapter_impl.metric_list
        mem_total_mb_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') ==
            'mem.total_mb_agg' and
            value.get('metric').get('dimensions').get('host') ==
            'mini-mon'][0]

        self.assertTrue(mem_total_mb_agg_metric is not None)

        self.assertEqual('mem.total_mb_agg',
                         mem_total_mb_agg_metric
                         .get('metric').get('name'))

        self.assertEqual(15360.0,
                         mem_total_mb_agg_metric
                         .get('metric').get('value'))
        self.assertEqual('useast',
                         mem_total_mb_agg_metric
                         .get('meta').get('region'))

        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         mem_total_mb_agg_metric
                         .get('meta').get('tenantId'))
        self.assertEqual('mini-mon',
                         mem_total_mb_agg_metric
                         .get('metric').get('dimensions').get('host'))
        self.assertEqual('all',
                         mem_total_mb_agg_metric
                         .get('metric').get('dimensions').get('project_id'))
        self.assertEqual('hourly',
                         mem_total_mb_agg_metric
                         .get('metric').get('dimensions')
                         .get('aggregation_period'))
        self.assertEqual(4.0,
                         mem_total_mb_agg_metric
                         .get('metric').get('value_meta').get('record_count'))
        self.assertEqual('2016-01-20 16:40:00',
                         mem_total_mb_agg_metric
                         .get('metric').get('value_meta')
                         .get('firstrecord_timestamp_string'))
        self.assertEqual('2016-01-20 16:40:46',
                         mem_total_mb_agg_metric
                         .get('metric').get('value_meta')
                         .get('lastrecord_timestamp_string'))

    def test_fetch_quantity_avg(self,
                                usage_manager,
                                setter_manager,
                                insert_manager,
                                data_driven_specs_repo):

        # test operation
        test_operation = "avg"

        # load components
        usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
        setter_manager.return_value = \
            MockComponentManager.get_setter_cmpt_mgr()
        insert_manager.return_value = \
            MockComponentManager.get_insert_pre_hourly_cmpt_mgr()

        # init mock driver tables
        data_driven_specs_repo.return_value = \
            MockDataDrivenSpecsRepo(self.spark_context,
                                    self.get_pre_transform_specs_json(),
                                    self.get_transform_specs_json_by_operation(
                                        test_operation, 'hourly'))

        # Create an RDD out of the mocked Monasca metrics
        with open(DataProvider.kafka_data_path) as f:
            raw_lines = f.read().splitlines()
        raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]

        rdd_monasca = self.spark_context.parallelize(raw_tuple_list)

        # decorate mocked RDD with dummy kafka offsets
        myOffsetRanges = [
            OffsetRange("metrics", 1, 10, 20)]  # mimic rdd.offsetRanges()

        transform_context = TransformContextUtils.get_context(
            offset_info=myOffsetRanges,
            batch_time_info=self.get_dummy_batch_time())

        rdd_monasca_with_offsets = rdd_monasca.map(
            lambda x: RddTransformContext(x, transform_context))

        # Call the primary method in mon_metrics_kafka
        MonMetricsKafkaProcessor.rdd_to_recordstore(
            rdd_monasca_with_offsets)

        # fetch the metrics submitted to the dummy message adapter and run
        # them through the pre-hourly processor
        metrics = DummyAdapter.adapter_impl.metric_list
        quantity_util_list = list(map(dump_as_ascii_string, metrics))

        DummyAdapter.adapter_impl.metric_list = []
        quantity_util_rdd = self.spark_context.parallelize(quantity_util_list)
        sql_context = SQLContext(self.spark_context)
        quantity_util_df = sql_context.read.json(quantity_util_rdd)
        PreHourlyProcessor.do_transform(quantity_util_df)
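        # read back the final aggregates published by the pre-hourly processor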
        metrics = DummyAdapter.adapter_impl.metric_list

        utilized_cpu_logical_agg_metric = [
            value for value in metrics
            if value.get('metric').get(
                'name') == 'cpu.utilized_logical_cores_agg'][0]

        self.assertEqual(7.134214285714285,
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value'))
        self.assertEqual('useast',
                         utilized_cpu_logical_agg_metric.get(
                             'meta').get('region'))

        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         utilized_cpu_logical_agg_metric.get(
                             'meta').get('tenantId'))
        self.assertEqual('all',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('host'))
        self.assertEqual('all',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('project_id'))
        self.assertEqual('hourly',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('aggregation_period'))

        self.assertEqual(13.0,
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('record_count'))
        self.assertEqual('2016-03-07 16:09:23',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('firstrecord_timestamp_string'))
        self.assertEqual('2016-03-07 16:10:38',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('lastrecord_timestamp_string'))

    def test_pod_net_in_usage_app(self, usage_manager, setter_manager,
                                  insert_manager, data_driven_specs_repo):

        # load components
        usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
        setter_manager.return_value = \
            MockComponentManager.get_setter_cmpt_mgr()
        insert_manager.return_value = \
            MockComponentManager.get_insert_cmpt_mgr()

        # init mock driver tables
        data_driven_specs_repo.return_value = \
            MockDataDrivenSpecsRepo(self.spark_context,
                                    self.get_pre_transform_specs_json_app(),
                                    self.get_transform_specs_json_app())

        # Create an emulated set of Kafka messages (these were gathered
        # by extracting Monasca messages from the Metrics queue on mini-mon).

        # Create an RDD out of the mocked Monasca metrics
        with open(DataProvider.fetch_quantity_data_path) as f:
            raw_lines = f.read().splitlines()
        raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]

        rdd_monasca = self.spark_context.parallelize(raw_tuple_list)

        # decorate mocked RDD with dummy kafka offsets
        myOffsetRanges = [OffsetRange("metrics", 1, 10,
                                      20)]  # mimic rdd.offsetRanges()

        transform_context = TransformContextUtils.get_context(
            offset_info=myOffsetRanges,
            batch_time_info=self.get_dummy_batch_time())

        rdd_monasca_with_offsets = rdd_monasca.map(
            lambda x: RddTransformContext(x, transform_context))

        # Call the primary method in mon_metrics_kafka
        MonMetricsKafkaProcessor.rdd_to_recordstore(rdd_monasca_with_offsets)

        # get the metrics that have been submitted to the dummy message
        # adapter
        metrics = DummyAdapter.adapter_impl.metric_list

        pod_net_usage_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') == 'pod.net.in_bytes_sec_agg'
            and value.get('metric').get('dimensions').get('app') == 'junk'
            and value.get('metric').get('dimensions').get('namespace') == 'all'
            and value.get('metric').get('dimensions').get('pod_name') == 'all'
        ][0]

        self.assertTrue(pod_net_usage_agg_metric is not None)

        self.assertEqual('pod.net.in_bytes_sec_agg',
                         pod_net_usage_agg_metric.get('metric').get('name'))

        self.assertEqual(
            'junk',
            pod_net_usage_agg_metric.get("metric").get('dimensions').get(
                'app'))

        self.assertEqual(
            'all',
            pod_net_usage_agg_metric.get("metric").get('dimensions').get(
                'namespace'))

        self.assertEqual(
            'all',
            pod_net_usage_agg_metric.get("metric").get('dimensions').get(
                'pod_name'))

        self.assertEqual(122.94,
                         pod_net_usage_agg_metric.get('metric').get('value'))
        self.assertEqual('useast',
                         pod_net_usage_agg_metric.get('meta').get('region'))

        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         pod_net_usage_agg_metric.get('meta').get('tenantId'))

        self.assertEqual(
            'hourly',
            pod_net_usage_agg_metric.get('metric').get('dimensions').get(
                'aggregation_period'))

        self.assertEqual(
            3.0,
            pod_net_usage_agg_metric.get('metric').get('value_meta').get(
                'record_count'))
        self.assertEqual(
            '2017-01-24 20:14:47',
            pod_net_usage_agg_metric.get('metric').get('value_meta').get(
                'firstrecord_timestamp_string'))
        self.assertEqual(
            '2017-01-24 20:15:47',
            pod_net_usage_agg_metric.get('metric').get('value_meta').get(
                'lastrecord_timestamp_string'))

    def test_rdd_to_recordstore(self,
                                usage_manager,
                                setter_manager,
                                insert_manager):

        usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
        setter_manager.return_value = \
            MockComponentManager.get_setter_cmpt_mgr()
        insert_manager.return_value = \
            MockComponentManager.get_insert_pre_hourly_cmpt_mgr()

        # Create an RDD out of the mocked Monasca metrics
        with open(DataProvider.kafka_data_path) as f:
            raw_lines = f.read().splitlines()
        raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]

        rdd_monasca = self.spark_context.parallelize(raw_tuple_list)

        # decorate mocked RDD with dummy kafka offsets
        myOffsetRanges = [
            OffsetRange("metrics", 1, 10, 20)]  # mimic rdd.offsetRanges()

        transform_context = TransformContextUtils.get_context(
            offset_info=myOffsetRanges,
            batch_time_info=self.get_dummy_batch_time())

        rdd_monasca_with_offsets = rdd_monasca.map(
            lambda x: RddTransformContext(x, transform_context))

        # Call the primary method in mon_metrics_kafka
        MonMetricsKafkaProcessor.rdd_to_recordstore(
            rdd_monasca_with_offsets)

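        # Feed the intermediate records published by rdd_to_recordstore back
        # through the pre-hourly processor to produce the final hourly
        # aggregates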
        host_usage_list = DummyAdapter.adapter_impl.metric_list
        host_usage_list = list(map(dump_as_ascii_string,
                                   host_usage_list))
        DummyAdapter.adapter_impl.metric_list = []
        host_usage_rdd = self.spark_context.parallelize(host_usage_list)
        sql_context = SQLContext(self.spark_context)
        host_usage_df = sql_context.read.json(host_usage_rdd)
        PreHourlyProcessor.do_transform(host_usage_df)

        # get the metrics that have been submitted to the dummy message
        # adapter
        metrics = DummyAdapter.adapter_impl.metric_list

        # Verify cpu.total_logical_cores_agg for all hosts
        total_cpu_logical_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') ==
            'cpu.total_logical_cores_agg' and
            value.get('metric').get('dimensions').get('host') ==
            'all'][0]

        self.assertEqual(15.0,
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value'))
        self.assertEqual('useast',
                         total_cpu_logical_agg_metric.get(
                             'meta').get('region'))
        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         total_cpu_logical_agg_metric.get(
                             'meta').get('tenantId'))
        self.assertEqual('all',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('project_id'))
        self.assertEqual('hourly',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('aggregation_period'))

        self.assertEqual(13.0,
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('record_count'))
        self.assertEqual('2016-03-07 16:09:23',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('firstrecord_timestamp_string'))
        self.assertEqual('2016-03-07 16:10:38',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('lastrecord_timestamp_string'))

        # Verify cpu.total_logical_cores_agg for mini-mon host
        total_cpu_logical_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') ==
            'cpu.total_logical_cores_agg' and
            value.get('metric').get('dimensions').get('host') ==
            'test-cp1-comp0333-mgmt'][0]

        self.assertEqual(9.0,
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value'))
        self.assertEqual('useast',
                         total_cpu_logical_agg_metric.get(
                             'meta').get('region'))
        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         total_cpu_logical_agg_metric.get(
                             'meta').get('tenantId'))
        self.assertEqual('all',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('project_id'))
        self.assertEqual('hourly',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('aggregation_period'))

        self.assertEqual(6.0,
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('record_count'))
        self.assertEqual('2016-03-07 16:09:23',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('firstrecord_timestamp_string'))
        self.assertEqual('2016-03-07 16:10:38',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('lastrecord_timestamp_string'))

        # Verify cpu.total_logical_cores_agg for devstack host
        total_cpu_logical_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') ==
            'cpu.total_logical_cores_agg' and
            value.get('metric').get('dimensions').get('host') ==
            'test-cp1-comp0027-mgmt'][0]

        self.assertEqual(6.0,
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value'))
        self.assertEqual('useast',
                         total_cpu_logical_agg_metric.get(
                             'meta').get('region'))
        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         total_cpu_logical_agg_metric.get(
                             'meta').get('tenantId'))
        self.assertEqual('all',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('project_id'))
        self.assertEqual('hourly',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('aggregation_period'))

        self.assertEqual(7.0,
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('record_count'))
        self.assertEqual('2016-03-07 16:09:23',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('firstrecord_timestamp_string'))
        self.assertEqual('2016-03-07 16:10:38',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('lastrecord_timestamp_string'))

        # Verify cpu.utilized_logical_cores_agg for all hosts
        utilized_cpu_logical_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') ==
            'cpu.utilized_logical_cores_agg' and
            value.get('metric').get('dimensions').get('host') ==
            'all'][0]

        self.assertEqual(7.134214285714285,
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value'))
        self.assertEqual('useast',
                         utilized_cpu_logical_agg_metric.get(
                             'meta').get('region'))

        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         utilized_cpu_logical_agg_metric.get(
                             'meta').get('tenantId'))
        self.assertEqual('all',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('project_id'))
        self.assertEqual('hourly',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('aggregation_period'))

        self.assertEqual(13.0,
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('record_count'))
        self.assertEqual('2016-03-07 16:09:23',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('firstrecord_timestamp_string'))
        self.assertEqual('2016-03-07 16:10:38',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('lastrecord_timestamp_string'))

        # Verify cpu.utilized_logical_cores_agg for the mini-mon host
        utilized_cpu_logical_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') ==
            'cpu.utilized_logical_cores_agg' and
            value.get('metric').get('dimensions').get('host') ==
            'test-cp1-comp0333-mgmt'][0]

        self.assertEqual(4.9665,
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value'))
        self.assertEqual('useast',
                         utilized_cpu_logical_agg_metric.get(
                             'meta').get('region'))

        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         utilized_cpu_logical_agg_metric.get(
                             'meta').get('tenantId'))
        self.assertEqual('all',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('project_id'))
        self.assertEqual('hourly',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('aggregation_period'))

        self.assertEqual(6.0,
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('record_count'))
        self.assertEqual('2016-03-07 16:09:23',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('firstrecord_timestamp_string'))
        self.assertEqual('2016-03-07 16:10:38',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('lastrecord_timestamp_string'))

        # Verify cpu.utilized_logical_cores_agg for the devstack host
        utilized_cpu_logical_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') ==
            'cpu.utilized_logical_cores_agg' and
            value.get('metric').get('dimensions').get('host') ==
            'test-cp1-comp0027-mgmt'][0]

        self.assertEqual(2.1677142857142853,
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value'))
        self.assertEqual('useast',
                         utilized_cpu_logical_agg_metric.get(
                             'meta').get('region'))

        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         utilized_cpu_logical_agg_metric.get(
                             'meta').get('tenantId'))
        self.assertEqual('all',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('project_id'))
        self.assertEqual('hourly',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('aggregation_period'))

        self.assertEqual(7.0,
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('record_count'))
        self.assertEqual('2016-03-07 16:09:23',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('firstrecord_timestamp_string'))
        self.assertEqual('2016-03-07 16:10:38',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('lastrecord_timestamp_string'))