def _process_pipeline(self, read_stream):
    """
    Cast the VMStat/MemoryUsage metric columns to integers and aggregate them.

    Every metric receives the Count, Max, Min, Stddev and P01..P99
    aggregations; the three MemoryUsage_* metrics additionally receive Sum.

    :param read_stream: input stream exposing ``withColumn`` and ``aggregate``
    :return: single-element list holding the aggregated stream
    """
    # Columns are cast one by one, in this order, to IntegerType.
    integer_columns = (
        "VMStat_idlePct",
        "VMStat_systemPct",
        "VMStat_iowaitPct",
        "VMStat_hwIrqPct",
        "MemoryUsage_freeKb",
        "MemoryUsage_cachedKb",
        "MemoryUsage_usedKb",
        "VMStat_nicePct",
        "VMStat_userPct",
        "VMStat_swIrqPct",
        "VMStat_loadAverage",
    )
    stream = read_stream
    for column_name in integer_columns:
        stream = stream.withColumn(
            column_name, col(column_name).cast(IntegerType()))

    # Order of the metrics inside the compound aggregation.
    metrics = (
        "VMStat_idlePct",
        "VMStat_systemPct",
        "VMStat_iowaitPct",
        "VMStat_hwIrqPct",
        "MemoryUsage_usedKb",
        "MemoryUsage_freeKb",
        "MemoryUsage_cachedKb",
        "VMStat_nicePct",
        "VMStat_userPct",
        "VMStat_swIrqPct",
        "VMStat_loadAverage",
    )
    metrics_with_sum = (
        "MemoryUsage_usedKb",
        "MemoryUsage_freeKb",
        "MemoryUsage_cachedKb",
    )
    statistics = (Count, Max, Min, Stddev,
                  P01, P05, P10, P25, P50, P75, P90, P95, P99)

    aggregations = []
    for metric in metrics:
        for statistic in statistics:
            aggregations.append(statistic(aggregation_field=metric))
        if metric in metrics_with_sum:
            # Sum goes last within the metric's group of aggregations.
            aggregations.append(Sum(aggregation_field=metric))

    compound = CompoundAggregation(
        aggregations=aggregations,
        group_fields=self.__dimensions,
        aggregation_name=self._component_name,
    )
    return [stream.aggregate(compound)]
def _process_pipeline(self, json_stream):
    """
    Parse the JSON temperature report and aggregate each sensor reading.

    The ``TemperatureReport_value`` column is parsed with ``from_json`` into
    the TUNER, BOARD, WIFI, CPU and HDD fields; a reading equal to "-274" is
    replaced by null before aggregation.

    :param json_stream: input stream exposing ``withColumn``, ``drop`` and
        ``aggregate``
    :return: single-element list holding the aggregated stream
    """
    sensors = ["TUNER", "BOARD", "WIFI", "CPU", "HDD"]

    # NOTE(review): all readings are declared as strings and are not cast
    # here -- confirm the aggregations below cope with string input.
    schema = StructType([StructField(sensor, StringType()) for sensor in sensors])

    stream = json_stream.withColumn(
        "jsonHW",
        from_json(col("TemperatureReport_value"), schema).alias("jsonHW"))

    for sensor in sensors:
        parsed_path = "jsonHW." + sensor
        # "-274" is presumably an 'invalid reading' marker -- TODO confirm.
        stream = stream.withColumn(
            sensor,
            when(col(parsed_path) == "-274", None).otherwise(col(parsed_path)))

    # The helper struct and the raw JSON column are no longer needed.
    stream = stream.drop("jsonHW")
    stream = stream.drop("TemperatureReport_value")

    statistics = (Count, Max, Min, Stddev,
                  P01, P05, P10, P25, P50, P75, P90, P95, P99)
    aggregations = [
        statistic(aggregation_field=sensor)
        for sensor in sensors
        for statistic in statistics
    ]

    compound = CompoundAggregation(
        aggregations=aggregations,
        group_fields=self.__dimensions,
        aggregation_name=self._component_name,
    )
    return [stream.aggregate(compound)]
def _agg_end2end(self, stream):
    """
    Aggregate the duration of end-to-end calls (STB - uservice).

    Applies Max, Min and Avg to ``duration_ms``, grouped by tenant, app and
    status.

    :param stream: input stream exposing ``aggregate``
    :return: the result of ``stream.aggregate`` for the compound aggregation
    """
    duration_stats = [
        statistic(aggregation_field="duration_ms")
        for statistic in (Max, Min, Avg)
    ]
    compound = CompoundAggregation(
        aggregations=duration_stats,
        aggregation_name=self._component_name,
        group_fields=["tenant", "app", "status"],
    )
    return stream.aggregate(compound)
def _agg_uservice2component_duration(self, stream):
    """
    Aggregate the duration of uservice-to-component calls.

    Applies Max, Min and Avg to ``duration_ms``, grouped by tenant, app,
    dest and host.

    :param stream: input stream exposing ``aggregate``
    :return: the result of ``stream.aggregate`` for the compound aggregation
    """
    duration_stats = [
        statistic(aggregation_field="duration_ms")
        for statistic in (Max, Min, Avg)
    ]
    compound = CompoundAggregation(
        aggregations=duration_stats,
        aggregation_name=self._component_name,
        group_fields=["tenant", "app", "dest", "host"],
    )
    return stream.aggregate(compound)
def _process_pipeline(self, json_stream):
    """
    Aggregate the missed-events counter of the usage collector report.

    ``UsageCollectorReport_missed_events`` is cast to an integer and then fed
    to Sum, Count, Max, Min, Stddev and the P01..P99 aggregations.

    :param json_stream: input stream exposing ``withColumn`` and ``aggregate``
    :return: single-element list holding the aggregated stream
    """
    missed_events = "UsageCollectorReport_missed_events"
    stream = json_stream.withColumn(
        missed_events, col(missed_events).cast(IntegerType()))

    statistics = (Sum, Count, Max, Min, Stddev,
                  P01, P05, P10, P25, P50, P75, P90, P95, P99)
    aggregations = [
        statistic(aggregation_field=missed_events) for statistic in statistics
    ]

    compound = CompoundAggregation(
        aggregations=aggregations,
        group_fields=self.__dimensions,
        aggregation_name=self._component_name,
    )
    return [stream.aggregate(compound)]
def _process_pipeline(self, read_stream):
    """
    Aggregate the tuner report columns of the prepared input data frame.

    Columns derived from SNR and signalLevel get Count, Max, Min and the
    P01..P99 aggregations; columns derived from erroreds, unerroreds and
    correcteds get the same set plus Sum.

    :param read_stream: raw input stream, passed to
        ``self._prepare_input_data_frame``
    :return: single-element list holding the aggregated stream
    """
    prepared = self._prepare_input_data_frame(read_stream)

    column_names = TunerPerfReport.get_column_names

    # Fields that only need the count/min/max/percentile aggregations.
    plain_fields = column_names("TunerReport_SNR")
    plain_fields.extend(column_names("TunerReport_signalLevel"))

    # Fields that additionally need a Sum aggregation.
    summed_fields = column_names("TunerReport_erroreds")
    for report_field in ("TunerReport_unerroreds", "TunerReport_correcteds"):
        summed_fields.extend(column_names(report_field))

    statistics = (Count, Max, Min,
                  P01, P05, P10, P25, P50, P75, P90, P95, P99)

    aggregations = []
    for field in [*plain_fields, *summed_fields]:
        aggregations.extend(
            [statistic(aggregation_field=field) for statistic in statistics])
        if field in summed_fields:
            aggregations.append(Sum(aggregation_field=field))

    compound = CompoundAggregation(
        aggregations=aggregations,
        group_fields=self.__dimensions,
        aggregation_name=self._component_name,
    )
    return [prepared.aggregate(compound)]
def _process_pipeline(self, json_stream):
    """
    Aggregate voice report metrics and count voice sessions.

    Rows without a ``VoiceReport.voiceReport.sessionId`` are filtered out and
    the relevant nested fields are selected under short aliases. Two
    aggregations are then produced from the same stream: the compound
    Count/Max/Min/P01..P99 statistics for sessionCreationTime,
    audioPacketLoss and audioTransferTime, and a Count grouped by viewerID,
    sessionId and transactionResult.

    :param json_stream: input stream exposing ``filter``, ``select`` and
        ``aggregate``
    :return: list with the compound aggregation first and the count second
    """
    with_session = json_stream.filter(
        col("VoiceReport.voiceReport.sessionId").isNotNull())

    # (source column, alias) pairs, in the order they are selected.
    aliased_columns = (
        ("header.viewerID", "viewerID"),
        ("VoiceReport.voiceReport.sessionId", "sessionId"),
        ("VoiceReport.voiceReport.sessionCreationTime", "sessionCreationTime"),
        ("VoiceReport.voiceReport.audioPacketLoss", "audioPacketLoss"),
        ("VoiceReport.voiceReport.audioTransferTime", "audioTransferTime"),
        ("VoiceReport.voiceReport.transactionResult", "transactionResult"),
    )
    stream = with_session.select(
        col("@timestamp"),
        *[col(source).alias(alias) for source, alias in aliased_columns])

    statistics = (Count, Max, Min,
                  P01, P05, P10, P25, P50, P75, P90, P95, P99)
    metrics = ("sessionCreationTime", "audioPacketLoss", "audioTransferTime")
    aggregations = [
        statistic(aggregation_field=metric)
        for metric in metrics
        for statistic in statistics
    ]

    metric_statistics = stream.aggregate(
        CompoundAggregation(
            aggregations=aggregations,
            group_fields=self.__dimensions,
            aggregation_name=self._component_name,
        ))
    session_counts = stream.aggregate(
        Count(
            group_fields=["viewerID", "sessionId", "transactionResult"],
            aggregation_name=self._component_name,
        ))
    return [metric_statistics, session_counts]