Example #1
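The snippets below assume the standard PySpark imports plus the project's aggregation classes; a likely import block (the aggregation module path is inferred from the docstring's reference to aggregation.py and may differ) is:

from pyspark.sql.functions import col, from_json, when
from pyspark.sql.types import IntegerType, StringType, StructField, StructType
from aggregation import (Avg, CompoundAggregation, Count, Max, Min, Stddev, Sum,  # assumed path
                         P01, P05, P10, P25, P50, P75, P90, P95, P99)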
    def _process_pipeline(self, read_stream):
        """This define the aggregation fields and re-use statistical functions from aggregation.py"""
        stream = read_stream \
            .withColumn("VMStat_idlePct", col("VMStat_idlePct").cast(IntegerType())) \
            .withColumn("VMStat_systemPct", col("VMStat_systemPct").cast(IntegerType())) \
            .withColumn("VMStat_iowaitPct", col("VMStat_iowaitPct").cast(IntegerType())) \
            .withColumn("VMStat_hwIrqPct", col("VMStat_hwIrqPct").cast(IntegerType())) \
            .withColumn("MemoryUsage_freeKb", col("MemoryUsage_freeKb").cast(IntegerType())) \
            .withColumn("MemoryUsage_cachedKb", col("MemoryUsage_cachedKb").cast(IntegerType())) \
            .withColumn("MemoryUsage_usedKb", col("MemoryUsage_usedKb").cast(IntegerType())) \
            .withColumn("VMStat_nicePct", col("VMStat_nicePct").cast(IntegerType())) \
            .withColumn("VMStat_userPct", col("VMStat_userPct").cast(IntegerType())) \
            .withColumn("VMStat_swIrqPct", col("VMStat_swIrqPct").cast(IntegerType())) \
            .withColumn("VMStat_loadAverage", col("VMStat_loadAverage").cast(IntegerType()))

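        # Every metric gets count, min/max, stddev and a percentile spread; the MemoryUsage_* counters additionally get a Sum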
        aggregation_fields = ["VMStat_idlePct", "VMStat_systemPct", "VMStat_iowaitPct", "VMStat_hwIrqPct",
                              "MemoryUsage_usedKb", "MemoryUsage_freeKb", "MemoryUsage_cachedKb",
                              "VMStat_nicePct","VMStat_userPct", "VMStat_swIrqPct", "VMStat_loadAverage"]

        aggregation_fields_with_sum = ["MemoryUsage_usedKb", "MemoryUsage_freeKb", "MemoryUsage_cachedKb"]

        aggregations = []
        for field in aggregation_fields:
            kwargs = {'aggregation_field': field}

            aggregations.extend([Count(**kwargs), Max(**kwargs), Min(**kwargs), Stddev(**kwargs),
                                 P01(**kwargs), P05(**kwargs), P10(**kwargs), P25(**kwargs), P50(**kwargs),
                                 P75(**kwargs), P90(**kwargs), P95(**kwargs), P99(**kwargs)])

            if kwargs["aggregation_field"] in aggregation_fields_with_sum:
                aggregations.append(Sum(**kwargs))

        return [stream.aggregate(CompoundAggregation(aggregations=aggregations, group_fields=self.__dimensions,
                                                     aggregation_name=self._component_name))]

    def _process_pipeline(self, json_stream):
        schema = StructType([
            StructField("TUNER", StringType()),
            StructField("BOARD", StringType()),
            StructField("WIFI", StringType()),
            StructField("CPU", StringType()),
            StructField("HDD", StringType())
        ])

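        # Parse the nested temperature report and map the value -274 (below absolute zero, evidently an "invalid sensor" marker) to null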
        stream = json_stream \
            .withColumn("jsonHW", from_json(col("TemperatureReport_value"), schema).alias("jsonHW")) \
            .withColumn("TUNER", when(col("jsonHW.TUNER") == "-274", None).otherwise(col("jsonHW.TUNER"))) \
            .withColumn("BOARD", when(col("jsonHW.BOARD") == "-274", None).otherwise(col("jsonHW.BOARD"))) \
            .withColumn("WIFI", when(col("jsonHW.WIFI") == "-274", None).otherwise(col("jsonHW.WIFI"))) \
            .withColumn("CPU", when(col("jsonHW.CPU") == "-274", None).otherwise(col("jsonHW.CPU"))) \
            .withColumn("HDD", when(col("jsonHW.HDD") == "-274", None).otherwise(col("jsonHW.HDD"))) \
            .drop("jsonHW") \
            .drop("TemperatureReport_value")

        aggregation_fields = ["TUNER", "BOARD", "WIFI", "CPU", "HDD"]
        aggregations = []

        for field in aggregation_fields:
            kwargs = {'aggregation_field': field}

            aggregations.extend([Count(**kwargs), Max(**kwargs), Min(**kwargs), Stddev(**kwargs),
                                 P01(**kwargs), P05(**kwargs), P10(**kwargs), P25(**kwargs), P50(**kwargs),
                                 P75(**kwargs), P90(**kwargs), P95(**kwargs), P99(**kwargs)])

        return [stream.aggregate(CompoundAggregation(aggregations=aggregations, group_fields=self.__dimensions,
                                                     aggregation_name=self._component_name))]

    def _agg_end2end(self, stream):
        """
        Aggregate end-to-end calls from STB to uservice
        :param stream:
        :return:
        """
        kwargs = {'aggregation_field': "duration_ms"}

        aggregations = [Max(**kwargs), Min(**kwargs), Avg(**kwargs)]

        return stream.aggregate(CompoundAggregation(aggregations=aggregations, aggregation_name=self._component_name,
                                                    group_fields=["tenant", "app", "status"]))

    def _agg_uservice2component_duration(self, stream):
        """
        Aggregate uservice-to-component call duration
        :param stream:
        :return:
        """
        kwargs = {'aggregation_field': "duration_ms"}

        aggregations = [Max(**kwargs), Min(**kwargs), Avg(**kwargs)]

        return stream.aggregate(CompoundAggregation(aggregations=aggregations, aggregation_name=self._component_name,
                                                    group_fields=["tenant", "app", "dest", "host"]))
Example #5
    def _process_pipeline(self, json_stream):
        stream = json_stream \
            .withColumn("UsageCollectorReport_missed_events",
                        col("UsageCollectorReport_missed_events").cast(IntegerType()))

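        # UsageCollectorReport_missed_events is the only aggregated metric; as a counter it also gets a Sum on top of the statistical aggregations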
        kwargs = {"aggregation_field": "UsageCollectorReport_missed_events"}

        aggregations = [Sum(**kwargs), Count(**kwargs), Max(**kwargs), Min(**kwargs), Stddev(**kwargs),
                        P01(**kwargs), P05(**kwargs), P10(**kwargs), P25(**kwargs), P50(**kwargs),
                        P75(**kwargs), P90(**kwargs), P95(**kwargs), P99(**kwargs)]

        return [stream.aggregate(CompoundAggregation(aggregations=aggregations, group_fields=self.__dimensions,
                                                     aggregation_name=self._component_name))]

    def _process_pipeline(self, read_stream):

        pre_result_df = self._prepare_input_data_frame(read_stream)

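        # SNR and signal-level columns get count/min/max/percentiles only; the erroreds/unerroreds/correcteds counters additionally get a Sum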
        aggregation_fields_without_sum = TunerPerfReport.get_column_names(
            "TunerReport_SNR")
        aggregation_fields_without_sum.extend(
            TunerPerfReport.get_column_names("TunerReport_signalLevel"))

        aggregation_fields_with_sum = TunerPerfReport.get_column_names(
            "TunerReport_erroreds")
        aggregation_fields_with_sum.extend(
            TunerPerfReport.get_column_names("TunerReport_unerroreds"))
        aggregation_fields_with_sum.extend(
            TunerPerfReport.get_column_names("TunerReport_correcteds"))

        aggregations_ls = []
        aggregations_ls.extend(aggregation_fields_without_sum)
        aggregations_ls.extend(aggregation_fields_with_sum)

        aggregations = []

        for field in aggregations_ls:
            kwargs = {'aggregation_field': field}

            aggregations.extend([
                Count(**kwargs),
                Max(**kwargs),
                Min(**kwargs),
                P01(**kwargs),
                P05(**kwargs),
                P10(**kwargs),
                P25(**kwargs),
                P50(**kwargs),
                P75(**kwargs),
                P90(**kwargs),
                P95(**kwargs),
                P99(**kwargs)
            ])

            if kwargs["aggregation_field"] in aggregation_fields_with_sum:
                aggregations.append(Sum(**kwargs))

        return [
            pre_result_df.aggregate(
                CompoundAggregation(aggregations=aggregations,
                                    group_fields=self.__dimensions,
                                    aggregation_name=self._component_name))
        ]

    def _process_pipeline(self, json_stream):

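        # Keep only reports that carry a sessionId and flatten the nested voiceReport fields into top-level columns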
        stream = json_stream \
            .filter(col("VoiceReport.voiceReport.sessionId").isNotNull()) \
            .select(
                col("@timestamp"),
                col("header.viewerID").alias("viewerID"),
                col("VoiceReport.voiceReport.sessionId").alias("sessionId"),
                col("VoiceReport.voiceReport.sessionCreationTime").alias("sessionCreationTime"),
                col("VoiceReport.voiceReport.audioPacketLoss").alias("audioPacketLoss"),
                col("VoiceReport.voiceReport.audioTransferTime").alias("audioTransferTime"),
                col("VoiceReport.voiceReport.transactionResult").alias("transactionResult")
            )

        aggregation_fields = [
            "sessionCreationTime", "audioPacketLoss", "audioTransferTime"
        ]
        aggregations = []

        for field in aggregation_fields:
            kwargs = {'aggregation_field': field}

            aggregations.extend([
                Count(**kwargs),
                Max(**kwargs),
                Min(**kwargs),
                P01(**kwargs),
                P05(**kwargs),
                P10(**kwargs),
                P25(**kwargs),
                P50(**kwargs),
                P75(**kwargs),
                P90(**kwargs),
                P95(**kwargs),
                P99(**kwargs)
            ])

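        # Two outputs: statistical aggregations over the configured dimensions, plus a report count per viewerID/sessionId/transactionResult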
        return [
            stream.aggregate(
                CompoundAggregation(aggregations=aggregations,
                                    group_fields=self.__dimensions,
                                    aggregation_name=self._component_name)),
            stream.aggregate(
                Count(group_fields=[
                    "viewerID", "sessionId", "transactionResult"
                ],
                      aggregation_name=self._component_name))
        ]