from_unixtime(col("header.ts") / 1000).cast(TimestampType()))
# NOTE(review): the line above is the tail of a statement that begins before
# this view; it is kept verbatim. The definitions below take `self` or are
# reached through `EosStbOdhProcessor.`, so they presumably belong to that
# class, whose header is not visible here -- confirm the indentation level
# against the full file.


def _process_pipeline(self, read_stream):
    """Build the aggregations produced for this component.

    A ``stb_id`` column is added to ``read_stream`` from ``header.viewerID``;
    both aggregations are then requested on that derived stream.

    :param read_stream: stream object exposing ``withColumn`` and ``aggregate``.
    :return: two-element list -- the ``Count`` aggregation named
        ``<component name>.request`` followed by the ``DistinctCount``
        aggregation over ``stb_id`` named ``<component name>``.
    """
    stb_ids = read_stream.withColumn("stb_id", col("header.viewerID"))
    # Count aggregation, named "<component name>.request".
    requests_count = stb_ids.aggregate(
        Count(aggregation_name=self._component_name + ".request"))
    # Distinct count over the derived stb_id column.
    stb_ids_distinct_count = stb_ids.aggregate(
        DistinctCount(aggregation_field="stb_id",
                      aggregation_name=self._component_name))
    return [requests_count, stb_ids_distinct_count]


@staticmethod
def create_schema():
    """Return the input schema: one ``header`` struct holding ``ts``
    (``LongType``) and ``viewerID`` (``StringType``)."""
    return StructType([
        StructField(
            "header", StructType([
                StructField("ts", LongType()),
                StructField("viewerID", StringType()),
            ]))
    ])


@staticmethod
def create_processor(configuration):
    """Return an ``EosStbOdhProcessor`` constructed from ``configuration``."""
    return EosStbOdhProcessor(configuration)


if __name__ == "__main__":
    # Script entry point: pass the processor factory to the pipeline starter.
    start_basic_analytics_pipeline(EosStbOdhProcessor.create_processor)
# NOTE(review): the three double-underscore functions below take `self`, so
# they presumably are methods of the WiFi report processor class whose header
# lies outside this view -- confirm the indentation level against the full file.
def __distinct_total_wifi_network_types_count(self, common_wifi_pipeline):
    """Request a distinct ``viewer_id`` count over rows that carried traffic.

    Rows are kept when ``rxKbps`` or ``txKbps`` is greater than zero. The
    aggregation is named ``<component name>.network``.

    :param common_wifi_pipeline: pipeline exposing ``where`` and ``aggregate``.
    :return: whatever ``aggregate`` returns for the ``DistinctCount`` request.
    """
    has_traffic = (col("rxKbps") > 0) | (col("txKbps") > 0)
    active_rows = common_wifi_pipeline.where(has_traffic)
    distinct_viewers = DistinctCount(
        aggregation_field="viewer_id",
        aggregation_name=self._component_name + ".network",
    )
    return active_rows.aggregate(distinct_viewers)


def __wireless_average_upstream_kbps(self, common_wifi_pipeline):
    """Request the average of ``txKbps`` over rows where it is not NULL.

    The aggregation is named ``<component name>.upstream_kbps``.

    :param common_wifi_pipeline: pipeline exposing ``where`` and ``aggregate``.
    :return: whatever ``aggregate`` returns for the ``Avg`` request.
    """
    rows_with_tx = common_wifi_pipeline.where("txKbps is not NULL")
    upstream_average = Avg(
        aggregation_field="txKbps",
        aggregation_name=self._component_name + ".upstream_kbps",
    )
    return rows_with_tx.aggregate(upstream_average)


def __wireless_average_downstream_kbps(self, common_wifi_pipeline):
    """Request the average of ``rxKbps`` over rows where it is not NULL.

    The aggregation is named ``<component name>.downstream_kbps``.

    :param common_wifi_pipeline: pipeline exposing ``where`` and ``aggregate``.
    :return: whatever ``aggregate`` returns for the ``Avg`` request.
    """
    rows_with_rx = common_wifi_pipeline.where("rxKbps is not NULL")
    downstream_average = Avg(
        aggregation_field="rxKbps",
        aggregation_name=self._component_name + ".downstream_kbps",
    )
    return rows_with_rx.aggregate(downstream_average)


def create_processor(configuration):
    """Build a ``WifiReportEventProcessor`` from ``configuration`` and the
    schema returned by ``WifiReportEventProcessor.create_schema()``."""
    processor_cls = WifiReportEventProcessor
    schema = processor_cls.create_schema()
    return processor_cls(configuration, schema)


if __name__ == "__main__":
    # Script entry point: pass the processor factory to the pipeline starter.
    start_basic_analytics_pipeline(create_processor)
# NOTE(review): the definitions below take `self` or are reached through
# `GeneralEosStbProcessor.`, so they presumably belong to that class, whose
# header lies outside this view -- confirm the indentation level against the
# full file.
def _prepare_timefield(self, data_stream):
    """Add an ``@timestamp`` column derived from the ``time`` column.

    ``time`` is divided by 1000 before being handed to ``from_unixtime``
    (presumably converting epoch milliseconds to seconds -- confirm), and the
    result is cast to ``TimestampType``.

    :param data_stream: stream object exposing ``withColumn``.
    :return: the result of ``withColumn`` with ``@timestamp`` added.
    """
    scaled_time = col("time") / 1000
    event_timestamp = from_unixtime(scaled_time).cast(TimestampType())
    return data_stream.withColumn("@timestamp", event_timestamp)


def _process_pipeline(self, read_stream):
    """Build the aggregations produced for this component.

    :param read_stream: stream object exposing ``withColumn`` and ``aggregate``.
    :return: two-element list -- the ``Count`` aggregation named
        ``<component name>.request`` followed by the ``DistinctCount``
        aggregation over ``stb_id`` named ``<component name>``.
    """
    request_counter = Count(
        aggregation_name=self._component_name + ".request")
    requests_count = read_stream.aggregate(request_counter)

    # stb_id is taken from the xdev column.
    with_stb_id = read_stream.withColumn("stb_id", col("xdev"))
    stb_counter = DistinctCount(
        aggregation_field="stb_id",
        aggregation_name=self._component_name,
    )
    stb_ids_distinct_count = with_stb_id.aggregate(stb_counter)

    return [requests_count, stb_ids_distinct_count]


@staticmethod
def create_schema():
    """Return the input schema: ``time`` (``LongType``) and ``xdev``
    (``StringType``)."""
    fields = [
        StructField("time", LongType()),
        StructField("xdev", StringType()),
    ]
    return StructType(fields)


@staticmethod
def create_processor(configuration):
    """Return a ``GeneralEosStbProcessor`` constructed from ``configuration``."""
    processor = GeneralEosStbProcessor(configuration)
    return processor


if __name__ == "__main__":
    # Script entry point: pass the processor factory to the pipeline starter.
    start_basic_analytics_pipeline(GeneralEosStbProcessor.create_processor)