def start_test_pipeline(create_processor, input_data, config_path):
    """
    Creates a test pipeline and returns its results
    :param create_processor: factory method for the processor under test
    :param input_data: in-memory input rows fed into the pipeline
    :param config_path: path to the pipeline configuration file
    """
    configuration = Utils.load_config(config_path)
    batch = TestPipeline(configuration, create_processor(configuration), input_data)
    # return the streams the pipeline would have written (private attribute,
    # used here for inspection in tests)
    return batch._write_streams
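# Usage sketch (hypothetical names): 'create_my_processor', the input row and
# the config path below are illustrative placeholders, not part of this
# module. It assumes TestPipeline exposes its collected output via
# '_write_streams', as returned above.
#
#   results = start_test_pipeline(
#       create_my_processor,                  # factory: configuration -> processor
#       [{"message": "sample log line"}],     # in-memory input rows
#       "test/resources/test_config.yml",     # hypothetical config path
#   )
#   for stream in results:
#       print(stream)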
def __init__(self):
    conf = Utils.load_config(sys.argv[:])
    self.__system = conf.property('system')
    self.__type = conf.property('type')
    props = conf.property('extractors')
    # Build one extractor for the start marker and one for the stop marker,
    # in the order they appear in the 'extractors' configuration property.
    start_extractor = self.extractor(props[0], "start")
    stop_extractor = self.extractor(props[1], "stop")
    BaseSparkJob.__init__(self, conf, extractors=[start_extractor, stop_extractor])
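# Configuration sketch (illustrative values, format assumed; not a real config
# file from this repo): the constructor above expects 'system', 'type', and a
# two-element 'extractors' list, read in start/stop order.
#
#   system: my_system            # hypothetical value
#   type: my_type                # hypothetical value
#   extractors:
#     - start_pattern_config     # props[0] -> "start" extractor
#     - stop_pattern_config      # props[1] -> "stop" extractor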
def start_batch_processing_pipeline(create_processor):
    """
    Creates the batch processing pipeline
    :param create_processor: factory method for the batch processor
    """
    configuration = Utils.load_config(sys.argv[:])
    MainPipeline(
        configuration,
        create_processor(configuration)
    )
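# Usage sketch (hypothetical name): 'create_my_batch_processor' is an
# illustrative factory, not part of this module. Note that, unlike the Kafka
# pipelines below, MainPipeline is only constructed here and never explicitly
# started, so it is presumably driven from its constructor.
#
#   def create_my_batch_processor(configuration):
#       ...  # build and return a processor from the loaded configuration
#
#   start_batch_processing_pipeline(create_my_batch_processor)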
def start_log_parsing_pipeline(create_event_creators_tree):
    """
    Creates the log parsing Kafka pipeline and starts it
    :param create_event_creators_tree: factory method for the event creators tree
    """
    configuration = Utils.load_config(sys.argv[:])
    KafkaPipeline(
        configuration,
        LogParsingProcessor(configuration, create_event_creators_tree(configuration))
    ).start()
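# Usage sketch (hypothetical name): 'create_my_event_creators' is an
# illustrative factory. The configuration is loaded from sys.argv, so the
# driver script is expected to receive the config file on the command line.
#
#   def create_my_event_creators(configuration):
#       ...  # assemble and return the event creators tree
#
#   start_log_parsing_pipeline(create_my_event_creators)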
def start_basic_analytics_pipeline(create_basic_analytic_processor):
    """
    Creates the basic analytics Kafka pipeline and starts it
    :param create_basic_analytic_processor: factory method for the basic analytic processor
    """
    configuration = Utils.load_config(sys.argv[:])
    KafkaPipeline(
        configuration,
        create_basic_analytic_processor(configuration)
    ).start()
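# Invocation sketch (hypothetical file and path names): because
# Utils.load_config receives sys.argv, each of these entry points takes its
# configuration from the command line, e.g. via spark-submit or plain python,
# depending on deployment:
#
#   spark-submit basic_analytics_driver.py /path/to/config.yml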
Metadata([
    StringField("task"),
    StringField("sequence_number"),
    StringField("number"),
    IntField("entities"),
    IntField("links"),
    IntField("channels"),
    IntField("events"),
    IntField("programs"),
    IntField("groups"),
    IntField("on_demand_programs"),
    IntField("broadcast_events"),
]),
# Group names are kept in snake_case so they line up with the metadata
# field names above.
RegexpParser(
    r"^\[Model\] (?P<task>Model state after committing transaction) "
    r"\[Sequence number: (?P<sequence_number>\d+).*Number: (?P<number>\d+)\] "
    r"Entities: (?P<entities>\d+) - Links: (?P<links>\d+) - Channels: (?P<channels>\d+)"
    r" - Events: (?P<events>\d+) - Programs: (?P<programs>\d+) - Groups: (?P<groups>\d+)"
    r" - OnDemandPrograms: (?P<on_demand_programs>\d+) - BroadcastEvents: (?P<broadcast_events>\d+)\s*",
    return_empty_list=True),
matcher=SubstringMatcher(
    "Model state after committing transaction"))

if __name__ == "__main__":
    configuration = Utils.load_config(sys.argv[:])
    KafkaPipeline(
        configuration,
        LogParsingProcessor(configuration, create_event_creators(configuration))
    ).start()
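# Self-check sketch (intended for a test module rather than this driver):
# rebuilds the pattern above with the standard 're' module and matches it
# against a hand-written sample line. The sample is illustrative, not taken
# from real logs; the asserts verify that the group names line up with the
# metadata field names.
import re

_MODEL_STATE_RE = re.compile(
    r"^\[Model\] (?P<task>Model state after committing transaction) "
    r"\[Sequence number: (?P<sequence_number>\d+).*Number: (?P<number>\d+)\] "
    r"Entities: (?P<entities>\d+) - Links: (?P<links>\d+) - Channels: (?P<channels>\d+)"
    r" - Events: (?P<events>\d+) - Programs: (?P<programs>\d+) - Groups: (?P<groups>\d+)"
    r" - OnDemandPrograms: (?P<on_demand_programs>\d+) - BroadcastEvents: (?P<broadcast_events>\d+)\s*")

_SAMPLE = ("[Model] Model state after committing transaction "
           "[Sequence number: 42, Number: 7] "
           "Entities: 100 - Links: 20 - Channels: 5 - Events: 300 - Programs: 250"
           " - Groups: 10 - OnDemandPrograms: 40 - BroadcastEvents: 60")

_match = _MODEL_STATE_RE.match(_SAMPLE)
assert _match is not None
assert _match.group("on_demand_programs") == "40"
assert _match.group("broadcast_events") == "60"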