def create_event_creators(configuration):
    """
    Build the event-creator configuration for VROPS metrics.

    Three creators are combined into one CompositeEventCreator: a source
    parser that splits the raw line, a second regexp pass over the "metrics"
    field, and a MutateEventCreator that maps "metrics" through
    ``convert_influx_str``.

    :param configuration: job configuration, forwarded to ``Utils.get_output_topic``
    :return: MatchField on "source" mapping "VROPS.log" to its SourceConfiguration
    """
    # Rewrites the "metrics" field in place using convert_influx_str.
    custom_dict_event_creator = MutateEventCreator(
        None,
        [FieldsMapping(["metrics"], "metrics", convert_influx_str)],
    )

    # Splits a raw line into group, name, resource kind, metrics and timestamp.
    line_metadata = Metadata([
        StringField("group"),
        StringField("name"),
        StringField("res_kind"),
        StringField("metrics"),
        StringField("timestamp"),
    ])
    line_parser = RegexpParser(
        r"(?s)^(?P<group>[-\w]*),.*name=(?P<name>[^,]*).*kind=(?P<res_kind>[^,]*)"
        r"\s(?P<metrics>.*)\s(?P<timestamp>.*)\n")
    general_creator = EventCreator(line_metadata, line_parser)

    # Second pass over the already extracted "metrics" field.
    metrics_creator = EventCreator(
        Metadata([StringField("metrics")]),
        RegexpParser(r"(?s)^(?P<metrics>[^\[^,]+\S+]*)", return_empty_dict=True),
        field_to_parse="metrics",
    )

    pipeline = CompositeEventCreator()
    pipeline = pipeline.add_source_parser(general_creator)
    pipeline = pipeline.add_intermediate_result_parser(metrics_creator)
    pipeline = pipeline.add_intermediate_result_parser(custom_dict_event_creator)

    vrops_source = SourceConfiguration(
        pipeline,
        Utils.get_output_topic(configuration, "vrops"),
    )
    return MatchField("source", {"VROPS.log": vrops_source})
def test_return_all_named_group(self):
    """Parsing returns a dict holding every named group of the pattern."""
    # Raw string: "\w" is not a valid str escape and triggers an
    # invalid-escape warning when written in a normal string literal.
    parser = RegexpParser(r"(?P<t1>\w+).(?P<t2>\w+).(?P<t3>\w+)")
    expected = {
        "t1": "a",
        "t2": "b",
        "t3": "c",
    }
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expected, parser.parse("a|b|c"))
def test_composite_event_create_equals_fields_and_values(self):
    """
    A composite creator with three "final" intermediate parsers on "term2"
    adds the fields of the parser whose pattern fits the row.

    Uses ``self.main_event_creator`` and ``self.row1``..``self.row3`` from
    the test fixture.
    """
    # All patterns are raw strings: "\w" and "\s" are not valid str escapes.
    url_dependent_metadata = Metadata([
        StringField("case"),
        StringField("url"),
    ])
    url_dependent_event_creator = EventCreator(
        url_dependent_metadata,
        RegexpParser(r"(?P<case>\w+) URL = (?P<url>.*)", return_empty_dict=True),
        field_to_parse="term2")

    method_dependent_metadata = Metadata([
        StringField("case"),
        StringField("method"),
    ])
    method_dependent_event_creator = EventCreator(
        method_dependent_metadata,
        RegexpParser(r"(?P<case>\w+) Method: '(?P<method>\w+)'", return_empty_dict=True),
        field_to_parse="term2")

    duration_dependent_metadata = Metadata([
        StringField("case"),
        StringField("duration"),
    ])
    duration_dependent_event_creator = EventCreator(
        duration_dependent_metadata,
        RegexpParser(r"(?P<case>\w+) Duration\s*'(?P<duration>.*)'", return_empty_dict=True),
        field_to_parse="term2")

    event_creator = (
        CompositeEventCreator()
        .add_source_parser(self.main_event_creator)
        .add_intermediate_result_parser(url_dependent_event_creator, final=True)
        .add_intermediate_result_parser(method_dependent_event_creator, final=True)
        .add_intermediate_result_parser(duration_dependent_event_creator, final=True)
    )

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(
        {
            "term1": "a",
            "term2": "first_case URL = 123",
            "case": "first_case",
            "url": "123"
        },
        event_creator.create(self.row1))
    self.assertEqual(
        {
            "term1": "a",
            "term2": "second_case Method: 'ABC'",
            "case": "second_case",
            "method": "ABC"
        },
        event_creator.create(self.row2))
    self.assertEqual(
        {
            "term1": "a",
            "term2": "third_case Duration '5' ms",
            "case": "third_case",
            "duration": "5"
        },
        event_creator.create(self.row3))
def create_event_creators(configuration):
    """
    Build the event-creator configuration for Mongo logs.

    Reads "timezone.name" and "timezone.priority" from the configuration
    (the latter called with "idc" as second argument, presumably the default)
    and passes both to the timestamp field.

    :param configuration: job configuration object
    :return: MatchField on "source" mapping "mongo.log" to its SourceConfiguration
    """
    tz_name = configuration.property("timezone.name")
    tz_priority = configuration.property("timezone.priority", "idc")

    mongo_fields = Metadata([
        ConfigurableTimestampField(
            "timestamp", "%Y-%m-%dT%H:%M:%S.%f",
            tz_name, tz_priority, "@timestamp",
            include_timezone=True),
        StringField("level"),
        StringField("event_type"),
        StringField("thread"),
        StringField("message"),
    ])
    # timestamp, level, event type, [thread], then the rest as message.
    mongo_line_parser = RegexpParser(
        r"^(?P<timestamp>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+.\d{4})\s+(?P<level>.*?)\s+"
        r"(?P<event_type>.*?)\s+\[(?P<thread>.*?)\]\s(?P<message>.*)")
    mongo_creator = EventCreator(mongo_fields, mongo_line_parser)

    routes = {
        "mongo.log": SourceConfiguration(
            mongo_creator,
            Utils.get_output_topic(configuration, "mongo_parsed")),
    }
    return MatchField("source", routes)
def crid_creator():
    """
    Create the event creator that extracts "crid" from "subtask_message".

    The creator is configured with a SubstringMatcher on "Fabrix input:" and
    a regexp parser that returns an empty dict instead of failing.

    :return: EventCreator producing the "crid" field
    """
    crid_fields = Metadata([StringField("crid")])
    # Captures the path segment that starts with "crid" after the last "/".
    crid_parser = RegexpParser(
        r"Fabrix input:.*\/(?P<crid>crid[^\/]+)",
        return_empty_dict=True)
    return EventCreator(
        crid_fields,
        crid_parser,
        matcher=SubstringMatcher("Fabrix input:"),
        field_to_parse="subtask_message")
def airflow_id_creator():
    """
    Create the event creator that extracts "airflow_id" from "subtask_message".

    The creator is configured with a SubstringMatcher on "Submitting asset:".

    :return: EventCreator producing the "airflow_id" field
    """
    airflow_id_fields = Metadata([StringField("airflow_id")])
    # Two 32-character parts joined by "_".
    # NOTE(review): inside [...] the "|" is a literal pipe, not alternation,
    # so "|" is accepted as an id character - confirm this is intended.
    airflow_id_parser = RegexpParser(
        r"Submitting asset:\s+(?P<airflow_id>[\d|\w]{32}_[\d|\w]{32})",
        return_empty_dict=True)
    return EventCreator(
        airflow_id_fields,
        airflow_id_parser,
        matcher=SubstringMatcher("Submitting asset:"),
        field_to_parse="subtask_message")
def dags_creator():
    """
    Create the event creator that extracts "dag" and "task" from "source".

    Both values are taken from the path below /usr/local/airflow/logs/.

    :return: EventCreator producing the "dag" and "task" fields
    """
    dag_fields = Metadata([StringField("dag"), StringField("task")])
    # NOTE(review): in [\S|^/] the "|" and "^" are literal characters and
    # "/" is already covered by \S - confirm the class does what was intended.
    dag_path_parser = RegexpParser(
        r".*/usr/local/airflow/logs/(?P<dag>\S+)/(?P<task>[\S|^/]+)/.*",
        return_empty_dict=True)
    return EventCreator(dag_fields, dag_path_parser, field_to_parse="source")
def create_event_creators(configuration=None):
    """
    Build the tree of event creators for the poster server logs.

    Note that ``configuration.property`` is called unconditionally, so the
    ``None`` default cannot actually be used; a configuration must be passed.

    :param configuration: YML config
    :return: MatchField on "source"; both log files share one composite creator
    """
    tz_name = configuration.property("timezone.name")
    tz_priority = configuration.property("timezone.priority", "dic")

    log_fields = Metadata([
        ConfigurableTimestampField(
            "timestamp", "%Y-%m-%d %H:%M:%S,%f",
            tz_name, tz_priority, "@timestamp"),
        StringField("level"),
        StringField("module"),
        StringField("message"),
    ])
    log_parser = RegexpParser(
        r"^(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\,\d{3})"
        r"\s+(?P<level>\w+?)\s+(?P<module>\w+)\s+(?P<message>.*)")
    poster_server_log = EventCreator(log_fields, log_parser)

    # Pulls "crid..." (up to the first backslash) out of the parsed message.
    crid_creator = EventCreator(
        Metadata([StringField("crid")]),
        RegexpParser(r".*(?P<crid>crid[^\\]*)", return_empty_dict=True),
        matcher=SubstringMatcher("crid"),
        field_to_parse="message")

    composite_event_creator = CompositeEventCreator()
    composite_event_creator = composite_event_creator.add_source_parser(poster_server_log)
    composite_event_creator = composite_event_creator.add_intermediate_result_parser(crid_creator)

    # The same composite instance serves both files; only the topic differs.
    error_log_source = SourceConfiguration(
        composite_event_creator,
        Utils.get_output_topic(configuration, "poster_server_error_log"))
    regular_log_source = SourceConfiguration(
        composite_event_creator,
        Utils.get_output_topic(configuration, "poster_server_log"))

    return MatchField("source", {
        "PosterServer.Error.log": error_log_source,
        "PosterServer.log": regular_log_source,
    })
def test_composite_event_create_substring_matching(self):
    """
    Intermediate parsers guarded by SubstringMatcher("first_case") add their
    fields for ``self.row1`` and leave ``self.row2`` with the source fields only.

    Uses ``self.main_event_creator``, ``self.row1`` and ``self.row2`` from
    the test fixture.
    """
    # Patterns are raw strings: "\w" is not a valid str escape.
    url_dependent_metadata = Metadata([
        StringField("case"),
        StringField("url"),
    ])
    url_dependent_event_creator = EventCreator(
        url_dependent_metadata,
        RegexpParser(r"(?P<case>\w+) URL = (?P<url>.*)", return_empty_dict=True),
        matcher=SubstringMatcher("first_case"),
        field_to_parse="term2")

    method_dependent_metadata = Metadata([
        StringField("case"),
        StringField("method"),
    ])
    method_dependent_event_creator = EventCreator(
        method_dependent_metadata,
        RegexpParser(r"(?P<case>\w+) Method: '(?P<method>\w+)'"),
        matcher=SubstringMatcher("first_case"),
        field_to_parse="term2")

    # Parenthesised chain: the original ended with a dangling "\" line
    # continuation after the last call, which is easy to break by accident.
    event_creator = (
        CompositeEventCreator()
        .add_source_parser(self.main_event_creator)
        .add_intermediate_result_parser(url_dependent_event_creator, final=True)
        .add_intermediate_result_parser(method_dependent_event_creator, final=True)
    )

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(
        {
            "term1": "a",
            "term2": "first_case URL = 123",
            "case": "first_case",
            "url": "123"
        },
        event_creator.create(self.row1))
    self.assertEqual(
        {
            "term1": "a",
            "term2": "second_case Method: 'ABC'"
        },
        event_creator.create(self.row2))
def general_worker_creator(timezone_name, timezones_property):
    """
    Create the event creator for lines shaped as
    "[timestamp] {script} LEVEL - message".

    :param timezone_name: passed to the timestamp field
    :param timezones_property: passed to the timestamp field
    :return: EventCreator producing timestamp, script, level and message
    """
    worker_fields = Metadata([
        ConfigurableTimestampField(
            "timestamp", "%Y-%m-%d %H:%M:%S,%f",
            timezone_name, timezones_property, "@timestamp"),
        StringField("script"),
        StringField("level"),
        StringField("message"),
    ])
    # The message group uses (.|\s)* so it also spans line breaks.
    worker_parser = RegexpParser(
        r"^\[(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})\] "
        r"\{(?P<script>[^\}]+)\} (?P<level>\w+?) - (?P<message>(.|\s)*)"
    )
    return EventCreator(worker_fields, worker_parser)
def manager_scheduler_airflow_event_creator(timezone_name, timezones_property):
    """
    Create the event creator for lines shaped as
    "[timestamp] {script:line} LEVEL - message".

    :param timezone_name: passed to the timestamp field
    :param timezones_property: passed to the timestamp field
    :return: EventCreator producing timestamp, script, script_line, level
        and message
    """
    scheduler_fields = Metadata([
        ConfigurableTimestampField(
            "timestamp", "%Y-%m-%d %H:%M:%S,%f",
            timezone_name, timezones_property, "@timestamp"),
        StringField("script"),
        StringField("script_line"),
        StringField("level"),
        StringField("message"),
    ])
    scheduler_parser = RegexpParser(
        r"^\[(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})\]\s+"
        r"\{(?P<script>.*?):(?P<script_line>.*?)\}\s+(?P<level>\w+?)\s+-\s+(?P<message>.*)"
    )
    return EventCreator(scheduler_fields, scheduler_parser)
def subtask_creator(timezone_name, timezones_property):
    """
    Create the event creator that parses a "Subtask: [...]" entry found in
    the "message" field into subtask_* fields.

    The creator is configured with a SubstringMatcher on "Subtask:" and a
    regexp parser that returns an empty dict instead of failing.

    :param timezone_name: passed to the subtask timestamp field
    :param timezones_property: passed to the subtask timestamp field
    :return: EventCreator producing subtask_timestamp, subtask_script,
        subtask_level and subtask_message
    """
    subtask_fields = Metadata([
        ConfigurableTimestampField(
            "subtask_timestamp", "%Y-%m-%d %H:%M:%S,%f",
            timezone_name, timezones_property),
        StringField("subtask_script"),
        StringField("subtask_level"),
        StringField("subtask_message"),
    ])
    # The line number after the script name is matched but not captured.
    subtask_parser = RegexpParser(
        r"^Subtask: \[(?P<subtask_timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})\]"
        r" \{(?P<subtask_script>[^\}]+):\d+\} (?P<subtask_level>\w+?) - (?P<subtask_message>(?:.|\s)*)",
        return_empty_dict=True)
    return EventCreator(
        subtask_fields,
        subtask_parser,
        matcher=SubstringMatcher("Subtask:"),
        field_to_parse="message")
def ip_webui_manager_creator(timezone_name, timezones_property):
    """
    Create the event creator for lines shaped as
    "<ip> - - [dd/Mon/yyyy:HH:MM:SS +zzzz] message".

    The regexp parser returns an empty dict instead of failing.

    :param timezone_name: passed to the timestamp field
    :param timezones_property: passed to the timestamp field
    :return: EventCreator producing timestamp, message and ip
    """
    access_fields = Metadata([
        ConfigurableTimestampField(
            "timestamp", "%d/%b/%Y:%H:%M:%S",
            timezone_name, timezones_property, "@timestamp",
            include_timezone=True),
        StringField("message"),
        StringField("ip"),
    ])
    access_parser = RegexpParser(
        r"^(?P<ip>.*?)\s+-\s+-\s\[(?P<timestamp>\d{2}\/\w+\/\d{4}:\d{2}:\d{2}:\d{2}\s.\d{4})\]\s+"
        r"(?P<message>.*)",
        return_empty_dict=True)
    return EventCreator(access_fields, access_parser)
def script_webui_manager_creator(timezone_name, timezones_property):
    """
    Create the event creator for lines shaped as
    "[timestamp] [thread_id] {script:line} LEVEL - message".

    The parser is created with ``return_empty_dict=False``.

    :param timezone_name: passed to the timestamp field
    :param timezones_property: passed to the timestamp field
    :return: EventCreator producing timestamp, thread_id, script,
        script_line, message and level
    """
    script_fields = Metadata([
        ConfigurableTimestampField(
            "timestamp", "%Y-%m-%d %H:%M:%S,%f",
            timezone_name, timezones_property, "@timestamp"),
        StringField("thread_id"),
        StringField("script"),
        StringField("script_line"),
        StringField("message"),
        StringField("level"),
    ])
    script_parser = RegexpParser(
        r"^\[(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})\]\s+\[(?P<thread_id>.*?)\]\s+"
        r"\{(?P<script>.*?):(?P<script_line>.*?)\}\s+(?P<level>\w+?)\s+-\s+(?P<message>.*)",
        return_empty_dict=False)
    return EventCreator(script_fields, script_parser)
def webui_manager_creator(timezone_name, timezones_property):
    """
    Create the event creator for lines shaped as
    "[timestamp] [thread_id] [LEVEL] message".

    The regexp parser returns an empty dict instead of failing.

    :param timezone_name: passed to the timestamp field
    :param timezones_property: passed to the timestamp field
    :return: EventCreator producing timestamp, thread_id, message and level
    """
    webui_fields = Metadata([
        ConfigurableTimestampField(
            "timestamp", "%Y-%m-%d %H:%M:%S",
            timezone_name, timezones_property, "@timestamp",
            include_timezone=True),
        StringField("thread_id"),
        StringField("message"),
        StringField("level"),
    ])
    webui_parser = RegexpParser(
        r"^\[(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\s+?.\d*?)\]\s+?"
        r"\[(?P<thread_id>.*?)\]\s+?\[(?P<level>\w+?)\]\s+?(?P<message>.*)",
        return_empty_dict=True)
    return EventCreator(webui_fields, webui_parser)
def create_event_creators(config):
    """
    Build the event-creator configuration for Traxis Backend logs.

    Messages are routed by "topic" first; for "traxis_backend_log_gen" they
    are routed further by "source". Only "TraxisService.log" gets the
    activity-specific parsers on top of the general one.

    :param config: Job configuration.
    :return: MatchField on "topic".
    """
    tz_name = config.property("timezone.name")
    tz_priority = config.property("timezone.priority", "dic")

    def activity_task_duration_fields():
        # Field set shared by the three parsers that report a task duration.
        return Metadata([
            StringField("activity"),
            StringField("task"),
            IntField("duration_ms"),
        ])

    # General line layout: timestamp, level, [bracketed part], message.
    general_event_creator = EventCreator(
        Metadata([
            ConfigurableTimestampField(
                "timestamp", "%Y-%m-%d %H:%M:%S,%f",
                tz_name, tz_priority, "@timestamp"),
            StringField("level"),
            StringField("message"),
        ]),
        RegexpParser(
            r"^(?P<timestamp>\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}.\d+)\s+"
            r"(?P<level>\S+)\s+\[[^\]]+\]\s+(?P<message>[\s\S]*)"))

    tva_ingest_event_creator = EventCreator(
        Metadata([StringField("activity"), StringField("request_id")]),
        RegexpParser(
            r"^(?P<activity>OnlineTvaIngest).*\[RequestId\s=\s(?P<request_id>[^]]+)\][\s\S]*",
            return_empty_dict=True),
        matcher=SubstringMatcher("OnlineTvaIngest"))

    tva_manager_event_creator = EventCreator(
        activity_task_duration_fields(),
        RegexpParser(
            r"^(?P<activity>TvaManager).*\[Task\s=\s(?P<task>[^]]+)\].*took\s'(?P<duration_ms>\d+)'\sms[\s\S]*",
            return_empty_dict=True),
        matcher=SubstringMatcher("TvaManager"))

    parsing_context_event_creator = EventCreator(
        activity_task_duration_fields(),
        RegexpParser(
            r"^(?P<activity>ParsingContext).*\[Task\s=\s(?P<task>[^]]+)\]\s"
            r"Tva\singest\scompleted,\sduration\s=\s(?P<duration_ms>\d+)\sms[\s\S]*",
            return_empty_dict=True),
        matcher=SubstringMatcher("Tva ingest completed, duration"))

    write_actions_event_creator = EventCreator(
        activity_task_duration_fields(),
        RegexpParser(
            r"^(?P<activity>ParsingContext).*\[Task\s=\s(?P<task>[^]]+)\]\s"
            r"Number\sof\swrite\sactions\squeued.*took\s(?P<duration_ms>\d+)\sms[\s\S]*",
            return_empty_dict=True),
        matcher=SubstringMatcher("Number of write actions queued"))

    # General parser followed by the four activity parsers, each marked final.
    service_log_creator = (
        CompositeEventCreator()
        .add_source_parser(general_event_creator)
        .add_intermediate_result_parser(tva_ingest_event_creator, final=True)
        .add_intermediate_result_parser(tva_manager_event_creator, final=True)
        .add_intermediate_result_parser(parsing_context_event_creator, final=True)
        .add_intermediate_result_parser(write_actions_event_creator, final=True)
    )

    general_topic_sources = MatchField("source", {
        "TraxisService.log": SourceConfiguration(
            service_log_creator,
            Utils.get_output_topic(config, "general")),
        "TraxisServiceDistributedScheduler.log": SourceConfiguration(
            general_event_creator,
            Utils.get_output_topic(config, "scheduler")),
        "TraxisServiceLogManagement.log": SourceConfiguration(
            general_event_creator,
            Utils.get_output_topic(config, "management")),
    })
    error_topic_source = SourceConfiguration(
        general_event_creator,
        Utils.get_output_topic(config, "error"))

    return MatchField("topic", {
        "traxis_backend_log_gen": general_topic_sources,
        "traxis_backend_log_err": error_topic_source,
    })
def create_event_creators(configuration):
    """
    Build the event-creator configuration for the Traxis Frontend component.

    A base composite creator (general line parser plus ip / request id /
    customer id / x-request-id extractors) is used for every log file;
    "TraxisService.log" additionally chains query-metrics and method parsers.

    :param configuration: job configuration object
    :return: MatchField configuration for Traxis Frontend, keyed on "source"
    """
    tz_name = configuration.property("timezone.name")
    tz_priority = configuration.property("timezone.priority", "dic")

    # General line layout: timestamp LEVEL [thread] component - message.
    event_creator = EventCreator(
        Metadata([
            ConfigurableTimestampField(
                "timestamp", "%Y-%m-%d %H:%M:%S,%f",
                tz_name, tz_priority, "@timestamp"),
            StringField("level"),
            StringField("thread_name"),
            StringField("component"),
            StringField("message"),
        ]),
        RegexpParser(
            r"(?s)^(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})"
            r"\s*"
            r"(?P<level>\w+)"
            r"\s*"
            r"\[(?P<thread_name>.*?)\]"
            r"\s*"
            r"(?P<component>\w+)"
            r"\s*-\s*"
            r"(?P<message>.*)$"))

    ip_event_creator = EventCreator(
        Metadata([StringField("ip")]),
        RegexpParser(r"^\[(?P<ip>[0-9,\.: ]*?)\].*", return_empty_dict=True))

    # The second StringField argument is presumably the output field name.
    request_id_event_creator = EventCreator(
        Metadata([StringField("request_id", "request-id")]),
        RegexpParser(r"^.*\[RequestId = (?P<request_id>.*?)\].*", return_empty_dict=True),
        matcher=SubstringMatcher("RequestId ="))

    obo_customer_id_event_creator = EventCreator(
        Metadata([StringField("obo_customer_id", "obo-customer-id")]),
        RegexpParser(r"^.*\[CustomerId = (?P<obo_customer_id>.*?)\].*", return_empty_dict=True),
        matcher=SubstringMatcher("CustomerId ="))

    x_request_id_event_creator = EventCreator(
        Metadata([StringField("x_request_id", "x-request-id")]),
        RegexpParser(
            r"(\n|.)*x-request-id:\s(?P<x_request_id>[a-z0-9- ]*).*",
            return_empty_dict=True),
        matcher=SubstringMatcher("x-request-id:"))

    method_duration_event_creator = EventCreator(
        Metadata([StringField("method"), StringField("duration")]),
        RegexpParser(
            r"^.*Executing method \'(?P<method>.*?)\' took \'(?P<duration>.*?)\'.*",
            return_empty_dict=True),
        matcher=SubstringMatcher("Executing method"))

    method_invoked_event_creator = EventCreator(
        Metadata([
            StringField("method"),
            StringField("identity"),
            StringField("product_id", "productId"),
        ]),
        RegexpParser(
            r"^.*Method \'(?P<method>.*?)\' invoked with parameters\: identity = (?P<identity>.*?)\, productId ="
            r" (?P<product_id>.*?)(\,.*|$)",
            return_empty_dict=True),
        matcher=SubstringMatcher("invoked with parameters"))

    cannot_purchase_product_event_creator = EventCreator(
        Metadata([StringField("product_id", "productId")]),
        RegexpParser(
            r"^.*Cannot purchase products of type \'Subscription\'.*productId \'(?P<product_id>.*?)\'$",
            return_empty_dict=True),
        matcher=SubstringMatcher("Cannot purchase products of type"))

    query_metrics_event_creator = EventCreator(
        Metadata([StringField("query_metrics")]),
        RegexpParser(r"^.*QueryMetrics:(?P<query_metrics>.*)", return_empty_dict=True),
        matcher=SubstringMatcher("QueryMetrics"))

    # Splits the "query_metrics" value on "," and "=" into IntField values.
    key_value_event_creator = SingleTypeEventCreator(
        IntField(None),
        KeyValueParser(",", "="),
        field_to_parse="query_metrics")

    id_event_creator = (
        CompositeEventCreator()
        .add_source_parser(event_creator)
        .add_intermediate_result_parser(ip_event_creator)
        .add_intermediate_result_parser(request_id_event_creator)
        .add_intermediate_result_parser(obo_customer_id_event_creator)
        .add_intermediate_result_parser(x_request_id_event_creator)
    )

    # NOTE(review): this chain starts from id_event_creator itself. If
    # add_intermediate_result_parser mutates and returns the same object,
    # the other log files below receive these extra parsers too - confirm.
    service_log_creator = (
        id_event_creator
        .add_intermediate_result_parser(query_metrics_event_creator)
        .add_intermediate_result_parser(key_value_event_creator, final=True)
        .add_intermediate_result_parser(method_duration_event_creator, final=True)
        .add_intermediate_result_parser(method_invoked_event_creator, final=True)
        .add_intermediate_result_parser(cannot_purchase_product_event_creator, final=True)
    )

    return MatchField("source", {
        "TraxisService.log": SourceConfiguration(
            service_log_creator,
            Utils.get_output_topic(configuration, "general")),
        "TraxisServiceError.log": SourceConfiguration(
            id_event_creator,
            Utils.get_output_topic(configuration, "error")),
        "TraxisServiceDistributedScheduler.log": SourceConfiguration(
            id_event_creator,
            Utils.get_output_topic(configuration, "scheduler")),
        "TraxisServiceLogManagement.log": SourceConfiguration(
            id_event_creator,
            Utils.get_output_topic(configuration, "management")),
    })
def test_return_only_named_group(self):
    """Unnamed groups in the pattern are left out of the parsed result."""
    # Raw string: "\w" is not a valid str escape.
    parser = RegexpParser(r"(?P<term>\w+).(\w+)")
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual({"term": "a"}, parser.parse("a|b"))
def create_event_creators(config):
    """
    Build the source configuration for the UServices component.

    The composite creator parses the JSON message, re-parses "@timestamp",
    runs the URL-query creator on "http_urlquery" and finally applies the
    predicate creator that sets "api_method".

    :param config: job configuration
    :return: SourceConfiguration with the composite event creator for UServices
    """
    tz_name = config.property("timezone.name")
    tz_priority = config.property("timezone.priority", "dic")

    # JSON parser flattening the "http" and "header" fields with "_".
    json_event_creator = SingleTypeEventCreator(
        StringField(None),
        JsonParser(
            keys_mapper=None,
            values_mapper=None,
            flatten=True,
            delimiter="_",
            fields_to_flat=["http", "header"]))

    timestamp_event_creator = EventCreator(
        Metadata([
            ConfigurableTimestampField(
                "timestamp", "%Y-%m-%dT%H:%M:%S.%fZ",
                tz_name, tz_priority, "@timestamp"),
        ]),
        RegexpParser(
            r"^(?P<timestamp>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z)"),
        field_to_parse="@timestamp")

    http_url_query_event_creator = EventWithUrlCreator(
        url_query_field="http_urlquery",
        delete_source_field=False)

    # (service, keyword) pairs; each becomes
    # ([service, keyword], {"api_method": keyword}).
    service_keywords = [
        ("recording-service", "bookings"),
        ("recording-service", "recordings"),
        ("purchase-service", "history"),
        ("purchase-service", "entitlements"),
        ("vod-service", "contextualvod"),
        ("vod-service", "detailscreen"),
        ("vod-service", "gridscreen"),
        ("discovery-service", "learn-actions"),
        ("discovery-service", "search"),
        ("discovery-service", "recommendations"),
        ("session-service", "channels"),
        ("session-service", "cpes"),
    ]
    api_method_rules = [
        ([service, keyword], {"api_method": keyword})
        for service, keyword in service_keywords
    ]
    api_methods_event_creator = PredicateEventCreator(
        ["app", "header_x-original-uri"],
        api_method_rules)

    pipeline = (
        CompositeEventCreator()
        .add_source_parser(json_event_creator)
        .add_intermediate_result_parser(timestamp_event_creator)
        .add_intermediate_result_parser(http_url_query_event_creator)
        .add_intermediate_result_parser(api_methods_event_creator, final=True)
    )
    return SourceConfiguration(
        pipeline,
        Utils.get_output_topic(config, "uservices_parsed_logs"))
def test_throw_exception_if_not_match(self):
    """A parser created without return_empty_dict raises ParsingException on a mismatch."""
    # Raw string: "\d" is not a valid str escape.
    parser = RegexpParser(r"(?P<term>\d+)")
    self.assertRaises(ParsingException, parser.parse, "a")
def test_return_none_if_does_not_match(self):
    """With return_empty_dict=True a mismatch yields an empty dict."""
    # Raw string: "\d" is not a valid str escape.
    parser = RegexpParser(r"(?P<term>\d+)", return_empty_dict=True)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual({}, parser.parse("a"))
def manager_dag_run_creator():
    """
    Create the event creator that extracts "dag" and "tenant" from a
    "<DagRun ..." entry.

    "tenant" is the part of the dag name before its first "_". A
    SubstringMatcher on "DagRun" is passed as the third positional argument.

    :return: EventCreator producing the "dag" and "tenant" fields
    """
    dag_run_fields = Metadata([StringField("dag"), StringField("tenant")])
    dag_run_parser = RegexpParser(
        r".*<DagRun\s+(?P<dag>(?P<tenant>.*?)_.*?)\s+.*")
    return EventCreator(
        dag_run_fields,
        dag_run_parser,
        SubstringMatcher("DagRun"))
def manager_dags_creator():
    """
    Create the event creator that extracts "dag" and "tenant" from a
    "DAG(s) ... ['<dag>']" entry.

    "tenant" is the part of the dag name before its first "_". A
    SubstringMatcher on "DAG(s)" is passed as the third positional argument.

    :return: EventCreator producing the "dag" and "tenant" fields
    """
    dags_fields = Metadata([StringField("dag"), StringField("tenant")])
    # Note: "G?" makes the "G" optional, so "DA(s)" is accepted as well.
    dags_parser = RegexpParser(
        r".*DAG?\(s\).*\['(?P<dag>(?P<tenant>.*?)_.*?)'\].*")
    return EventCreator(
        dags_fields,
        dags_parser,
        SubstringMatcher("DAG(s)"))
def test_event_create_equals_fields_and_values(self):
    """
    An EventCreator built from ``self.metadata`` turns ``self.row`` into the
    expected field/value dict.
    """
    # Raw string: "\w" is not a valid str escape.
    event_creator = EventCreator(
        self.metadata,
        RegexpParser(r"(?P<term1>\w+).(?P<term2>\w+)"))
    expected = {"term1_result": "a", "term2": "b"}
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expected, event_creator.create(self.row))