def test_parse_combined(self):
    """
    Checks that we can parse the standard ('combined') format.

    log example:
    127.0.0.1 - - [02/Jul/2015:14:49:48 +0000] "GET /basic_status HTTP/1.1" 200 110 "-" "python-requests/2.2.1 CPython/2.7.6 Linux/3.13.0-48-generic"
    """
    parser = NginxAccessLogParser()
    line = '127.0.0.1 - - [02/Jul/2015:14:49:48 +0000] "GET /basic_status HTTP/1.1" 200 110 "-" "python-requests/2.2.1 CPython/2.7.6 Linux/3.13.0-48-generic"'
    parsed = parser.parse(line)

    # basic keys: every variable of the combined format must be present
    common_expected_keys = ['remote_addr', 'remote_user', 'time_local', 'request', 'status', 'body_bytes_sent', 'http_referer', 'http_user_agent']
    for key in common_expected_keys:
        assert_that(parsed, has_item(key))

    # status stays a string, body_bytes_sent is converted to int
    assert_that(parsed['status'], equal_to('200'))
    assert_that(parsed['body_bytes_sent'], equal_to(110))
    assert_that(parsed['remote_user'], equal_to('-'))
    assert_that(parsed['http_user_agent'], equal_to('python-requests/2.2.1 CPython/2.7.6 Linux/3.13.0-48-generic'))

    # request keys: $request is additionally split into method/uri/protocol
    request_expected_keys = NginxAccessLogParser.request_variables
    for key in request_expected_keys:
        assert_that(parsed, has_item(key))

    assert_that(parsed['request_method'], equal_to('GET'))
    assert_that(parsed['request_uri'], equal_to('/basic_status'))
    assert_that(parsed['server_protocol'], equal_to('HTTP/1.1'))
def test_our_config(self):
    """A format with rt=/ua=/us=/ut= quoted fields: $request_time must be extracted."""
    log_format = '$remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent" "$http_x_forwarded_for" rt="$request_time" ua="$upstream_addr" us="$upstream_status" ut="$upstream_response_time" "$gzip_ratio"'
    access_line = '127.0.0.1 - - [03/Jul/2015:14:09:38 +0000] "GET /basic_status HTTP/1.1" 200 100 "-" "curl/7.35.0" "-" rt="0.000" ua="-" us="-" ut="-" "-"'
    result = NginxAccessLogParser(log_format).parse(access_line)
    assert_that(result, has_item('request_time'))
def test_mailformed_request_time(self):
    # NOTE(review): "mailformed" is a typo for "malformed" in the method name;
    # left unchanged so the test runner keeps discovering it under the same name.
    """
    A "$request_time" field carrying an implausibly large value
    ("1299760000.321") must be dropped from the parse result.
    """
    user_format = '$remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent" "$http_x_forwarded_for" "$host" "$request_time" $gzip_ratio'
    line = '141.101.234.201 - - [03/Jul/2015:10:52:33 +0300] "POST /wp-login.php HTTP/1.1" 200 3809 "http://estevmeste.ru/wp-login.php" "Mozilla/5.0 (Windows NT 6.0; rv:34.0) Gecko/20100101 Firefox/34.0" "-" "estevmeste.ru" "1299760000.321" -'
    parser = NginxAccessLogParser(user_format)
    parsed = parser.parse(line)
    # the bogus value must not be reported at all
    assert_that(parsed, is_not(has_item('request_time')))
def test_malformed_request(self):
    """A $request with shuffled method/uri/protocol is flagged malformed, status still parsed."""
    bad_line = '10.0.0.1 - - [03/Jul/2015:04:46:18 -0400] "/xxx?q=1 GET POST" 400 173 "-" "-" "-"'
    result = NginxAccessLogParser().parse(bad_line)
    assert_that(result['malformed'], equal_to(True))
    assert_that(result['status'], equal_to('400'))
def test_recommended_config(self):
    """
    This test is modelled after our 'recommended' configuration format recently added to our docs.

    https://github.com/nginxinc/nginx-amplify-doc/blob/master/amplify-guide.md#additional-http-metrics
    """
    user_format = \
        '$remote_addr - $remote_user [$time_local] "$request" ' + \
        ' $status $body_bytes_sent "$http_referer" ' + \
        '"$http_user_agent" "$http_x_forwarded_for" ' + \
        'rt=$request_time ua="$upstream_addr" ' + \
        'us="$upstream_status" ut="$upstream_response_time" ' + \
        'cs=$upstream_cache_status'
    # every variable of the recommended format must be reported by the parser
    expected_keys = [
        'remote_addr', 'remote_user', 'time_local', 'request', 'status', 'body_bytes_sent',
        'http_referer', 'http_user_agent', 'http_x_forwarded_for', 'request_time',
        'upstream_addr', 'upstream_status', 'upstream_response_time', 'upstream_cache_status'
    ]

    # first try to parse simple line
    simple_line = \
        '85.25.210.234 - - [22/Jan/2010:19:34:21 +0300] "GET /foo/ HTTP/1.1" ' + \
        ' 200 11078 "http://www.rambler.ru/" ' + \
        '"Mozilla/5.0 (Windows; U; Windows NT 5.1" "-" ' + \
        'rt=0.024 ua="-" ' + \
        'us="-" ut="0.024" ' + \
        'cs="-"'
    parser = NginxAccessLogParser(user_format)
    parsed = parser.parse(simple_line)
    for key in expected_keys:
        assert_that(parsed, has_item(key))

    # now try to parse request with /
    simple_line = \
        '85.25.210.234 - - [22/Jan/2010:19:34:21 +0300] "GET / HTTP/2.0" ' + \
        ' 200 11078 "http://www.rambler.ru/" ' + \
        '"Mozilla/5.0 (Windows; U; Windows NT 5.1" "-" ' + \
        'rt=0.024 ua="-" ' + \
        'us="-" ut="0.024" ' + \
        'cs="-"'
    parser = NginxAccessLogParser(user_format)
    parsed = parser.parse(simple_line)
    for key in expected_keys:
        assert_that(parsed, has_item(key))

    # "GET / HTTP/2.0" must still be split into method / uri / protocol
    assert_that(parsed, has_item('request_uri'))
    assert_that(parsed['request_uri'], equal_to('/'))
    assert_that(parsed['server_protocol'], equal_to('HTTP/2.0'))
def test_malformed_request(self):
    """
    This test is disabled because our current new handling of $request
    means the entire line parse fails rather than just request parse.

    NOTE(review): despite the claim above, the test body does execute its
    assertions, and a method with the same name appears earlier in this file;
    if both live in the same class, the later definition silently shadows the
    earlier one -- confirm which version is intended to run.
    """
    line = '10.0.0.1 - - [03/Jul/2015:04:46:18 -0400] "/xxx?q=1 GET POST" 400 173 "-" "-" "-"'
    parser = NginxAccessLogParser()
    parsed = parser.parse(line)
    assert_that(parsed['malformed'], equal_to(True))
    assert_that(parsed['status'], equal_to('400'))
def test_lonerr_config(self):
    """A comma-separated $upstream_response_time ("2.001, 0.345") must still be captured."""
    fmt = (
        '$remote_addr - $remote_user [$time_local] '
        '"$request" $status $body_bytes_sent "$http_referer" "$http_user_agent" '
        'rt=$request_time ut="$upstream_response_time" cs=$upstream_cache_status'
    )
    sample = (
        '1.2.3.4 - - [22/Jan/2010:19:34:21 +0300] "GET /foo/ HTTP/1.1" 200 11078 '
        '"http://www.rambler.ru/" "Mozilla/5.0 (Windows; U; Windows NT 5.1" rt=0.010 ut="2.001, 0.345" cs=MISS'
    )
    entry = NginxAccessLogParser(fmt).parse(sample)
    assert_that(entry, has_item('upstream_response_time'))
def __init__(self, filename=None, log_format=None, tail=None, **kwargs):
    """
    :param filename: path of the access log this collector watches
    :param log_format: nginx log_format string used to configure the parser
    :param tail: injectable tail object; defaults to FileTail(filename)
    """
    super(NginxAccessLogsCollector, self).__init__(**kwargs)
    self.filename = filename
    self.parser = NginxAccessLogParser(log_format)
    self.tail = FileTail(filename) if tail is None else tail

    # keep only filters that are non-empty and not bound to a different log file
    self.filters = [
        log_filter for log_filter in self.object.filters
        if not log_filter.empty
        and (not log_filter.filename or log_filter.filename == self.filename)
    ]

    self.register(
        self.http_method,
        self.http_status,
        self.http_version,
        self.request_length,
        self.body_bytes_sent,
        self.bytes_sent,
        self.gzip_ration,
        self.request_time,
        self.upstreams,
    )
def test_json_config(self):
    """A JSON-style log_format with nested arrays/objects should expose every variable."""
    log_format = (
        '{"time_local": "$time_local","browser": [{"modern_browser": "$modern_browser",'
        '"ancient_browser": "$ancient_browser","msie": "$msie"}],"core": [{"args": "$args","uri": "$uri"}]}'
    )
    json_line = (
        '{"time_local": "27/Jan/2016:12:30:04 -0800","browser": [{"modern_browser": "-","ancient_browser": "1","msie": "-"}],"core": [{"args": "-","uri": "/status"}]}'
    )
    parsed = NginxAccessLogParser(log_format).parse(json_line)
    for name in ('time_local', 'modern_browser', 'ancient_browser', 'msie', 'args', 'uri'):
        assert_that(parsed, has_item(name))
def test_soukiii_config(self):
    """
    This test is modelled after user soukiii who reported an error in parsing.
    https://github.com/nginxinc/nginx-amplify-agent/issues/7
    """
    # here $request_method / $scheme / $request_uri appear as separate
    # variables instead of a single "$request" field
    user_format = '$remote_addr - [$time_local] $request_method $scheme "$request_uri" ' + \
                  '$status $request_time $body_bytes_sent "$http_referer" ' + \
                  '"$http_user_agent" $host'
    line = \
        '85.25.210.234 - [17/Nov/2015:00:20:50 +0100] GET https "/robots.txt" 200 0.024 240 "-" ' + \
        '"Mozilla/5.0 (compatible; worldwebheritage.org/1.1; [email protected])" www.nakupni-dum-praha.cz'
    parser = NginxAccessLogParser(user_format)
    parsed = parser.parse(line)
    assert_that(parsed, has_item('status'))
    assert_that(parsed, has_item('request_method'))
def test_simple_user_format(self):
    """
    Check parsing of a simple user-defined log format against three sample lines.
    """
    user_format = '$remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent" "$http_x_forwarded_for" "$host" "$request_time" $gzip_ratio'
    expected_keys = ['remote_addr', 'remote_user', 'time_local', 'request_time',
                     'request', 'status', 'body_bytes_sent', 'http_x_forwarded_for',
                     'http_referer', 'http_user_agent', 'host', 'gzip_ratio']
    parser = NginxAccessLogParser(user_format)
    # the parser derives its key list from the format string
    for key in expected_keys:
        assert_that(parser.keys, has_item(key))

    lines = [
        '141.101.234.201 - - [03/Jul/2015:10:52:33 +0300] "POST /wp-login.php HTTP/1.1" 200 3809 "http://estevmeste.ru/wp-login.php" "Mozilla/5.0 (Windows NT 6.0; rv:34.0) Gecko/20100101 Firefox/34.0" "-" "estevmeste.ru" "0.001" -',
        '95.211.80.227 - - [03/Jul/2015:10:52:57 +0300] "PUT /stub_status HTTP/1.1" 200 109 "-" "cloudwatch-nginx-agent/1.0" "-" "defan.pp.ru" "0.001" -',
        '95.211.80.227 - - [03/Jul/2015:10:54:00 +0300] "GET /stub_status HTTP/2.1" 200 109 "-" "cloudwatch-nginx-agent/1.0" "-" "defan.pp.ru" "0.134" -'
    ]
    for line in lines:
        parsed = parser.parse(line)
        for key in expected_keys:
            assert_that(parsed, has_item(key))
        assert_that(parsed['host'], is_in(['defan.pp.ru', 'estevmeste.ru']))
        # a '-' gzip ratio is normalized to 0
        assert_that(parsed['gzip_ratio'], equal_to(0))
        assert_that(parsed['malformed'], equal_to(False))
        # request_time is reported as a list of floats
        assert_that(parsed['request_time'], is_(instance_of(list)))
        assert_that(parsed['request_time'][0], is_(instance_of(float)))

    # check first line
    parsed = parser.parse(lines[0])
    assert_that(parsed['request_uri'], equal_to('/wp-login.php'))

    # check second line
    parsed = parser.parse(lines[1])
    assert_that(parsed['request_method'], equal_to('PUT'))

    # check third line
    parsed = parser.parse(lines[2])
    assert_that(parsed['server_protocol'], equal_to('HTTP/2.1'))
    assert_that(parsed['request_time'], equal_to([0.134]))
def test_comma_separated_values(self):
    """$upstream_addr / $upstream_status holding comma-separated lists come back as lists."""
    fmt = '"$upstream_addr" $upstream_status'
    sample = '"173.194.32.133:80, 173.194.32.133:81, 173.194.32.133:82" 200, 200, 200'
    names = ('upstream_addr', 'upstream_status')

    parser = NginxAccessLogParser(fmt)
    for name in names:
        assert_that(parser.keys, has_item(name))

    record = parser.parse(sample)
    for name in names:
        assert_that(record, has_item(name))

    assert_that(record['upstream_addr'],
                equal_to(['173.194.32.133:80', '173.194.32.133:81', '173.194.32.133:82']))
    assert_that(record['upstream_status'], equal_to(['200', '200', '200']))
def test_tab_config(self):
    """Tab-separated (\\t) log format must parse."""
    # NOTE(review): the closing quote after $body_bytes_sent is missing in the
    # format string; the sample line below reproduces the same imbalance
    # ("5909 without a closing quote), so the test is internally consistent --
    # confirm whether this is intentional.
    user_format = \
        '"$time_local"\t"$remote_addr"\t"$http_host"\t"$request"\t"$status"\t"$body_bytes_sent\t' + \
        '"$http_referer"\t"$http_user_agent"\t"$http_x_forwarded_for"'
    expected_keys = [
        'time_local', 'remote_addr', 'http_host', 'request', 'status',
        'body_bytes_sent', 'http_referer', 'http_user_agent', 'http_x_forwarded_for'
    ]
    simple_line = \
        '"27/Jan/2016:12:30:04 -0800" "173.186.135.227" "leete.ru" ' + \
        '"GET /img/_data/combined/j6vnc0.css HTTP/2.0" "200" "5909 "https://leete.ru/img/"' + \
        ' "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_3) AppleWebKit/537.36 (KHTML, like Gecko) ' + \
        'Chrome/47.0.2526.111 Safari/537.36" "-"'
    parser = NginxAccessLogParser(user_format)
    parsed = parser.parse(simple_line)
    for key in expected_keys:
        assert_that(parsed, has_item(key))
def test_format_with_trailing_space(self):
    """A log_format ending in a trailing space must still parse (regression check)."""
    fmt = (
        '$remote_addr - $remote_user [$time_local] "$request" '
        '$status $body_bytes_sent "$http_referer" '
        '"$http_user_agent" "$http_x_forwarded_for" '
        'rt=$request_time ua="$upstream_addr" '
        'us="$upstream_status" ut="$upstream_response_time" '
        'ul="$upstream_response_length" '
        'cs=$upstream_cache_status '
        'sn=$server_name '
    )
    sample = (
        '180.76.15.138 - - [05/May/2016:15:26:57 +0200] "GET / HTTP/1.1" 200 16060 '
        '"-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)" '
        '"-" rt=0.258 ua="-" us="-" ut="-" ul="-" cs=- sn=differentsimgirls.com'
    )
    record = NginxAccessLogParser(fmt).parse(sample)
    for name in ('time_local', 'status'):
        assert_that(record, has_item(name))
def test_tab_config(self):
    """Tab-separated format; $request must additionally be split into method/uri/protocol."""
    # NOTE(review): the closing quote after $body_bytes_sent is missing in the
    # format string; the sample line reproduces the same imbalance ("5909 with
    # no closing quote), so the test is internally consistent.
    user_format = \
        '"$time_local"\t"$remote_addr"\t"$http_host"\t"$request"\t"$status"\t"$body_bytes_sent\t' + \
        '"$http_referer"\t"$http_user_agent"\t"$http_x_forwarded_for"'
    expected_keys = [
        'time_local', 'remote_addr', 'http_host', 'request_method', 'request_uri',
        'server_protocol', 'status', 'body_bytes_sent', 'http_referer',
        'http_user_agent', 'http_x_forwarded_for'
    ]
    simple_line = \
        '"27/Jan/2016:12:30:04 -0800" "173.186.135.227" "leete.ru" ' + \
        '"GET /img/_data/combined/j6vnc0.css HTTP/2.0" "200" "5909 "https://leete.ru/img/"' + \
        ' "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_3) AppleWebKit/537.36 (KHTML, like Gecko) ' + \
        'Chrome/47.0.2526.111 Safari/537.36" "-"'
    parser = NginxAccessLogParser(user_format)
    parsed = parser.parse(simple_line)
    for key in expected_keys:
        assert_that(parsed, has_item(key))
def test_comma_separated_values(self):
    """Comma-separated upstream values (multiple upstreams per request) parse into lists."""
    log_format = '"$upstream_addr" $upstream_status'
    log_line = '"173.194.32.133:80, 173.194.32.133:81, 173.194.32.133:82" 200, 200, 200'

    parser = NginxAccessLogParser(log_format)
    assert_that(parser.keys, has_item('upstream_addr'))
    assert_that(parser.keys, has_item('upstream_status'))

    parsed = parser.parse(log_line)
    assert_that(parsed, has_item('upstream_addr'))
    assert_that(parsed, has_item('upstream_status'))

    expected_addrs = ['173.194.32.133:%d' % port for port in (80, 81, 82)]
    assert_that(parsed['upstream_addr'], equal_to(expected_addrs))
    assert_that(parsed['upstream_status'], equal_to(['200'] * 3))
def test_malformed_request_doesnt_raise_exception(self):
    """A $request holding raw control bytes must mark the line malformed, not raise."""
    log_format = '$remote_addr - $remote_user [$time_local] "$request" ' + \
                 '$status $body_bytes_sent "$http_referer" ' + \
                 '"$http_user_agent" "$http_x_forwarded_for" "$server_name" $request_id ' + \
                 '$upstream_http_x_amplify_upstream ' + \
                 '"$upstream_addr" "$upstream_response_time" "$upstream_status" ' + \
                 '$ssl_protocol $ssl_cipher $connection/$connection_requests $request_length "$request_time"'
    parser = NginxAccessLogParser(log_format)
    # $request is binary garbage -- presumably a non-HTTP client hitting the
    # HTTP port; the parser must survive it (TODO confirm origin of the bytes)
    line = '139.162.124.167 - - [16/Mar/2017:00:53:14 +0000] "\x04\x01\x1F\x00\x00\x00\x00\x00\x00" ' + \
           '400 166 "-" "-" "-" "receiver.amplify.nginx.com" 9dbaa8264268b8aa55fb9af077834702 ' + \
           '- "-" "-" "-" - - 42433164/1 0 "0.108"'
    parsed = parser.parse(line)
    # this block is true for GSH new parser...comment for now.
    # for key in parser.keys:
    #     # new parser will get this value, but post parsing logic ignores
    #     # empty values for '*_time' variables.
    #     if not key.endswith('_time'):
    #         assert_that(parsed, has_item(key))
    assert_that(parsed['malformed'], equal_to(True))
def test_prepare_combined(self):
    """
    The default parser (combined format:
    '$remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent"')
    must expose all eight standard variables as keys.
    """
    parser = NginxAccessLogParser()
    for name in ('remote_addr', 'remote_user', 'time_local', 'request',
                 'status', 'body_bytes_sent', 'http_referer', 'http_user_agent'):
        assert_that(parser.keys, has_item(name))
def test_format_with_trailing_space(self):
    """A log_format ending with a trailing space must still parse correctly."""
    user_format = '$remote_addr - $remote_user [$time_local] "$request" ' + \
                  '$status $body_bytes_sent "$http_referer" ' + \
                  '"$http_user_agent" "$http_x_forwarded_for" ' + \
                  'rt=$request_time ua="$upstream_addr" ' + \
                  'us="$upstream_status" ut="$upstream_response_time" ' + \
                  'ul="$upstream_response_length" ' + \
                  'cs=$upstream_cache_status ' + \
                  'sn=$server_name '
    # only a spot-check of two keys is needed here; the point is that the
    # trailing space does not break the whole line parse
    expected_keys = [
        'time_local', 'status'
    ]
    line = '180.76.15.138 - - [05/May/2016:15:26:57 +0200] "GET / HTTP/1.1" 200 16060 ' + \
           '"-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)" ' + \
           '"-" rt=0.258 ua="-" us="-" ut="-" ul="-" cs=- sn=differentsimgirls.com'
    parser = NginxAccessLogParser(user_format)
    parsed = parser.parse(line)
    for key in expected_keys:
        assert_that(parsed, has_item(key))
def __init__(self, filename=None, log_format=None, tail=None, **kwargs):
    """
    :param filename: access log path this collector reads
    :param log_format: nginx log_format used to configure the parser
    :param tail: injectable tail object; defaults to FileTail(filename)
    """
    super(NginxAccessLogsCollector, self).__init__(**kwargs)
    self.filename = filename
    self.parser = NginxAccessLogParser(log_format)
    self.tail = FileTail(filename) if tail is None else tail

    def _relevant(log_filter):
        # drop empty filters and filters bound to a different log file
        if log_filter.empty:
            return False
        return not log_filter.filename or log_filter.filename == self.filename

    self.filters = [f for f in self.object.filters if _relevant(f)]
def test_ends_with_space_then_empty_var(self):
    """Variables separated only by a space, with empty trailing values, must parse to ''."""
    log_format = (
        '$remote_addr | $http_x_real_ip | $status | $request | $upstream_addr | $upstream_status | $upstream_response_length | $body_bytes_sent | '
        '$upstream_header_time | $upstream_response_time | $upstream_cache_status | $host | $time_local | $http_x_forwarded_for | $http_referer | $http_user_agent | $request_time | '
        '$ssl_protocol | $ssl_cipher | $connection_requests | $connection | $bytes_sent | $server_port | $gzip_ratio | '
        '$time_iso8601 | $geoip_country_code | $geoip_city_country_name | $geoip_region | $geoip_city | '
        '$region | $city_geo | $city_mm | $tz_geo $tz_mm')
    # note: $city_mm and $tz_mm are empty in the line below ("| | UTC+3 ")
    line = (
        '127.0.0.1 | 192.168.0.100 | 200 | GET /foo/bar/123/ HTTP/1.1 | 192.168.0.99:9000 | 200 | 599866 | 52736 | '
        '1.500 | 1.500 | - | www.example.com | 05/Jun/2018:10:46:46 +0300 | 192.168.0.100 | - | Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24 | 1.500 | '
        'TLSv1.2 | ECDHE-RSA-AES128-GCM-SHA256 | 96 | 12345678 | 54099 | 443 | 11.38 | '
        '2018-06-05T10:46:46+03:00 | RU | Russian Federation | - | - | '
        '77 | 0JzQvtGB0LrQstCw | | UTC+3 ')
    parser = NginxAccessLogParser(log_format)
    parsed = parser.parse(line)
    assert_that(parsed['region'], equal_to('77'))
    assert_that(parsed['city_geo'], equal_to('0JzQvtGB0LrQstCw'))
    # empty value between two separators parses as the empty string
    assert_that(parsed['city_mm'], equal_to(''))
    assert_that(parsed['tz_geo'], equal_to('UTC+3'))
    # trailing variable after the final space is also empty
    assert_that(parsed['tz_mm'], equal_to(''))
def test_parse_combined(self):
    """
    Checks that we can parse the standard ('combined') format.

    log example:
    127.0.0.1 - - [02/Jul/2015:14:49:48 +0000] "GET /basic_status HTTP/1.1" 200 110 "-" "python-requests/2.2.1 CPython/2.7.6 Linux/3.13.0-48-generic"
    """
    parser = NginxAccessLogParser()
    line = '127.0.0.1 - - [02/Jul/2015:14:49:48 +0000] "GET /basic_status HTTP/1.1" 200 110 "-" "python-requests/2.2.1 CPython/2.7.6 Linux/3.13.0-48-generic"'
    parsed = parser.parse(line)

    # basic keys: every variable of the combined format must be present
    common_expected_keys = [
        'remote_addr', 'remote_user', 'time_local', 'request',
        'status', 'body_bytes_sent', 'http_referer', 'http_user_agent'
    ]
    for key in common_expected_keys:
        assert_that(parsed, has_item(key))

    # status stays a string, body_bytes_sent is converted to int
    assert_that(parsed['status'], equal_to('200'))
    assert_that(parsed['body_bytes_sent'], equal_to(110))
    assert_that(parsed['remote_user'], equal_to('-'))
    assert_that(
        parsed['http_user_agent'],
        equal_to(
            'python-requests/2.2.1 CPython/2.7.6 Linux/3.13.0-48-generic'))

    # request keys: $request is additionally split into method/uri/protocol
    request_expected_keys = NginxAccessLogParser.request_variables
    for key in request_expected_keys:
        assert_that(parsed, has_item(key))

    assert_that(parsed['request_method'], equal_to('GET'))
    assert_that(parsed['request_uri'], equal_to('/basic_status'))
    assert_that(parsed['server_protocol'], equal_to('HTTP/1.1'))
def test_complex_user_format(self):
    """
    Check a complex user format including upstream/cache variables and the
    slash-joined $connection/$connection_requests pair.
    """
    user_format = '$remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent" "$http_x_forwarded_for" "$upstream_addr" "$upstream_cache_status" $connection/$connection_requests'
    line = '217.15.195.202 - - [03/Jul/2015:11:12:53 +0300] "GET /gsat/9854/5231/14 HTTP/1.1" 200 11901 "-" "tile-fetcher/0.1" "-" "173.194.32.133:80" "MISS" 62277/22'
    expected_keys = [
        'remote_addr', 'remote_user', 'time_local', 'connection', 'request', 'status',
        'body_bytes_sent', 'http_x_forwarded_for', 'http_referer', 'http_user_agent',
        'upstream_addr', 'upstream_cache_status', 'connection_requests'
    ]
    parser = NginxAccessLogParser(user_format)
    for key in expected_keys:
        assert_that(parser.keys, has_item(key))

    parsed = parser.parse(line)
    for key in expected_keys:
        assert_that(parsed, has_item(key))

    # upstream_addr always comes back as a list
    assert_that(parsed['upstream_addr'], equal_to(['173.194.32.133:80']))
    # the slash-joined pair is split into two separate values
    assert_that(parsed['connection'], equal_to('62277'))
    assert_that(parsed['malformed'], equal_to(False))
    assert_that(parsed['upstream_cache_status'], equal_to('MISS'))
def test_recommended_config(self):
    """
    This test is modelled after our 'recommended' configuration format recently added to our docs.

    https://github.com/nginxinc/nginx-amplify-doc/blob/master/amplify-guide.md#additional-http-metrics

    Unlike the earlier variant of this test, it also expects $request to be
    split into request_method / request_uri / server_protocol.
    """
    user_format = \
        '$remote_addr - $remote_user [$time_local] "$request" ' + \
        ' $status $body_bytes_sent "$http_referer" ' + \
        '"$http_user_agent" "$http_x_forwarded_for" ' + \
        'rt=$request_time ua="$upstream_addr" ' + \
        'us="$upstream_status" ut="$upstream_response_time" ' + \
        'cs=$upstream_cache_status'
    expected_keys = [
        'remote_addr', 'remote_user', 'time_local', 'request_method', 'request_uri',
        'server_protocol', 'status', 'body_bytes_sent', 'http_referer',
        'http_user_agent', 'http_x_forwarded_for', 'request_time', 'upstream_addr',
        'upstream_status', 'upstream_response_time', 'upstream_cache_status'
    ]

    # first try to parse simple line
    simple_line = \
        '85.25.210.234 - - [22/Jan/2010:19:34:21 +0300] "GET /foo/ HTTP/1.1" ' + \
        ' 200 11078 "http://www.rambler.ru/" ' + \
        '"Mozilla/5.0 (Windows; U; Windows NT 5.1" "-" ' + \
        'rt=0.024 ua="-" ' + \
        'us="-" ut="0.024" ' + \
        'cs="-"'
    parser = NginxAccessLogParser(user_format)
    parsed = parser.parse(simple_line)
    for key in expected_keys:
        assert_that(parsed, has_item(key))

    # now try to parse request with /
    simple_line = \
        '85.25.210.234 - - [22/Jan/2010:19:34:21 +0300] "GET / HTTP/2.0" ' + \
        ' 200 11078 "http://www.rambler.ru/" ' + \
        '"Mozilla/5.0 (Windows; U; Windows NT 5.1" "-" ' + \
        'rt=0.024 ua="-" ' + \
        'us="-" ut="0.024" ' + \
        'cs="-"'
    parser = NginxAccessLogParser(user_format)
    parsed = parser.parse(simple_line)
    for key in expected_keys:
        assert_that(parsed, has_item(key))

    # "GET / HTTP/2.0" must be split into its three components
    assert_that(parsed, has_item('request_uri'))
    assert_that(parsed['request_uri'], equal_to('/'))
    assert_that(parsed['server_protocol'], equal_to('HTTP/2.0'))
def test_prepare_combined(self):
    """
    Check that we can prepare the standard format:
    '$remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent"'
    """
    parser = NginxAccessLogParser()
    expected_keys = [
        'remote_addr', 'remote_user', 'time_local', 'request',
        'status', 'body_bytes_sent', 'http_referer', 'http_user_agent'
    ]
    for key in expected_keys:
        assert_that(parser.keys, has_item(key))

    # pin the exact generated regex: named groups per variable, \d+ for the
    # numeric ones, and all literal separators escaped
    assert_that(
        parser.regex_string,
        equal_to(
            r'(?P<remote_addr>.+)\ \-\ (?P<remote_user>.+)\ \[(?P<time_local>.+)\]\ \"(?P<request>.+)\"\ (?P<status>\d+)\ (?P<body_bytes_sent>\d+)\ \"(?P<http_referer>.+)\"\ \"(?P<http_user_agent>.+)\"'
        ))
def test_simple_user_format(self):
    """
    Check parsing of a simple user-defined log format against three sample lines.
    """
    user_format = '$remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent" "$http_x_forwarded_for" "$host" "$request_time" $gzip_ratio'
    expected_keys = [
        'remote_addr', 'remote_user', 'time_local', 'request_time', 'request',
        'status', 'body_bytes_sent', 'http_x_forwarded_for', 'http_referer',
        'http_user_agent', 'host', 'gzip_ratio'
    ]
    parser = NginxAccessLogParser(user_format)
    # the parser derives its key list from the format string
    for key in expected_keys:
        assert_that(parser.keys, has_item(key))

    lines = [
        '141.101.234.201 - - [03/Jul/2015:10:52:33 +0300] "POST /wp-login.php HTTP/1.1" 200 3809 "http://estevmeste.ru/wp-login.php" "Mozilla/5.0 (Windows NT 6.0; rv:34.0) Gecko/20100101 Firefox/34.0" "-" "estevmeste.ru" "0.001" -',
        '95.211.80.227 - - [03/Jul/2015:10:52:57 +0300] "PUT /stub_status HTTP/1.1" 200 109 "-" "cloudwatch-nginx-agent/1.0" "-" "defan.pp.ru" "0.001" -',
        '95.211.80.227 - - [03/Jul/2015:10:54:00 +0300] "GET /stub_status HTTP/2.1" 200 109 "-" "cloudwatch-nginx-agent/1.0" "-" "defan.pp.ru" "0.134" -'
    ]
    for line in lines:
        parsed = parser.parse(line)
        for key in expected_keys:
            assert_that(parsed, has_item(key))
        assert_that(parsed['host'], is_in(['defan.pp.ru', 'estevmeste.ru']))
        # a '-' gzip ratio is normalized to 0
        assert_that(parsed['gzip_ratio'], equal_to(0))
        assert_that(parsed['malformed'], equal_to(False))
        # request_time is reported as a list of floats
        assert_that(parsed['request_time'], is_(instance_of(list)))
        assert_that(parsed['request_time'][0], is_(instance_of(float)))

    # check first line
    parsed = parser.parse(lines[0])
    assert_that(parsed['request_uri'], equal_to('/wp-login.php'))

    # check second line
    parsed = parser.parse(lines[1])
    assert_that(parsed['request_method'], equal_to('PUT'))

    # check third line
    parsed = parser.parse(lines[2])
    assert_that(parsed['server_protocol'], equal_to('HTTP/2.1'))
    assert_that(parsed['request_time'], equal_to([0.134]))
def test_format_with_braces(self):
    """The ${var} brace syntax in a log_format must be recognized as a variable."""
    fmt = ('$remote_addr - $remote_user [$time_local] $request '
           '"$status" $body_bytes_sent "$http_referer" '
           '"$http_user_agent" "$http_x_forwarded_for" '
           '"http-host=$host" "elapsed=${request_time}s" '
           '"scheme=${scheme}"')
    parser = NginxAccessLogParser(fmt)
    for name in ('remote_user', 'remote_addr', 'time_local', 'status',
                 'request_time', 'scheme', 'host'):
        assert_that(parser.keys, has_item(name))
class NginxAccessLogsCollector(AbstractCollector): short_name = 'nginx_alog' counters = { 'nginx.http.method.head': 'request_method', 'nginx.http.method.get': 'request_method', 'nginx.http.method.post': 'request_method', 'nginx.http.method.put': 'request_method', 'nginx.http.method.delete': 'request_method', 'nginx.http.method.options': 'request_method', 'nginx.http.method.other': 'request_method', 'nginx.http.status.1xx': 'status', 'nginx.http.status.2xx': 'status', 'nginx.http.status.3xx': 'status', 'nginx.http.status.4xx': 'status', 'nginx.http.status.5xx': 'status', 'nginx.http.status.discarded': 'status', 'nginx.http.v0_9': 'server_protocol', 'nginx.http.v1_0': 'server_protocol', 'nginx.http.v1_1': 'server_protocol', 'nginx.http.v2': 'server_protocol', 'nginx.http.request.body_bytes_sent': 'body_bytes_sent', 'nginx.http.request.bytes_sent': 'bytes_sent', 'nginx.upstream.status.1xx': 'upstream_status', 'nginx.upstream.status.2xx': 'upstream_status', 'nginx.upstream.status.3xx': 'upstream_status', 'nginx.upstream.status.4xx': 'upstream_status', 'nginx.upstream.status.5xx': 'upstream_status', 'nginx.cache.bypass': '******', 'nginx.cache.expired': 'upstream_cache_status', 'nginx.cache.hit': 'upstream_cache_status', 'nginx.cache.miss': 'upstream_cache_status', 'nginx.cache.revalidated': 'upstream_cache_status', 'nginx.cache.stale': 'upstream_cache_status', 'nginx.cache.updating': 'upstream_cache_status', # 'upstream.next.count': None, # Not sure how best to handle this since this...ignoring for now. # 'upstream.request.count': None # Not sure how to handle for same reason above. 
} valid_http_methods = ( 'head', 'get', 'post', 'put', 'delete', 'options' ) def __init__(self, filename=None, log_format=None, tail=None, **kwargs): super(NginxAccessLogsCollector, self).__init__(**kwargs) self.filename = filename self.parser = NginxAccessLogParser(log_format) self.tail = tail if tail is not None else FileTail(filename) self.filters = [] # skip empty filters and filters for other log file for log_filter in self.object.filters: if log_filter.empty: continue if log_filter.filename and log_filter.filename != self.filename: continue self.filters.append(log_filter) def init_counters(self): for counter, key in self.counters.iteritems(): # If keys are in the parser format (access log) or not defined (error log) if key in self.parser.keys: self.object.statsd.incr(counter, value=0) def collect(self): self.init_counters() # set all counters to 0 count = 0 for line in self.tail: count += 1 try: parsed = self.parser.parse(line) except: context.log.debug('could parse line %s' % line, exc_info=True) parsed = None if not parsed: continue if parsed['malformed']: self.request_malformed() else: # try to match custom filters matched_filters = [] for log_filter in self.filters: if log_filter.match(parsed): matched_filters.append(log_filter) for method in ( self.http_method, self.http_status, self.http_version, self.request_length, self.body_bytes_sent, self.bytes_sent, self.gzip_ration, self.request_time, self.upstreams, ): try: method(parsed, matched_filters) except Exception as e: exception_name = e.__class__.__name__ context.log.error( 'failed to collect log metrics %s due to %s' % (method.__name__, exception_name)) context.log.debug('additional info:', exc_info=True) context.log.debug('%s processed %s lines from %s' % (self.object.definition_hash, count, self.filename)) def request_malformed(self): """ nginx.http.request.malformed """ self.object.statsd.incr('nginx.http.request.malformed') def http_method(self, data, matched_filters=None): """ 
nginx.http.method.head nginx.http.method.get nginx.http.method.post nginx.http.method.put nginx.http.method.delete nginx.http.method.options nginx.http.method.other :param data: {} of parsed line :param matched_filters: [] of matched filters """ if 'request_method' in data: method = data['request_method'].lower() method = method if method in self.valid_http_methods else 'other' metric_name = 'nginx.http.method.%s' % method self.object.statsd.incr(metric_name) # call custom filters if matched_filters: self.count_custom_filter(matched_filters, metric_name, 1, self.object.statsd.incr) def http_status(self, data, matched_filters=None): """ nginx.http.status.1xx nginx.http.status.2xx nginx.http.status.3xx nginx.http.status.4xx nginx.http.status.5xx nginx.http.status.discarded :param data: {} of parsed line :param matched_filters: [] of matched filters """ if 'status' in data: status = data['status'] suffix = 'discarded' if status in ('499', '444', '408') else '%sxx' % status[0] metric_name = 'nginx.http.status.%s' % suffix self.object.statsd.incr(metric_name) # call custom filters if matched_filters: self.count_custom_filter(matched_filters, metric_name, 1, self.object.statsd.incr) def http_version(self, data, matched_filters=None): """ nginx.http.v0_9 nginx.http.v1_0 nginx.http.v1_1 nginx.http.v2 :param data: {} of parsed line :param matched_filters: [] of matched filters """ if 'server_protocol' in data: proto = data['server_protocol'] if not proto.startswith('HTTP'): return version = proto.split('/')[-1] if version.startswith('0.9'): suffix = '0_9' elif version.startswith('1.0'): suffix = '1_0' elif version.startswith('1.1'): suffix = '1_1' elif version.startswith('2.0'): suffix = '2' else: suffix = version.replace('.', '_') metric_name = 'nginx.http.v%s' % suffix self.object.statsd.incr(metric_name) # call custom filters if matched_filters: self.count_custom_filter(matched_filters, metric_name, 1, self.object.statsd.incr) def request_length(self, data, 
matched_filters=None):
        """
        nginx.http.request.length

        Averages the parsed $request_length value.

        :param data: {} of parsed line
        :param matched_filters: [] of matched filters
        """
        if 'request_length' in data:
            metric_name, value = 'nginx.http.request.length', data['request_length']
            self.object.statsd.average(metric_name, value)

            # call custom filters
            if matched_filters:
                self.count_custom_filter(matched_filters, metric_name, value, self.object.statsd.average)

    def body_bytes_sent(self, data, matched_filters=None):
        """
        nginx.http.request.body_bytes_sent

        Counts bytes sent in response bodies.

        :param data: {} of parsed line
        :param matched_filters: [] of matched filters
        """
        if 'body_bytes_sent' in data:
            metric_name, value = 'nginx.http.request.body_bytes_sent', data['body_bytes_sent']
            self.object.statsd.incr(metric_name, value)

            # call custom filters
            if matched_filters:
                self.count_custom_filter(matched_filters, metric_name, value, self.object.statsd.incr)

    def bytes_sent(self, data, matched_filters=None):
        """
        nginx.http.request.bytes_sent

        Counts total bytes sent (headers + body).

        :param data: {} of parsed line
        :param matched_filters: [] of matched filters
        """
        if 'bytes_sent' in data:
            metric_name, value = 'nginx.http.request.bytes_sent', data['bytes_sent']
            self.object.statsd.incr(metric_name, value)

            # call custom filters
            if matched_filters:
                self.count_custom_filter(matched_filters, metric_name, value, self.object.statsd.incr)

    def gzip_ration(self, data, matched_filters=None):
        """
        nginx.http.gzip.ratio

        Averages the parsed $gzip_ratio value.
        NOTE: "ration" is a historic typo in the method name, kept because
        the name is registered as a collector callback.

        :param data: {} of parsed line
        :param matched_filters: [] of matched filters
        """
        if 'gzip_ratio' in data:
            metric_name, value = 'nginx.http.gzip.ratio', data['gzip_ratio']
            self.object.statsd.average(metric_name, value)

            # call custom filters
            if matched_filters:
                self.count_custom_filter(matched_filters, metric_name, value, self.object.statsd.average)

    def request_time(self, data, matched_filters=None):
        """
        nginx.http.request.time
        (.median/.max/.pctl95/.count variants are derived from the timer
        at statsd flush time)

        :param data: {} of parsed line
        :param matched_filters: [] of matched filters
        """
        if 'request_time' in data:
            # request_time is summable (parsed as a collection of numbers);
            # sum() folds multi-value lines into one observation
            metric_name, value = 'nginx.http.request.time', sum(data['request_time'])
            self.object.statsd.timer(metric_name, value)

            # call custom filters
            if matched_filters:
                self.count_custom_filter(matched_filters, metric_name, value, self.object.statsd.timer)

    def upstreams(self, data, matched_filters=None):
        """
        Reports upstream-related metrics:

        nginx.cache.{bypass,expired,hit,miss,revalidated,stale,updating}
        nginx.upstream.request.count
        nginx.upstream.next.count
        nginx.upstream.{connect,header,response}.time
        (.median/.max/.pctl95/.count variants derived at statsd flush)
        nginx.upstream.status.{1xx,2xx,3xx,4xx,5xx}
        nginx.upstream.response.length

        :param data: {} of parsed line
        :param matched_filters: [] of matched filters
        """
        # find out if we have info about upstreams
        empty_values = ('-', '')
        upstream_data_found = False
        for key in data.iterkeys():
            if key.startswith('upstream') and data[key] not in empty_values:
                upstream_data_found = True
                break

        if not upstream_data_found:
            return

        # counters
        upstream_response = False
        if 'upstream_status' in data:
            # NOTE(review): here $upstream_status is treated as a single
            # string; a later revision in this file iterates it as a list —
            # confirm which parser version this code pairs with
            status = data['upstream_status']
            if status.isdigit():
                suffix = '%sxx' % status[0]
                metric_name = 'nginx.upstream.status.%s' % suffix
                upstream_response = True if suffix in ('2xx', '3xx') else False  # Set flag for upstream length processing
                self.object.statsd.incr(metric_name)

                # call custom filters
                if matched_filters:
                    self.count_custom_filter(matched_filters, metric_name, 1, self.object.statsd.incr)

        # response length is only meaningful for successful (2xx/3xx) answers
        if upstream_response and 'upstream_response_length' in data:
            metric_name, value = 'nginx.upstream.response.length', data['upstream_response_length']
            self.object.statsd.average(metric_name, value)

            # call custom filters
            if matched_filters:
                self.count_custom_filter(matched_filters, metric_name, value, self.object.statsd.average)

        # gauges
        upstream_switches = None
        for metric_name, key_name in {
            'nginx.upstream.connect.time': 'upstream_connect_time',
            'nginx.upstream.response.time': 'upstream_response_time',
            'nginx.upstream.header.time': 'upstream_header_time'
        }.iteritems():
            if key_name in data:
                values = data[key_name]

                # set upstream switches one time
                # (more than one timing value means nginx tried several upstreams)
                if len(values) > 1 and upstream_switches is None:
                    upstream_switches = len(values) - 1

                # store all values
                value = sum(values)
                self.object.statsd.timer(metric_name, value)

                # call custom filters
                if matched_filters:
                    self.count_custom_filter(matched_filters, metric_name, value, self.object.statsd.timer)

        # log upstream switches
        metric_name, value = 'nginx.upstream.next.count', 0 if upstream_switches is None else upstream_switches
        self.object.statsd.incr(metric_name, value)

        # call custom filters
        if matched_filters:
            self.count_custom_filter(matched_filters, metric_name, value, self.object.statsd.incr)

        # cache
        if 'upstream_cache_status' in data:
            cache_status = data['upstream_cache_status']
            # '-' means no cache status was reported for the request
            if not cache_status.startswith('-'):
                metric_name = 'nginx.cache.%s' % cache_status.lower()
                self.object.statsd.incr(metric_name)

                # call custom filters
                if matched_filters:
                    self.count_custom_filter(matched_filters, metric_name, 1, self.object.statsd.incr)

        # log total upstream requests
        metric_name = 'nginx.upstream.request.count'
        self.object.statsd.incr(metric_name)

        # call custom filters
        if matched_filters:
            self.count_custom_filter(matched_filters, metric_name, 1, self.object.statsd.incr)

    @staticmethod
    def count_custom_filter(matched_filters, metric_name, value, method):
        """
        Collect custom metric

        Re-emits ``value`` under "metric||rule_id" for every filter whose
        metric matches ``metric_name``.

        :param matched_filters: [] of matched filters
        :param metric_name: str metric name
        :param value: int/float value
        :param method: function to call
        :return:
        """
        for log_filter in matched_filters:
            if log_filter.metric == metric_name:
                full_metric_name = '%s||%s' % (log_filter.metric, log_filter.filter_rule_id)
                method(full_metric_name, value)
def test_format_with_multiple_lines(self):
    """
    A splunk-style log_format declaration should parse into the expected
    variables, including upstream_* and http_* header captures.
    """
    log_format = '''timestamp=$time_iso8601 level=6 body_bytes_sent=$body_bytes_sent correlation_id=$upstream_http_x_correlation_id http_referrer="$http_referer" http_user_agent="$http_user_agent" level_name=info organisation=Retail path="$scheme://$server_name $uri" querystring=$args remote_addr="$remote_addr" request_duration=$request_time request_method=$request_method request_path=$uri request_uri=$request_uri server_name=$server_name server_protocol=$server_protocol source_system="$upstream_http_x_source_system" status=$status upstream_addr="$upstream_addr" upstream_cache_status=$upstream_cache_status upstream_connect_time=$upstream_connect_time upstream_header_time=$upstream_header_time upstream_response_time=$upstream_response_time upstream_status=$upstream_status '''
    log_line = '''timestamp=2018-07-17T18:07:31+00:00 level=6 body_bytes_sent=97 correlation_id=- http_referrer="-" http_user_agent="nginx-amplify-agent/1.5.0-1" level_name=info organisation=Retail path="http:// /basic_status" querystring=- remote_addr="127.0.0.1" request_duration=0.000 request_method=GET request_path=/basic_status request_uri=/basic_status server_name= server_protocol=HTTP/1.1 source_system="-" status=200 upstream_addr="-" upstream_cache_status=- upstream_connect_time=- upstream_header_time=- upstream_response_time=- upstream_status=- '''

    log_parser = NginxAccessLogParser(log_format)
    result = log_parser.parse(log_line)

    # every variable referenced by the format should be extracted
    expected_keys = [
        "time_iso8601",
        "body_bytes_sent",
        "upstream_http_x_correlation_id",
        "http_referer",
        "http_user_agent",
        "scheme",
        "uri",
        "args",
        "remote_addr",
        "request_time",
        "request_method",
        "request_uri",
        "server_name",
        "server_protocol",
        "upstream_http_x_source_system",
        "status",
        "upstream_addr",
        "upstream_cache_status",
        "upstream_status",
    ]
    assert_that(result, has_items(*expected_keys))

    # spot-check a numeric, a timestamp and a quoted string value
    assert_that(result['body_bytes_sent'], equal_to(97))
    assert_that(result['time_iso8601'], equal_to('2018-07-17T18:07:31+00:00'))
    assert_that(result['http_user_agent'], equal_to('nginx-amplify-agent/1.5.0-1'))
class NginxAccessLogsCollector(AbstractCollector):
    """
    Collects metrics from nginx access logs.

    Lines are read from ``tail``, parsed with NginxAccessLogParser and
    reported to ``self.object.statsd``.  Custom user filters
    (``self.object.filters``) additionally produce "metric||rule_id"
    series via count_custom_filter().
    """

    short_name = 'nginx_alog'

    # metric name -> log variable it is derived from
    # (None means the counter is initialized unconditionally)
    counters = {
        'nginx.http.method.head': 'request_method',
        'nginx.http.method.get': 'request_method',
        'nginx.http.method.post': 'request_method',
        'nginx.http.method.put': 'request_method',
        'nginx.http.method.delete': 'request_method',
        'nginx.http.method.options': 'request_method',
        'nginx.http.method.other': 'request_method',
        'nginx.http.status.1xx': 'status',
        'nginx.http.status.2xx': 'status',
        'nginx.http.status.3xx': 'status',
        'nginx.http.status.4xx': 'status',
        'nginx.http.status.403': 'status',
        'nginx.http.status.404': 'status',
        'nginx.http.status.5xx': 'status',
        'nginx.http.status.500': 'status',
        'nginx.http.status.502': 'status',
        'nginx.http.status.503': 'status',
        'nginx.http.status.504': 'status',
        'nginx.http.status.discarded': 'status',
        'nginx.http.v0_9': 'server_protocol',
        'nginx.http.v1_0': 'server_protocol',
        'nginx.http.v1_1': 'server_protocol',
        'nginx.http.v2': 'server_protocol',
        'nginx.http.request.body_bytes_sent': 'body_bytes_sent',
        'nginx.http.request.bytes_sent': 'bytes_sent',
        'nginx.upstream.status.1xx': 'upstream_status',
        'nginx.upstream.status.2xx': 'upstream_status',
        'nginx.upstream.status.3xx': 'upstream_status',
        'nginx.upstream.status.4xx': 'upstream_status',
        'nginx.upstream.status.5xx': 'upstream_status',
        # fixed: was a corrupted placeholder ('******') which also prevented
        # init_counters() from ever zero-initializing nginx.cache.bypass
        'nginx.cache.bypass': 'upstream_cache_status',
        'nginx.cache.expired': 'upstream_cache_status',
        'nginx.cache.hit': 'upstream_cache_status',
        'nginx.cache.miss': 'upstream_cache_status',
        'nginx.cache.revalidated': 'upstream_cache_status',
        'nginx.cache.stale': 'upstream_cache_status',
        'nginx.cache.updating': 'upstream_cache_status',
        'nginx.upstream.next.count': None,
        'nginx.upstream.request.count': None
    }

    valid_http_methods = ('head', 'get', 'post', 'put', 'delete', 'options')

    valid_cache_statuses = (
        'bypass',
        'expired',
        'hit',
        'miss',
        'revalidated',
        'stale',
        'updating',
    )

    def __init__(self, log_format=None, tail=None, **kwargs):
        """
        :param log_format: str raw log_format declaration (may contain '\\n'
                           for multi-line formats)
        :param tail: iterable of log lines (Pipeline for live tails)
        """
        super(NginxAccessLogsCollector, self).__init__(**kwargs)
        self.parser = NginxAccessLogParser(log_format)
        # multi-line formats produce one record per N physical lines
        self.num_of_lines_in_log_format = self.parser.raw_format.count('\n') + 1
        self.tail = tail

        # syslog tails names are "<type>:<name>"
        self.name = tail.name.split(':')[-1] if isinstance(tail, Pipeline) \
            else None

        self.filters = []
        # skip empty filters and filters for other log file
        for log_filter in self.object.filters:
            if log_filter.empty:
                continue
            if not log_filter.matchfile(self.name):
                continue
            self.filters.append(log_filter)

        self.register(
            self.http_method,
            self.http_status,
            self.http_version,
            self.request_length,
            self.body_bytes_sent,
            self.bytes_sent,
            self.gzip_ration,
            self.request_time,
            self.upstreams,
        )

    def init_counters(self, counters=None):
        """
        Zero-initialize counters so periods with no traffic still report 0.

        :param counters: optional {metric: variable} map; defaults to
                         ``self.counters``.  (Previously this argument
                         was accepted but silently ignored.)
        """
        counters = self.counters if counters is None else counters
        for counter, key in counters.iteritems():
            # If keys are in the parser format (access log) or not defined (error log)
            if key in self.parser.keys or key is None:
                self.object.statsd.incr(counter, value=0)

        # init counters for custom filters
        for counter in set(f.metric for f in self.filters):
            if counter in self.counters:
                self.count_custom_filter(self.filters, counter, 0, self.object.statsd.incr)

    def collect(self):
        """Read available lines from the tail, parse them and emit metrics."""
        self.init_counters()  # set all counters to 0

        count = 0
        multiline_record = []

        for line in self.tail:
            count += 1

            # release GIL every 1000 of lines
            if count % (1000 * self.num_of_lines_in_log_format) == 0:
                time.sleep(0.001)

            # handle multiline log formats: buffer N physical lines,
            # then parse them as one record
            if self.num_of_lines_in_log_format > 1:
                multiline_record.append(line)
                if len(multiline_record) < self.num_of_lines_in_log_format:
                    continue
                else:
                    line = '\n'.join(multiline_record)
                    multiline_record = []

            try:
                parsed = self.parser.parse(line)
            except Exception:
                # narrowed from a bare `except:` so SystemExit and
                # KeyboardInterrupt are not swallowed; parse failures
                # are deliberately best-effort (logged at debug)
                context.log.debug('could not parse line %r' % line, exc_info=True)
                parsed = None

            if not parsed:
                continue

            if parsed['malformed']:
                self.request_malformed()
            else:
                # try to match custom filters and collect log metrics with them
                # (renamed loop variable: `filter` shadowed the builtin)
                matched_filters = [
                    log_filter for log_filter in self.filters if log_filter.match(parsed)
                ]
                super(NginxAccessLogsCollector, self).collect(parsed, matched_filters)

        tail_name = self.tail.name if isinstance(self.tail, Pipeline) else 'list'
        context.log.debug('%s processed %s lines from %s' % (self.object.definition_hash, count, tail_name))

    def request_malformed(self):
        """
        nginx.http.request.malformed
        """
        self.object.statsd.incr('nginx.http.request.malformed')

    def http_method(self, data, matched_filters=None):
        """
        nginx.http.method.{head,get,post,put,delete,options,other}

        :param data: {} of parsed line
        :param matched_filters: [] of matched filters
        """
        if 'request_method' in data:
            method = data['request_method'].lower()
            method = method if method in self.valid_http_methods else 'other'
            metric_name = 'nginx.http.method.%s' % method
            self.object.statsd.incr(metric_name)

            if matched_filters:
                self.count_custom_filter(matched_filters, metric_name, 1, self.object.statsd.incr)

    def http_status(self, data, matched_filters=None):
        """
        nginx.http.status.{1xx..5xx}, plus dedicated counters for
        403/404/500/502/503/504 and nginx.http.status.discarded (499).

        :param data: {} of parsed line
        :param matched_filters: [] of matched filters
        """
        if 'status' in data:
            metrics_to_populate = []
            http_status = data['status']

            # add separate metrics for specific 4xx and 5xx codes
            if http_status.startswith('4'):
                if http_status in ('403', '404'):
                    metrics_to_populate.append('nginx.http.status.%s' % http_status)
            elif http_status.startswith('5'):
                if http_status in ('500', '502', '503', '504'):
                    metrics_to_populate.append('nginx.http.status.%s' % http_status)

            # the Nxx rollup is always populated
            metrics_to_populate.append('nginx.http.status.%sxx' % http_status[0])

            for metric_name in metrics_to_populate:
                self.object.statsd.incr(metric_name)

                if matched_filters:
                    self.count_custom_filter(matched_filters, metric_name, 1, self.object.statsd.incr)

            # 499 (client closed connection) also counts as discarded
            if data['status'] == '499':
                metric_name = 'nginx.http.status.discarded'
                self.object.statsd.incr(metric_name)

                if matched_filters:
                    self.count_custom_filter(matched_filters, metric_name, 1, self.object.statsd.incr)

    def http_version(self, data, matched_filters=None):
        """
        nginx.http.{v0_9,v1_0,v1_1,v2}

        :param data: {} of parsed line
        :param matched_filters: [] of matched filters
        """
        if 'server_protocol' in data:
            proto = data['server_protocol']
            if not proto.startswith('HTTP'):
                return

            version = proto.split('/')[-1]

            # Ordered roughly by expected popularity to reduce number of calls to `startswith`
            if version.startswith('1.1'):
                suffix = '1_1'
            elif version.startswith('2.0'):
                suffix = '2'
            elif version.startswith('1.0'):
                suffix = '1_0'
            elif version.startswith('0.9'):
                suffix = '0_9'
            else:
                suffix = version.replace('.', '_')

            metric_name = 'nginx.http.v%s' % suffix
            self.object.statsd.incr(metric_name)

            if matched_filters:
                self.count_custom_filter(matched_filters, metric_name, 1, self.object.statsd.incr)

    def request_length(self, data, matched_filters=None):
        """
        nginx.http.request.length

        :param data: {} of parsed line
        :param matched_filters: [] of matched filters
        """
        if 'request_length' in data:
            metric_name, value = 'nginx.http.request.length', data['request_length']
            self.object.statsd.average(metric_name, value)

            if matched_filters:
                self.count_custom_filter(matched_filters, metric_name, value, self.object.statsd.average)

    def body_bytes_sent(self, data, matched_filters=None):
        """
        nginx.http.request.body_bytes_sent

        :param data: {} of parsed line
        :param matched_filters: [] of matched filters
        """
        if 'body_bytes_sent' in data:
            metric_name, value = 'nginx.http.request.body_bytes_sent', data['body_bytes_sent']
            self.object.statsd.incr(metric_name, value)

            if matched_filters:
                self.count_custom_filter(matched_filters, metric_name, value, self.object.statsd.incr)

    def bytes_sent(self, data, matched_filters=None):
        """
        nginx.http.request.bytes_sent

        :param data: {} of parsed line
        :param matched_filters: [] of matched filters
        """
        if 'bytes_sent' in data:
            metric_name, value = 'nginx.http.request.bytes_sent', data['bytes_sent']
            self.object.statsd.incr(metric_name, value)

            if matched_filters:
                self.count_custom_filter(matched_filters, metric_name, value, self.object.statsd.incr)

    def gzip_ration(self, data, matched_filters=None):
        """
        nginx.http.gzip.ratio

        NOTE: "ration" is a historic typo in the method name, kept because
        the name is registered as a collector callback.

        :param data: {} of parsed line
        :param matched_filters: [] of matched filters
        """
        if 'gzip_ratio' in data:
            metric_name, value = 'nginx.http.gzip.ratio', data['gzip_ratio']
            self.object.statsd.average(metric_name, value)

            if matched_filters:
                self.count_custom_filter(matched_filters, metric_name, value, self.object.statsd.average)

    def request_time(self, data, matched_filters=None):
        """
        nginx.http.request.time
        (.median/.max/.pctl95/.count variants are derived at statsd flush,
        hence the create_parent_filters() re-targeting below)

        :param data: {} of parsed line
        :param matched_filters: [] of matched filters
        """
        if 'request_time' in data:
            metric_name, value = 'nginx.http.request.time', sum(data['request_time'])
            self.object.statsd.timer(metric_name, value)

            if matched_filters:
                self.count_custom_filter(
                    self.create_parent_filters(matched_filters, parent_metric=metric_name),
                    metric_name, value, self.object.statsd.timer)

    def upstreams(self, data, matched_filters=None):
        """
        Reports upstream-related metrics:

        nginx.cache.{bypass,expired,hit,miss,revalidated,stale,updating}
        nginx.upstream.request.count
        nginx.upstream.next.count
        nginx.upstream.{connect,header,response}.time
        (.median/.max/.pctl95/.count variants derived at statsd flush)
        nginx.upstream.status.{1xx,2xx,3xx,4xx,5xx}
        nginx.upstream.response.length

        :param data: {} of parsed line
        :param matched_filters: [] of matched filters
        """
        # bail out early if the line carries no upstream information
        if not any(
            key.startswith('upstream') and data[key] not in ('-', '')
            for key in data
        ):
            return

        # counters
        upstream_response = False
        if 'upstream_status' in data:
            for status in data['upstream_status']:  # upstream_status is parsed as a list
                if status.isdigit():
                    suffix = '%sxx' % status[0]
                    metric_name = 'nginx.upstream.status.%s' % suffix
                    # 2xx/3xx qualifies the response length sample below
                    upstream_response = True if suffix in ('2xx', '3xx') else False
                    self.object.statsd.incr(metric_name)

                    if matched_filters:
                        self.count_custom_filter(matched_filters, metric_name, 1, self.object.statsd.incr)

        if upstream_response and 'upstream_response_length' in data:
            metric_name, value = 'nginx.upstream.response.length', data['upstream_response_length']
            self.object.statsd.average(metric_name, value)

            if matched_filters:
                self.count_custom_filter(matched_filters, metric_name, value, self.object.statsd.average)

        # gauges
        upstream_switches = None
        for metric_name, key_name in {
            'nginx.upstream.connect.time': 'upstream_connect_time',
            'nginx.upstream.response.time': 'upstream_response_time',
            'nginx.upstream.header.time': 'upstream_header_time'
        }.iteritems():
            if key_name in data:
                values = data[key_name]

                # set upstream switches one time
                # (more than one timing value means nginx tried several upstreams)
                if len(values) > 1 and upstream_switches is None:
                    upstream_switches = len(values) - 1

                # store all values
                value = sum(values)
                self.object.statsd.timer(metric_name, value)

                if matched_filters:
                    self.count_custom_filter(
                        self.create_parent_filters(matched_filters, parent_metric=metric_name),
                        metric_name, value, self.object.statsd.timer)

        # log upstream switches
        metric_name, value = 'nginx.upstream.next.count', 0 if upstream_switches is None else upstream_switches
        self.object.statsd.incr(metric_name, value)

        if matched_filters:
            self.count_custom_filter(matched_filters, metric_name, value, self.object.statsd.incr)

        # cache
        if 'upstream_cache_status' in data:
            cache_status = data['upstream_cache_status']
            cache_status_lower = cache_status.lower()
            if cache_status_lower in self.valid_cache_statuses:
                metric_name = 'nginx.cache.%s' % cache_status_lower
                self.object.statsd.incr(metric_name)

                if matched_filters:
                    self.count_custom_filter(matched_filters, metric_name, 1, self.object.statsd.incr)

        # log total upstream requests
        metric_name = 'nginx.upstream.request.count'
        self.object.statsd.incr(metric_name)

        if matched_filters:
            self.count_custom_filter(matched_filters, metric_name, 1, self.object.statsd.incr)

    @staticmethod
    def create_parent_filters(original_filters, parent_metric):
        """
        median, max, pctl95, and count are created in statsd.flush().
        So if a filter on nginx.upstream.response.time.median is created,
        the filter metric should be truncated to nginx.upstream.response.time

        :param original_filters: [] of filters to scan
        :param parent_metric: str parent metric name to re-target onto
        :return: [] of deep-copied filters whose metric is parent_metric
        """
        parent_filters = []
        for original_filter in original_filters:
            if parent_metric not in original_filter.metric:
                continue
            # deep copy so the caller's filter objects are left untouched
            parent_filter = copy.deepcopy(original_filter)
            parent_filter.metric = parent_metric
            parent_filters.append(parent_filter)
        return parent_filters

    @staticmethod
    def count_custom_filter(matched_filters, metric_name, value, method):
        """
        Collect custom metric

        Re-emits ``value`` under "metric||rule_id" for every filter whose
        metric matches ``metric_name``.

        :param matched_filters: [] of matched filters
        :param metric_name: str metric name
        :param value: int/float value
        :param method: function to call
        :return:
        """
        for log_filter in matched_filters:
            if log_filter.metric == metric_name:
                full_metric_name = '%s||%s' % (log_filter.metric, log_filter.filter_rule_id)
                method(full_metric_name, value)
class NginxAccessLogsCollector(AbstractCollector):
    """
    Collects metrics from nginx access logs (earlier, single-line-format
    revision: no multi-line record handling, file-based filter matching).

    Lines are read from ``tail`` (or a FileTail built from ``filename``),
    parsed with NginxAccessLogParser and reported to ``self.object.statsd``.
    """

    short_name = 'nginx_alog'

    # metric name -> log variable it is derived from
    # (None means the counter is initialized unconditionally)
    counters = {
        'nginx.http.method.head': 'request_method',
        'nginx.http.method.get': 'request_method',
        'nginx.http.method.post': 'request_method',
        'nginx.http.method.put': 'request_method',
        'nginx.http.method.delete': 'request_method',
        'nginx.http.method.options': 'request_method',
        'nginx.http.method.other': 'request_method',
        'nginx.http.status.1xx': 'status',
        'nginx.http.status.2xx': 'status',
        'nginx.http.status.3xx': 'status',
        'nginx.http.status.4xx': 'status',
        'nginx.http.status.5xx': 'status',
        'nginx.http.status.discarded': 'status',
        'nginx.http.v0_9': 'server_protocol',
        'nginx.http.v1_0': 'server_protocol',
        'nginx.http.v1_1': 'server_protocol',
        'nginx.http.v2': 'server_protocol',
        'nginx.http.request.body_bytes_sent': 'body_bytes_sent',
        'nginx.http.request.bytes_sent': 'bytes_sent',
        'nginx.upstream.status.1xx': 'upstream_status',
        'nginx.upstream.status.2xx': 'upstream_status',
        'nginx.upstream.status.3xx': 'upstream_status',
        'nginx.upstream.status.4xx': 'upstream_status',
        'nginx.upstream.status.5xx': 'upstream_status',
        # fixed: was a corrupted placeholder ('******') which also prevented
        # init_counters() from ever zero-initializing nginx.cache.bypass
        'nginx.cache.bypass': 'upstream_cache_status',
        'nginx.cache.expired': 'upstream_cache_status',
        'nginx.cache.hit': 'upstream_cache_status',
        'nginx.cache.miss': 'upstream_cache_status',
        'nginx.cache.revalidated': 'upstream_cache_status',
        'nginx.cache.stale': 'upstream_cache_status',
        'nginx.cache.updating': 'upstream_cache_status',
        'nginx.upstream.next.count': None,
        'nginx.upstream.request.count': None
    }

    valid_http_methods = ('head', 'get', 'post', 'put', 'delete', 'options')

    def __init__(self, filename=None, log_format=None, tail=None, **kwargs):
        """
        :param filename: str path of the access log (used for filter
                         matching and as FileTail source)
        :param log_format: str raw log_format declaration
        :param tail: optional pre-built iterable of log lines;
                     falls back to FileTail(filename)
        """
        super(NginxAccessLogsCollector, self).__init__(**kwargs)
        self.filename = filename
        self.parser = NginxAccessLogParser(log_format)
        self.tail = tail if tail is not None else FileTail(filename)

        self.filters = []
        # skip empty filters and filters for other log file
        for log_filter in self.object.filters:
            if log_filter.empty:
                continue
            if log_filter.filename and log_filter.filename != self.filename:
                continue
            self.filters.append(log_filter)

        self.register(
            self.http_method,
            self.http_status,
            self.http_version,
            self.request_length,
            self.body_bytes_sent,
            self.bytes_sent,
            self.gzip_ration,
            self.request_time,
            self.upstreams,
        )

    def init_counters(self):
        """Zero-initialize counters so periods with no traffic report 0."""
        for counter, key in self.counters.iteritems():
            # If keys are in the parser format (access log) or not defined (error log)
            if key in self.parser.keys or key is None:
                self.object.statsd.incr(counter, value=0)

        # init counters for custom filters
        for counter in set(f.metric for f in self.filters):
            self.count_custom_filter(self.filters, counter, 0, self.object.statsd.incr)

    def collect(self):
        """Read available lines from the tail, parse them and emit metrics."""
        self.init_counters()  # set all counters to 0

        count = 0
        for line in self.tail:
            count += 1

            try:
                parsed = self.parser.parse(line)
            except Exception:
                # narrowed from a bare `except:` so SystemExit and
                # KeyboardInterrupt are not swallowed; parse failures
                # are deliberately best-effort (logged at debug)
                context.log.debug('could not parse line %s' % line, exc_info=True)
                parsed = None

            if not parsed:
                continue

            if parsed['malformed']:
                self.request_malformed()
            else:
                # try to match custom filters and collect log metrics with them
                # (renamed loop variable: `filter` shadowed the builtin)
                matched_filters = [
                    log_filter for log_filter in self.filters if log_filter.match(parsed)
                ]
                super(NginxAccessLogsCollector, self).collect(parsed, matched_filters)

        tail_name = self.tail.name if isinstance(self.tail, Pipeline) else 'list'
        context.log.debug('%s processed %s lines from %s' % (self.object.definition_hash, count, tail_name))

    def request_malformed(self):
        """
        nginx.http.request.malformed
        """
        self.object.statsd.incr('nginx.http.request.malformed')

    def http_method(self, data, matched_filters=None):
        """
        nginx.http.method.{head,get,post,put,delete,options,other}

        :param data: {} of parsed line
        :param matched_filters: [] of matched filters
        """
        if 'request_method' in data:
            method = data['request_method'].lower()
            method = method if method in self.valid_http_methods else 'other'
            metric_name = 'nginx.http.method.%s' % method
            self.object.statsd.incr(metric_name)

            if matched_filters:
                self.count_custom_filter(matched_filters, metric_name, 1, self.object.statsd.incr)

    def http_status(self, data, matched_filters=None):
        """
        nginx.http.status.{1xx..5xx} and nginx.http.status.discarded
        (499/444/408 - connections closed without a normal response)

        :param data: {} of parsed line
        :param matched_filters: [] of matched filters
        """
        if 'status' in data:
            status = data['status']
            suffix = 'discarded' if status in ('499', '444', '408') else '%sxx' % status[0]
            metric_name = 'nginx.http.status.%s' % suffix
            self.object.statsd.incr(metric_name)

            if matched_filters:
                self.count_custom_filter(matched_filters, metric_name, 1, self.object.statsd.incr)

    def http_version(self, data, matched_filters=None):
        """
        nginx.http.{v0_9,v1_0,v1_1,v2}

        :param data: {} of parsed line
        :param matched_filters: [] of matched filters
        """
        if 'server_protocol' in data:
            proto = data['server_protocol']
            if not proto.startswith('HTTP'):
                return

            version = proto.split('/')[-1]

            # Ordered roughly by expected popularity to reduce number of calls to `startswith`
            if version.startswith('1.1'):
                suffix = '1_1'
            elif version.startswith('2.0'):
                suffix = '2'
            elif version.startswith('1.0'):
                suffix = '1_0'
            elif version.startswith('0.9'):
                suffix = '0_9'
            else:
                suffix = version.replace('.', '_')

            metric_name = 'nginx.http.v%s' % suffix
            self.object.statsd.incr(metric_name)

            if matched_filters:
                self.count_custom_filter(matched_filters, metric_name, 1, self.object.statsd.incr)

    def request_length(self, data, matched_filters=None):
        """
        nginx.http.request.length

        :param data: {} of parsed line
        :param matched_filters: [] of matched filters
        """
        if 'request_length' in data:
            metric_name, value = 'nginx.http.request.length', data['request_length']
            self.object.statsd.average(metric_name, value)

            if matched_filters:
                self.count_custom_filter(matched_filters, metric_name, value, self.object.statsd.average)

    def body_bytes_sent(self, data, matched_filters=None):
        """
        nginx.http.request.body_bytes_sent

        :param data: {} of parsed line
        :param matched_filters: [] of matched filters
        """
        if 'body_bytes_sent' in data:
            metric_name, value = 'nginx.http.request.body_bytes_sent', data['body_bytes_sent']
            self.object.statsd.incr(metric_name, value)

            if matched_filters:
                self.count_custom_filter(matched_filters, metric_name, value, self.object.statsd.incr)

    def bytes_sent(self, data, matched_filters=None):
        """
        nginx.http.request.bytes_sent

        :param data: {} of parsed line
        :param matched_filters: [] of matched filters
        """
        if 'bytes_sent' in data:
            metric_name, value = 'nginx.http.request.bytes_sent', data['bytes_sent']
            self.object.statsd.incr(metric_name, value)

            if matched_filters:
                self.count_custom_filter(matched_filters, metric_name, value, self.object.statsd.incr)

    def gzip_ration(self, data, matched_filters=None):
        """
        nginx.http.gzip.ratio

        NOTE: "ration" is a historic typo in the method name, kept because
        the name is registered as a collector callback.

        :param data: {} of parsed line
        :param matched_filters: [] of matched filters
        """
        if 'gzip_ratio' in data:
            metric_name, value = 'nginx.http.gzip.ratio', data['gzip_ratio']
            self.object.statsd.average(metric_name, value)

            if matched_filters:
                self.count_custom_filter(matched_filters, metric_name, value, self.object.statsd.average)

    def request_time(self, data, matched_filters=None):
        """
        nginx.http.request.time
        (.median/.max/.pctl95/.count variants are derived at statsd flush)

        :param data: {} of parsed line
        :param matched_filters: [] of matched filters
        """
        if 'request_time' in data:
            metric_name, value = 'nginx.http.request.time', sum(data['request_time'])
            self.object.statsd.timer(metric_name, value)

            if matched_filters:
                self.count_custom_filter(matched_filters, metric_name, value, self.object.statsd.timer)

    def upstreams(self, data, matched_filters=None):
        """
        Reports upstream-related metrics:

        nginx.cache.{bypass,expired,hit,miss,revalidated,stale,updating}
        nginx.upstream.request.count
        nginx.upstream.next.count
        nginx.upstream.{connect,header,response}.time
        (.median/.max/.pctl95/.count variants derived at statsd flush)
        nginx.upstream.status.{1xx,2xx,3xx,4xx,5xx}
        nginx.upstream.response.length

        :param data: {} of parsed line
        :param matched_filters: [] of matched filters
        """
        # bail out early if the line carries no upstream information
        if not any(
            key.startswith('upstream') and data[key] not in ('-', '')
            for key in data
        ):
            return

        # counters
        upstream_response = False
        if 'upstream_status' in data:
            for status in data['upstream_status']:  # upstream_status is parsed as a list
                if status.isdigit():
                    suffix = '%sxx' % status[0]
                    metric_name = 'nginx.upstream.status.%s' % suffix
                    # 2xx/3xx qualifies the response length sample below
                    upstream_response = True if suffix in ('2xx', '3xx') else False
                    self.object.statsd.incr(metric_name)

                    if matched_filters:
                        self.count_custom_filter(matched_filters, metric_name, 1, self.object.statsd.incr)

        if upstream_response and 'upstream_response_length' in data:
            metric_name, value = 'nginx.upstream.response.length', data['upstream_response_length']
            self.object.statsd.average(metric_name, value)

            if matched_filters:
                self.count_custom_filter(matched_filters, metric_name, value, self.object.statsd.average)

        # gauges
        upstream_switches = None
        for metric_name, key_name in {
            'nginx.upstream.connect.time': 'upstream_connect_time',
            'nginx.upstream.response.time': 'upstream_response_time',
            'nginx.upstream.header.time': 'upstream_header_time'
        }.iteritems():
            if key_name in data:
                values = data[key_name]

                # set upstream switches one time
                # (more than one timing value means nginx tried several upstreams)
                if len(values) > 1 and upstream_switches is None:
                    upstream_switches = len(values) - 1

                # store all values
                value = sum(values)
                self.object.statsd.timer(metric_name, value)

                if matched_filters:
                    self.count_custom_filter(matched_filters, metric_name, value, self.object.statsd.timer)

        # log upstream switches
        metric_name, value = 'nginx.upstream.next.count', 0 if upstream_switches is None else upstream_switches
        self.object.statsd.incr(metric_name, value)

        if matched_filters:
            self.count_custom_filter(matched_filters, metric_name, value, self.object.statsd.incr)

        # cache
        if 'upstream_cache_status' in data:
            cache_status = data['upstream_cache_status']
            # '-' means no cache status was reported for the request
            if cache_status != '-':
                metric_name = 'nginx.cache.%s' % cache_status.lower()
                self.object.statsd.incr(metric_name)

                if matched_filters:
                    self.count_custom_filter(matched_filters, metric_name, 1, self.object.statsd.incr)

        # log total upstream requests
        metric_name = 'nginx.upstream.request.count'
        self.object.statsd.incr(metric_name)

        if matched_filters:
            self.count_custom_filter(matched_filters, metric_name, 1, self.object.statsd.incr)

    @staticmethod
    def count_custom_filter(matched_filters, metric_name, value, method):
        """
        Collect custom metric

        Re-emits ``value`` under "metric||rule_id" for every filter whose
        metric matches ``metric_name``.

        :param matched_filters: [] of matched filters
        :param metric_name: str metric name
        :param value: int/float value
        :param method: function to call
        :return:
        """
        for log_filter in matched_filters:
            if log_filter.metric == metric_name:
                full_metric_name = '%s||%s' % (log_filter.metric, log_filter.filter_rule_id)
                method(full_metric_name, value)