def test_parse_line_split_upstream_log_format_empty_upstreams(self):
    """
    Split-parse a line whose trailing upstream fields are all '-'.

    The format ends with rt=/cs=/ut= fields; the sample line carries
    'cs=-' and 'ut="-"', and both must come back as the literal '-'.
    """
    log_format = (
        '$remote_addr - $remote_user [$time_local] '
        '"$request" $status $body_bytes_sent "$http_referer" "$http_user_agent" '
        'rt=$request_time cs=$upstream_cache_status ut="$upstream_response_time"'
    )
    keys, _unused, non_key_patterns, first_value_is_key = decompose_format(
        log_format, full=True
    )

    line = (
        '1.2.3.4 - - [22/Jan/2010:19:34:21 +0300] "GET /foo/ HTTP/1.1" 200 11078 '
        '"http://www.rambler.ru/" "Mozilla/5.0 (Windows; U; Windows NT 5.1" rt=0.010 cs=- ut="-"'
    )

    split_kwargs = {
        'keys': keys,
        'non_key_patterns': non_key_patterns,
        'first_value_is_key': first_value_is_key,
    }
    results = parse_line_split(line, **split_kwargs)

    assert_that(results, not_none())

    # every key of the format has to be present with a non-None value
    for name in keys:
        assert_that(results, has_item(name))
        assert_that(results[name], not_none())

    # the trailing fields prove that the line was consumed to its end
    assert_that(results['upstream_response_time'], equal_to('-'))
    assert_that(results['upstream_cache_status'], equal_to('-'))
def test_parse_line_split(self):
    """
    Split-parse a line written in COMBINED_FORMAT and check that every key
    of the format is returned, including the final user agent field.
    """
    keys, _unused, non_key_patterns, first_value_is_key = decompose_format(
        COMBINED_FORMAT, full=True
    )

    line = (
        '127.0.0.1 - - [02/Jul/2015:14:49:48 +0000] "GET /basic_status HTTP/1.1" 200 110 "-" '
        '"python-requests/2.2.1 CPython/2.7.6 Linux/3.13.0-48-generic"'
    )

    split_kwargs = {
        'keys': keys,
        'non_key_patterns': non_key_patterns,
        'first_value_is_key': first_value_is_key,
    }
    results = parse_line_split(line, **split_kwargs)

    assert_that(results, not_none())

    # every key of the format has to be present with a non-None value
    for name in keys:
        assert_that(results, has_item(name))
        assert_that(results[name], not_none())

    # the last field proves that the line was consumed to its end
    expected_user_agent = 'python-requests/2.2.1 CPython/2.7.6 Linux/3.13.0-48-generic'
    assert_that(results['http_user_agent'], equal_to(expected_user_agent))
def parse(self, line):
    """
    Parses the line and if there are some special fields - parse them too
    For example we can get HTTP method and HTTP version from request

    This variant uses the split mechanic (parse_line_split) rather than
    trie matching directly.

    :param line: log line
    :return: dict with parsed info, or None if the line could not be split
             with the configured format
    """
    result = {'malformed': False}

    # parse the line
    parsed = parse_line_split(
        line,
        keys=self.keys,
        non_key_patterns=self.non_key_patterns,
        first_value_is_key=self.first_value_is_key
    )

    if not parsed:
        context.default_log.debug(
            'could not parse line "%s" with format "%s"' % (line, self.raw_format))
        return None

    for key in self.keys:
        # pick the cast function registered for this variable, falling back
        # to the default one
        func = self.common_variables[key][1] \
            if key in self.common_variables \
            else self.default_variable[1]

        try:
            value = func(parsed[key])
        except ValueError:
            # couldn't cast log value,
            # for example gzip ratio can be '-' and float
            value = 0
        except KeyError:
            # something went wrong with line parsing
            context.default_log.warn(
                'failed to find expected log variable "%s" in access '
                'log line, skipping' % key)
            context.default_log.debug('additional info:')
            context.default_log.debug(
                'keys: %s\nformat: "%s"\nline:"%s"' % (self.keys, self.raw_format, line))
            # Really skip the variable: without this `value` is either
            # unbound (first key) or still holds the previous key's value,
            # which would then be stored under the wrong key.
            continue

        # time variables should be parsed to array of float
        time_var = key.endswith('_time')
        if time_var and value not in ('', '-'):  # skip empty vars
            array_value = []
            for item in value.replace(' ', '').split(','):
                # NOTE(review): float() raises ValueError on a non-numeric
                # item and that propagates to the caller - confirm this is
                # the intended handling for such lines.
                number = float(item)
                # workaround for an old nginx bug with time.
                # ask lonerr@ for details
                if number > 10000000:
                    continue
                array_value.append(number)
            if array_value:
                result[key] = array_value

        # Handle comma separated keys
        if key in self.comma_separated_keys:
            if ',' in value:
                # remove spaces and split values into list
                result[key] = value.replace(' ', '').split(',')
            else:
                result[key] = [value]

        # plain variables are stored as is; time variables are only stored
        # by the branch above (and omitted when empty)
        if key not in result and not time_var:
            result[key] = value

    if 'request' in result:
        try:
            method, uri, proto = result['request'].split(' ')
        except (AttributeError, TypeError, ValueError):
            # not a string, or not exactly "<method> <uri> <protocol>"
            result['malformed'] = True
        else:
            result['request_method'] = method
            result['request_uri'] = uri
            result['server_protocol'] = proto
            # methods shorter than 3 characters are treated as malformed
            if len(method) < 3:
                result['malformed'] = True

    return result