def test_pr8(self):
    parser = apache_log_parser.make_parser('%h %{remote}p %v %{local}p %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" %P %D %{number}n %{SSL_PROTOCOL}x %{SSL_CIPHER}x %k %{UNIQUE_ID}e ')
    data = parser('127.0.0.1 50153 mysite.co.uk 443 [28/Nov/2014:10:03:40 +0000] "GET /mypage/this/that?stuff=all HTTP/1.1" 200 5129 "-" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.120 Safari/537.36" 18572 363701 0 TLSv1.01 MY-CYPHER 0 VHhIfKwQGCMAAEiMUIAAAAF ')
    self.assertEqual(data, {
        'status': '200',
        'extension_ssl_protocol': 'TLSv1.01',
        'request_header_user_agent__browser__family': 'Chrome',
        'time_us': '363701',
        'num_keepalives': '0',
        'request_first_line': 'GET /mypage/this/that?stuff=all HTTP/1.1',
        'pid': '18572',
        'response_bytes_clf': '5129',
        'request_header_user_agent__os__family': u'Windows 7',
        'request_url': '/mypage/this/that?stuff=all',
        'request_http_ver': '1.1',
        'request_header_referer': '-',
        'server_name': 'mysite.co.uk',
        'request_header_user_agent__is_mobile': False,
        'request_header_user_agent__browser__version_string': '37.0.2062',
        'request_header_user_agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.120 Safari/537.36',
        'note_number': '0',
        'request_header_user_agent__os__version_string': '',
        'server_port_local': '443',
        'request_method': 'GET',
        'server_port_remote': '50153',
        'env_unique_id': 'VHhIfKwQGCMAAEiMUIAAAAF',
        'time_received_datetimeobj': datetime.datetime(2014, 11, 28, 10, 3, 40),
        'time_received_isoformat': '2014-11-28T10:03:40',
        'time_received': '[28/Nov/2014:10:03:40 +0000]',
        'time_received_tz_datetimeobj': datetime.datetime(2014, 11, 28, 10, 3, 40, tzinfo=apache_log_parser.FixedOffset("0000")),
        'time_received_tz_isoformat': '2014-11-28T10:03:40+00:00',
        'time_received_utc_datetimeobj': datetime.datetime(2014, 11, 28, 10, 3, 40, tzinfo=apache_log_parser.FixedOffset("0000")),
        'time_received_utc_isoformat': '2014-11-28T10:03:40+00:00',
        'remote_host': '127.0.0.1',
        'extension_ssl_cipher': 'MY-CYPHER',
    })

    parser = apache_log_parser.make_parser('%A %V %p %P %a \"%r\" \"%{main_call}n\" %{some_time}t %b %>s %D %{UNIQUE_ID}e ')
    data = parser('127.0.0.1 othersite 80 25572 192.168.1.100 "GET /Class/method/ HTTP/1.1" "-" 20141128155031 2266 200 10991 VHiZx6wQGCMAAEiBE8kAAAAA:VHiZx6wQGiMAAGPkBnMAAAAH:VHiZx6wQGiMAAGPkBnMAAAAH ')
    self.assertEqual(data, {
        'status': '200',
        'note_main_call': '-',
        'time_some_time': '20141128155031',
        'time_us': '10991',
        'request_http_ver': '1.1',
        'local_ip': '127.0.0.1',
        'pid': '25572',
        'request_first_line': 'GET /Class/method/ HTTP/1.1',
        'request_method': 'GET',
        'server_port': '80',
        'response_bytes_clf': '2266',
        'server_name2': 'othersite',
        'request_url': '/Class/method/',
        'env_unique_id': 'VHiZx6wQGCMAAEiBE8kAAAAA:VHiZx6wQGiMAAGPkBnMAAAAH:VHiZx6wQGiMAAGPkBnMAAAAH',
        'remote_ip': '192.168.1.100',
    })
def __init__(self, config):
    super().__init__()
    self.config = config
    # Open the log file for reading and seek to the end of it.
    self.logHandle = open(self.config.logFilePath)
    self.logHandle.seek(0, 2)
    # This parser is used to parse every log line.
    self.logParser = apache_log_parser.make_parser(LogStats.LOG_FORMAT)
    # If the first parser fails, we try this one.
    self.logParserAlt = apache_log_parser.make_parser(LogStats.LOG_FORMAT_ALT)
    # This lock grants exclusive access to data structures below.
    self.lock = Lock()
    # Various statistics.
    self.numHits = 0          # Total number of requests.
    self.numBadLines = 0      # Number of log lines that could not be parsed.
    self.responseBytesTot = 0 # Total response bytes sent.
    self.retCode2count = defaultdict(int)  # Count for each status code.
    self.method2count = defaultdict(int)   # Count for each request method.
    # This heap keeps track of all sections we have seen so far and their counts.
    self.heap = Heap()
    # Create the alerter and start its event loop in a separate process.
    self.alerter = Alerter(self.config.numHitsToGenAlert, self.config.alertWinLenSecs)
    self.alerterProc = multiprocessing.Process(target=self.alerter.runAlerter)
    self.alerterProc.start()
def __init__(self, pidFile, socketFile, netrcFile):
    super(LogSocketDaemon, self).__init__(pidfile=pidFile)
    self.socketFile = socketFile
    self.logger = logging.getLogger("LogSocketDaemon")
    self.logDBConn = None
    # netrc file with passwords and logins
    self.netrc = netrc.netrc(netrcFile)
    self.onlyLogGeoserver = True
    # It has to be the same as the Apache log format.
    self.parser = apache_log_parser.make_parser('%v:%p %a %l %u %t \"%r\" %>s %O \"%{Referer}i\" \"%{User-Agent}i\"')
    self.parserAlternative = apache_log_parser.make_parser('%v:%p %a %l %u %t \"%r\" %>s %O')
def __init__(self, line, server):
    self._log_line_dict = None
    if server == 'catalina':
        line_parser = apache_log_parser.make_parser("%h - - %t \"%r\" %>s %b")
    elif server == 'apache':
        line_parser = apache_log_parser.make_parser(
            "%h <<%P>> %t %Dus \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" %l %u")
    else:
        line_parser = apache_log_parser.make_parser("%h - - %t \"%r\" %>s %b")
    self._log_line_dict = line_parser(line)
def connections(self, linha):
    # First, try to parse the line as a raw Apache access-log entry.
    try:
        line_parser = apache_log_parser.make_parser(self.conf['apache_mask'])
        log = line_parser(linha)
        if self.conf['vhost_enable']:
            log['vhost'] = linha.split(' ')[0]
        else:
            log['vhost'] = None
        log['owasp'] = self.owasp(log['request_url'])
        if log['owasp']:
            self.send_all(log)
    except:
        pass
    # Fall back to treating the line as an already-structured (dict-like) entry.
    try:
        log = linha
        if "body" in log:
            if log['body'] == "Bad Request":
                url = log['body']['url']
            else:
                url = log['url']
            if "statusCode" not in log:
                log['statusCode'] = "Null"
            if "url" not in log and "url" not in log['body']:
                url = "Null"
            url = url.replace("[masked_session_id]", "").replace("[masked_api_key]", "")
            log['url'] = url
            log['owasp'] = self.owasp(log['url'])
            if log['owasp']:
                self.send_all(log)
    except Exception as e:
        print(e, "error")
def second(s):
    file_path = path.relpath(s)
    with open(file_path) as f:
        new = ' ' + f.read()
    line_parser = apache_log_parser.make_parser("%h %a %v %U %u %f %H %m %X %l %r %p %P %q %R %T ")
    log_line_data = line_parser(new)
    pprint(log_line_data)
def eval_data(self, time_frame, analyzed_time, given_time, confirmed_ips):
    """
    Return a list with one element for each of the last 'check_interval'
    minutes before 'given_time'. Each element of the returned list holds
    the accumulated value of the detections during the preceding
    'time_frame' minutes.
    """
    eval_time = time_frame + analyzed_time
    detect_list = [0] * eval_time
    acum_list = [0] * analyzed_time
    if not self.enabled:
        return acum_list
    time_now_utc = datetime(given_time.year, given_time.month, given_time.day,
                            given_time.hour, given_time.minute)
    # Get the name of the events file.
    addr = urlparse.urlparse(self.__access_log)
    filename = "data/bl-" + addr.hostname + '-' + self.person + "-events.log"
    line_parser = apache_log_parser.make_parser(self.__access_log_format.decode('string_escape'))
    with open(filename, 'r') as f:
        # Detect the time zone from the first line of the log.
        linea = f.readline()
        if linea:
            p = re.compile(r"[\+|-]\d\d\d\d\]")
            tz = p.findall(linea)[0]
            timezone = timedelta(hours=int(tz[0:3]), minutes=int(tz[0] + tz[3:5]))
        while linea:
            log_line_data = line_parser(linea)
            if confirmed_ips.count(log_line_data['remote_host']):
                l = log_line_data['time_received_datetimeobj']
                line_time_utc = datetime(l.year, l.month, l.day, l.hour, l.minute) - timezone
                if line_time_utc > time_now_utc:
                    break
                # Convert the timestamp to a list index.
                i = int((time_now_utc - line_time_utc).total_seconds() / 60)
                if i < eval_time:
                    # List of detection weights.
                    detect_list[eval_time - i - 1] += self.__weight
            linea = f.readline()
    # print "Detect list:", detect_list
    # Accumulate the detection weights for the given ranges.
    for i in range(1, analyzed_time + 1):
        acum_list[analyzed_time - i] = sum(detect_list[eval_time - time_frame - i:eval_time - i + 1])
    return acum_list
def create_log_parser():
    "Create parser for apache log entries (webfaction default)"
    # apache config:
    # %{X-Forwarded-For}i %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"
    format_string = r'%a %l %u %t "%r" %>s %b "%{Referer}i" "%{User-Agent}i"'
    log_parser = apache_log_parser.make_parser(format_string)
    return log_parser
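# A minimal usage sketch for the parser returned by create_log_parser() above.
# The sample line below is invented purely to illustrate the format string
# ('%a' in the first position) and assumes apache_log_parser is installed.
parser = create_log_parser()
sample = '203.0.113.7 - - [16/Aug/2013:15:45:34 +0000] "GET /index.html HTTP/1.1" 200 3478 "-" "curl/7.58.0"'
entry = parser(sample)
# The parser returns a dict of named fields derived from the format string.
print(entry['remote_ip'], entry['status'], entry['request_url'])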
def file_parser(filename):
    """file_parser

    Opens the log file, parses each line into a log entry and sorts the
    entries based on the timestamp of when the server received the request,
    so that they can later be grouped into lists of entries that occurred in
    the same minute.

    The log format is assumed to be "%a %l %u %t \"%r\" %>s %b %D"

    :param filename: The filename of the log file to parse.
    :returns: A list containing the parsed entries sorted by the time they
        were received by the server.
    """
    LOG_FORMAT = "%a %l %u %t \"%r\" %>s %b %D"
    line_parser = apache_log_parser.make_parser(LOG_FORMAT)
    parsed_entries = []
    with open(filename) as f:
        for line in f:
            parsed_entries.append(line_parser(line))
    # Sort the parsed log entries by timestamp. Some of the log entries in the
    # provided example take a long time to process so they are not in order;
    # this messes up splitting the entries into minute chunks for processing.
    parsed_entries.sort(key=lambda x: x.get('time_received_utc_datetimeobj'))
    return parsed_entries
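# The docstring above mentions grouping entries into per-minute chunks; a
# minimal sketch of that step, built on the sorted list returned by
# file_parser(), might look like this (the helper name group_by_minute is
# made up for illustration).
from itertools import groupby

def group_by_minute(parsed_entries):
    """Yield lists of parsed entries that share the same minute."""
    def minute_key(entry):
        # Truncate the received timestamp to minute precision.
        return entry['time_received_utc_datetimeobj'].replace(second=0, microsecond=0)
    for _, chunk in groupby(parsed_entries, key=minute_key):
        yield list(chunk)

# Usage: for minute_entries in group_by_minute(file_parser("access.log")): ...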
def test_simple(self):
    format_string = "%h <<%P>> %t %Dus \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" %l %u"
    parser = apache_log_parser.make_parser(format_string)
    sample = '127.0.0.1 <<6113>> [16/Aug/2013:15:45:34 +0000] 1966093us "GET / HTTP/1.1" 200 3478 "https://example.com/" "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18)" - -'
    log_data = parser(sample)
    self.assertNotEqual(log_data, None)
    self.assertEqual(log_data['status'], '200')
    self.assertEqual(log_data['pid'], '6113')
    self.assertEqual(log_data['request_first_line'], 'GET / HTTP/1.1')
    self.assertEqual(log_data['request_method'], 'GET')
    self.assertEqual(log_data['request_url'], '/')
    self.assertEqual(log_data['request_header_referer'], 'https://example.com/')
    self.assertEqual(log_data['request_header_user_agent'],
                     'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18)')
    self.assertEqual(log_data['request_header_user_agent__os__family'], 'Linux')
    self.assertEqual(apache_log_parser.get_fieldnames(format_string),
                     ('remote_host', 'pid', 'time_received', 'time_us', 'request_first_line',
                      'status', 'response_bytes_clf', 'request_header_referer',
                      'request_header_user_agent', 'remote_logname', 'remote_user'))
def get_point(self):
    parsers = {}
    for log_format in self.logformats.keys():
        parsers[log_format] = apache_log_parser.make_parser(self.logformats[log_format])
    last_linecount = {}
    while True:
        result = {}
        for file in self.accesslogs:
            filename = file[0]
            fileformat = file[1]
            with open(filename) as logfile:
                lines = logfile.readlines()
                start = 0
                if filename in last_linecount.keys() and len(lines) > last_linecount[filename]:
                    start = last_linecount[filename]
                last_linecount[filename] = len(lines)
                new_lines = lines[start:]
                for line in new_lines:
                    parsed = parsers[fileformat](line[:-1])
                    status = int(parsed['status'])
                    if status in result.keys():
                        result[status] += 1
                    else:
                        result[status] = 1
        yield result
        sleep(60)
def read_apache_log(fn, P, area, logformat='%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"'):
    parser = apache_log_parser.make_parser(logformat)
    cou = area["line_num"]
    count = 0
    with open(fn) as f:
        for line in f:
            if cou > 0:
                cou -= 1
                continue
            count += 1
            try:
                parsed_line = parser(line)
                host, time = parsed_line["remote_host"], parsed_line["time_received_tz_datetimeobj"]
                if area["flag"] == "n":
                    P.append([host, time])
                elif area["from"] <= time <= area["to"]:
                    P.append([host, time])
            except ValueError:
                pass
            mem = psutil.virtual_memory()
            if mem.percent >= area["memory_usage"]:
                area["line_num"] += count
                area["memory_flag"] = "on"
                break
            else:
                area["memory_flag"] = "ok"
def on_modified(self, event):
    with open(event.src_path, 'r') as log_source:
        log_string = ' ' + log_source.read()
    line_parser = apache_log_parser.make_parser("%h %a %v %U %u %f %H %m %X %l %r %p %P %q %R %T ")
    log_line_data = line_parser(log_string)
    print(log_line_data)
def eval_data(self, time_frame, analyzed_time, given_time, confirmed_ips):
    """
    Return a list with one element for each of the last 'check_interval'
    minutes before 'given_time'. Each element of the returned list holds
    the accumulated value of the detections during the preceding
    'time_frame' minutes.
    """
    eval_time = time_frame + analyzed_time
    detect_list = [0] * eval_time
    acum_list = [0] * analyzed_time
    if not self.enabled:
        return acum_list
    time_now_utc = datetime(given_time.year, given_time.month, given_time.day,
                            given_time.hour, given_time.minute)
    line_parser = apache_log_parser.make_parser(self.__webbug_log_format.decode('string_escape'))
    for remoteaddr in self.__webbug_log:
        # Get the name of the events file.
        addr = urlparse.urlparse(remoteaddr)
        filename = "data/wb-" + addr.hostname + '-' + self.person + "-events.log"
        with open(filename, 'r') as f:
            # Detect the time zone from the first line of the log.
            linea = f.readline()
            if linea:
                p = re.compile(r"[\+|-]\d\d\d\d\]")
                tz = p.findall(linea)[0]
                timezone = timedelta(hours=int(tz[0:3]), minutes=int(tz[0] + tz[3:5]))
            visiting_ips = []
            while linea:
                log_line_data = line_parser(linea)
                current_ip = log_line_data['remote_host']
                if confirmed_ips.count(current_ip):
                    l = log_line_data['time_received_datetimeobj']
                    line_time_utc = datetime(l.year, l.month, l.day, l.hour, l.minute) - timezone
                    if line_time_utc > time_now_utc:
                        break
                    # Convert the timestamp to a list index.
                    i = int((time_now_utc - line_time_utc).total_seconds() / 60)
                    if i < eval_time:
                        ref = Referer(log_line_data['request_header_referer'])
                        origin = urlparse.urlparse(log_line_data['request_first_line'])
                        if (ref.medium == 'search') and (ref.search_term is not None):
                            # A search with terms.
                            detect_list[eval_time - i - 1] += self.__weight
                        elif (ref.medium == 'search') and (ref.search_term is None):
                            # A search without terms.
                            detect_list[eval_time - i - 1] += self.__weight_no_search_terms
                        elif (self.__weight_visit > 0) and \
                                (not visiting_ips.count([current_ip, origin.hostname])):
                            # A plain visit; each ip/origin pair only scores once.
                            visiting_ips.append([current_ip, origin.hostname])
                            detect_list[eval_time - i - 1] += self.__weight_visit
                linea = f.readline()
    # Accumulate the detection weights for the given ranges.
    for i in range(1, analyzed_time + 1):
        acum_list[analyzed_time - i] = sum(detect_list[eval_time - time_frame - i:eval_time - i + 1])
    return acum_list
def parse_log_line(line):
    line_parser = apache_log_parser.make_parser(
        "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"")
    log_line_data = line_parser(line)
    user_agent = log_line_data['request_header_user_agent__browser__family']
    user = log_line_data['remote_user']
    method = log_line_data['request_method']
    status = log_line_data['status']
    timestamp = log_line_data['time_received_isoformat']
    # pprint(log_line_data)
    pri = 3 * 8 + 2
    if status.startswith("5"):
        pri = 1 * 8 + 2
    if status.startswith("4"):
        pri = 2 * 8 + 2
    version = 1
    hostname = socket.gethostname()
    appname = "Server"
    procid = "-"
    msgid = "-"
    sd = "[%s %s=\"%s\" %s=\"%s\" %s=\"%s\" %s=\"%s\"]" % \
        ("SSID", "User-Agent", user_agent, "user", user,
         "req-method", method, "status", status)
    msg = "-"
    return "<%s>%s %s %s %s %s %s %s %s\n" % (
        pri, version, timestamp, hostname, appname, procid, msgid, sd, msg)
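# A quick way to exercise parse_log_line() above is to feed it a single
# combined-format line; the sample below is fabricated for illustration and
# assumes socket and apache_log_parser are imported in the same module.
if __name__ == "__main__":
    sample = ('192.0.2.10 - alice [10/Oct/2000:13:55:36 -0700] '
              '"GET /index.html HTTP/1.0" 404 2326 "-" "curl/7.58.0"')
    # Prints a syslog-style line whose priority is derived from the status code.
    print(parse_log_line(sample))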
def connections(linha):
    line_parser = apache_log_parser.make_parser(conf['apache_mask'])
    log = line_parser(linha)
    if conf['vhost_enable'] == 'True':
        log['vhost'] = linha.split(' ')[0]
    else:
        log['vhost'] = None
    log['owasp'] = owasp(log['request_url'])
    if log['owasp']:
        msg = '''[+] - Intrusion Attempt - [+]
Date: %s
Vhost: %s
IP: %s
Path: %s
User-Agent: %s
Browser: %s
S.O: %s
Description: %s
Impact: %s
Category: %s''' % (log['time_received'], log['vhost'], log['remote_host'], log['request_url'],
                   log['request_header_user_agent'],
                   log['request_header_user_agent__browser__family'] + ' ' + log['request_header_user_agent__browser__version_string'],
                   log['request_header_user_agent__os__family'], log['owasp']['description'],
                   log['owasp']['impact'], ','.join(log['owasp']['tags']['tag']))
        print msg
        print
        if conf['zabbix_enable'] == 'True':
            send_zabbix(msg)
        if conf['telegram_enable'] == 'True':
            time.sleep(3)
            bot.sendMessage(conf['group_id'], msg)
class LogEntryMonitoring(Thread):
    PARSER = apache_log_parser.make_parser('%h %u %l %t "%r" %s %B')

    def __init__(self, publisher, file_path):
        Thread.__init__(self)
        self.daemon = True
        self.publisher = publisher
        self.file_path = file_path

    def run(self):
        file = open(self.file_path, 'r')
        file.seek(0, 2)
        while True:
            line = file.readline()
            if not line.strip():
                time.sleep(0.25)
                continue
            try:
                entry = self.parse_line(line)
                self._fire_new_entry_event(entry)
            except LineDoesntMatchException:
                pass

    @staticmethod
    def parse_line(line):
        parts = LogEntryMonitoring.PARSER(line)
        return Entry.factory(parts)

    def _fire_new_entry_event(self, entry):
        self.publisher.sendMessage(TopicUtils.NEW_ENTRY_TOPIC, entry=entry)
def parse_url_and_time(base, format):
    """
    Given a list of files/directories, parse each line with apache-log-parser,
    extract the URL and time data, and yield that.
    """
    parser = apache_log_parser.make_parser(format)
    for filename in files(base):
        for line in open_anything(filename):
            try:
                match = parser(line)
            except apache_log_parser.LineDoesntMatchException as ex:
                # Ignore it
                pass
            else:
                # Extract matching data from regex
                results = {
                    'url': match['request_url'],
                    'microsec': int(match['time_us']),
                    'method': match['request_method'],
                    'ipaddr': match['remote_host'],
                    'datetime': match['time_recieved_isoformat'],
                }
                yield results
def main(**kwargs):
    print('Converting, please wait...')
    line_parser = apache_log_parser.make_parser(kwargs['format'])
    header = True
    with open(kwargs['input'], 'rb') as inFile, open(kwargs['output'], 'w') as outFile:
        lines = inFile.readlines()
        writer = csv.writer(outFile, delimiter=';')
        for line in lines:
            try:
                log_line_data = line_parser(line)
            except apache_log_parser.LineDoesntMatchException as ex:
                print(bcolors.FAIL + 'The format specified does not match the log file. Aborting...' + bcolors.ENDC)
                print('Line: ' + ex.log_line + 'RegEx: ' + ex.regex)
                exit()
            if header:
                writer.writerow(list(log_line_data.keys()))
                header = False
            else:
                writer.writerow(list(log_line_data.values()))
    print(bcolors.OKGREEN + 'Conversion finished.' + bcolors.ENDC)
def get_ips(self, time_frame, given_time):
    """
    Return a list with each of the IPs detected during the 'time_frame'
    minutes prior to 'given_time'. Each IP is accompanied by a verification
    flag (True/False) indicating whether the detection of that IP is
    conclusive or whether the positive needs to be confirmed by another
    plugin.
    """
    ip_list = []
    if not self.enabled:
        return ip_list
    delta_frame = timedelta(minutes=time_frame)
    line_parser = apache_log_parser.make_parser(self.__webbug_log_format.decode('string_escape'))
    for remoteaddr in self.__webbug_log:
        # Get the name of the events file.
        addr = urlparse.urlparse(remoteaddr)
        filename = "data/wb-" + addr.hostname + '-' + self.person + "-events.log"
        with open(filename, 'r') as f:
            # Detect the time zone from the first line of the log.
            linea = f.readline()
            if linea:
                p = re.compile(r"[\+|-]\d\d\d\d\]")
                tz = p.findall(linea)[0]
                timezone = timedelta(hours=int(tz[0:3]), minutes=int(tz[0] + tz[3:5]))
            while True:
                if not linea:
                    break
                log_line_data = line_parser(linea)
                line_time_utc = log_line_data['time_received_datetimeobj'] - timezone
                if line_time_utc > given_time:
                    break
                if line_time_utc > given_time - delta_frame:
                    ip = log_line_data['remote_host']
                    ip_only_list = [a[0] for a in ip_list]
                    origin = urlparse.urlparse(log_line_data['request_first_line']).hostname
                    ref = Referer(log_line_data['request_header_referer'])
                    if not ip_only_list.count(ip):
                        if (ref.medium == 'search') and (ref.search_term is not None):
                            # New IP, add it as a conclusive detection.
                            ip_list.append([ip, True, origin])
                        else:
                            ip_list.append([ip, False, origin])
                    elif origin != ip_list[ip_only_list.index(ip)][2]:
                        # Same IP seen on a different page: treat it as a positive.
                        ip_list[ip_only_list.index(ip)][1] = True
                linea = f.readline()
    return [c[0:2] for c in ip_list]
def parse_apache_log(log):
    # The default parsers are for Apache access logs.
    parsed = None
    # if the identified file name is an apache access log
    # if 'access_log' == str(id_helper[:10]):
    parser_regex = [
        "%h <<%P>> %t %Dus \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" %l %u",
        "%h %l %u %t \"%r\" %>s %b",
        "%t %h %{SSL_PROTOCOL}x %{SSL_CIPHER}x \"%r\" %b",
    ]
    for regex in parser_regex:
        try:
            parser = apache_log_parser.make_parser(regex)
            parsed = parser(log)
        except:
            parsed = None
        # If the parser produced something, stop trying further formats.
        if parsed:
            break
    # else:
    #     print('\n\nUnidentified file name "{0}" in get_parser\n\n'.format(id_helper))
    print('\n\nParsed Apache Log \n"{0}" to: \n"{1}"\n\n'.format(log, parsed))
    if not parsed:
        parsed = parse_on_your_own_error_log(log)
    if not parsed:
        print('Unable to parse: "{0}"'.format(log))
        # input()
        with open('act-out.txt', 'a') as o:
            o.write('\nUnable to parse log:\n{0}\n\n'.format(log))
    return parsed
def run(self): """ Main run method. """ quiet = self.args.quiet lines = self.args.file.read() if not lines: self.stderr("Log file is empty") sys.exit(1) # Work line by line lines = lines.split('\n') # Build our parser line_parser = apache_log_parser.make_parser(self.args.format) if not quiet: self.stderr(f"Parsing {len(lines)} lines...") # Hold the stats per value stats = defaultdict(int) # Use dynamic fields for fun and profit fields = self.args.fields.split(',') # Parse each line for line in lines: if not line or not line.strip(): # Ignore completely blank lines continue # Print a dot when we parse a line if not quiet: # Print dots to follow progress on very large files sys.stderr.write('.') # Do the parsing using our handy library try: values = line_parser(line) except apache_log_parser.LineDoesntMatchException: self.stderr(f"Could not parse line: {line}") stats['-'] += 1 continue # Find a field that has a good value - this lets us handle both # X-Forwarded-For (the IP of a remote client) and remote_host (the # IP of a load balancer doing healtchecks) for field in fields: value = values.get(field, None) if value and value != '-': stats[value] += 1 break # Maybe we didn't find a good IP, but we just log and count it if not value or value == '-': self.stderr(f"Could not parse line: {line}") stats['-'] += 1 if not quiet: # Print a newline to end the dots if we printed them self.stderr('') self.print_stats(stats)
def test_issue22_http2(self):
    line_parser = apache_log_parser.make_parser(
        "%h %l %u %t \"%r\" %>s %O \"%{Referer}i\" \"%{User-Agent}i\"")
    sample = '''137.226.113.25 - - [31/Dec/2017:03:14:19 +0100] "GET / HTTP/2" 200 0 "-" "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:50.0) Gecko/20100101 Firefox/50.0"'''
    log_data = line_parser(sample)
    expected_data = {
        'bytes_tx': '0',
        'remote_host': '137.226.113.25',
        'remote_logname': '-',
        'remote_user': '-',
        'request_first_line': 'GET / HTTP/2',
        'request_header_referer': '-',
        'request_header_user_agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; '
                                     'rv:50.0) Gecko/20100101 Firefox/50.0',
        'request_header_user_agent__browser__family': 'Firefox',
        # user_agents sometimes returns this with a trailing '.', e.g. '50.0.'
        # 'request_header_user_agent__browser__version_string': '50.0',
        'request_header_user_agent__is_mobile': False,
        'request_header_user_agent__os__family': 'Ubuntu',
        'request_header_user_agent__os__version_string': '',
        'request_http_ver': '2',
        'request_method': 'GET',
        'request_url': '/',
        'status': '200',
        'time_received': '[31/Dec/2017:03:14:19 +0100]',
    }
    for k, v in expected_data.items():
        self.assertEqual(log_data[k], v)
    log_data = line_parser(
        '''165.226.7.238 - - [18/Dec/2020:14:54:27 +0000] "GET / HTTP/1.0" 200 19245 "-" "-"''')
    self.assertEqual(log_data['request_http_ver'], '1.0')
    log_data = line_parser(
        '''17.103.15.13 - - [17/Dec/2020:00:45:26 +0000] "GET /feed.xml HTTP/1.1" 304 244 "-" "Tiny Tiny RSS/UNKNOWN (Unsupported) (http://tt-rss.org/)"''')
    self.assertEqual(log_data['request_http_ver'], '1.1')
    log_data = line_parser(
        '''0.0.0.0 - - [13/Dec/2020:14:49:49 +0000] "GET /static/favicon.png HTTP/2.0" 200 2628 "-" "-"''')
    self.assertEqual(log_data['request_http_ver'], '2.0')
def _get_apache_key_count(self, key):
    d = dict()
    line_parser = apache_log_parser.make_parser(config.APACHE_LOGS_FORMAT)
    for log_line in self.apache_malware_dl:
        log_line_data = line_parser(log_line)
        key_value = log_line_data[key]
        d[key_value] = d.get(key_value, 0) + 1
    return d
def test_issue11(self):
    format_string = "%h <<%P>> %t %Dus \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" %l %u"
    parser = apache_log_parser.make_parser(format_string)
    sample = '127.0.0.1 <<6113>> [16/Aug/2013:15:45:34 +0000] 1966093us "DELETE / HTTP/1.1" 200 3478 "https://example.com/" "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18)" - -'
    log_data = parser(sample)
    self.assertNotEqual(log_data, None)
    self.assertEqual(log_data['request_first_line'], 'DELETE / HTTP/1.1')
    self.assertEqual(log_data['request_method'], 'DELETE')
def test_issue10_host(self):
    # hostname lookup should work
    format_string = "%h %l %u %t \"%r\" %>s %b"
    parser = apache_log_parser.make_parser(format_string)
    sample = '2001:0db8:85a3:0000:0000:8a2e:0370:7334 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326'
    log_data = parser(sample)
    self.assertNotEqual(log_data, None)
    self.assertEqual(log_data['remote_host'], '2001:0db8:85a3:0000:0000:8a2e:0370:7334')
def parse_messages(self):
    f = open(self.file_name)
    raw_messages = [line.rstrip('\n') for line in f.readlines()]
    return_array = []
    line_parser = apache_log_parser.make_parser(
        "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"")
    for raw_message in raw_messages:
        parsed_message = line_parser(raw_message)
        return_array.append({"raw_message": raw_message, "parsed_message": parsed_message})
    return return_array
def __init__(self, log_entry_queue, domains, global_stats, log_format):
    threading.Thread.__init__(self)
    self.daemon = True
    self.log_entry_queue = log_entry_queue
    self.domains = domains
    self.global_stats = global_stats
    self.parser = apache_log_parser.make_parser(log_format)
def __init__(self, file, format=None):
    import apache_log_parser
    self.file = self.rough_filter(file)
    if not format:
        # self.format = '%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"'
        self.format = CLFParser.DEFAULT_FORMAT
    else:
        self.format = format
    self.parser = apache_log_parser.make_parser(self.format)
def _parse_file_content(self, content):
    assert content
    if self.line_parser is None:
        self.line_parser = apache_log_parser.make_parser(
            "%h <<%P>> %t %Dus \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" %l %u")
    self.access_events = []
    for line in content:
        access_event = self.line_parser(line)
        self.access_events.append(access_event)
    return self.access_events
def test_issue12_nonnum_status(self):
    # In case status is - as opposed to a number
    format_string = "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\""
    parser = apache_log_parser.make_parser(format_string)
    sample1 = '002:52ee:xxxx::x - - [11/Jun/2014:22:55:45 +0000] "GET /X230_2.51_g2uj10us.iso HTTP/1.1" - 3414853 "refer" "Mozilla/5.0 (X11; Linux x86_64; rv:29.0) Gecko/20100101 Firefox/29.0"'
    log_data1 = parser(sample1)
    self.assertNotEqual(log_data1, None)
    self.assertEqual(log_data1['status'], '-')
def parse_log(request): site_obj = Site.objects.order_by("-id") site_list = list(site_obj) log_format_id = int(request.POST.get('log_format_id')) site_id = int(request.POST.get('site_id')) log_format_model = LogFormats.objects.get(id=log_format_id) log_format = str(log_format_model.log_format) line_parser = apache_log_parser.make_parser(log_format) uploaded_file = request.FILES.get('uploaded_file') parsed_log_list = [] log_lines = [] for line in uploaded_file.file: try: line = line.strip() if bool(line) and line not in log_lines: data = line_parser(line) apl = ApacheLog(**data) apl.full_line = line apl.site_id = site_id apl.log_format_id = log_format_id parsed_log_list.append(apl) log_lines.append(line) except Exception as e: return render( request, 'upload_log.html', { 'msg': "Invalid file or Log format!", 'site_id': site_id, 'sites': site_list }) try: from itertools import islice start = 0 batch_size = 10 stop = batch_size while stop <= len(parsed_log_list): batch = list(islice(parsed_log_list, start, stop)) if not batch: break ApacheLog.objects.bulk_create(batch, batch_size) start = stop stop += batch_size if stop > len(parsed_log_list): stop = len(parsed_log_list) except IntegrityError as ie: # should not happen as duplicates should be removed before.. print "Duplicates found!" # return render(request, 'upload_log.html', {'msg': "Uniqueness failed! Most probably file uploaded before!", 'site_id': site_id, 'sites': site_list}) except Exception, e: return render(request, 'upload_log.html', { 'msg': e.message, 'site_id': site_id, 'sites': site_list })
def load(self, filename, tag="default"): line_parser = apache_log_parser.make_parser( "%h %{X-Forwarded-For}i %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" ) count = 0 with open(filename) as f: content = f.readlines() for line in content: # some lines in our access_log are missing the remote_user column # because there are apparently two different CustomLog lines on some # of our servers line = re.sub("- [[]", '- "" [', line) try: data = line_parser(line) except ValueError: self.log.error("Couldn't parse this line: {0}".format(line)) else: (bare_url, query_string) = self.split_url(data['request_url']) self.cursor.execute(""" INSERT INTO access_log ( tag, timestamp, remote_ip, x_forwarded_for, remote_user, method, url, bare_url, query_string, status, bytes, referrer, user_agent, is_mobile) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?) """, [ tag, data['time_received_datetimeobj'].strftime('%s'), data['remote_host'], data['request_header_x_forwarded_for'], data['remote_user'], data['request_method'], data['request_url'], bare_url, query_string, data['status'], data['response_bytes_clf'], data['request_header_referer'], data['request_header_user_agent'], data['request_header_user_agent__is_mobile'] ] ) count +=1 if count % 1000 == 0: self.log.info("Processed {0} lines ({1}%)".format(count, float(count)/float(len(content)) * 100)) self.db.commit() self.db.close() return(len(content))
def parse_log(log):
    '''
    Parses a single server log.

    log: a line from the server log file

    Returns: parsed log
    '''
    apache_combined_format = "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\""
    line_parser = make_parser(apache_combined_format)
    log_data = line_parser(log)
    return log_data
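# A small usage sketch for parse_log() above; the access-log line is
# fabricated for illustration and assumes make_parser has been imported from
# apache_log_parser as in the snippet.
sample = ('198.51.100.4 - - [16/Aug/2013:15:45:34 +0000] '
          '"GET /about HTTP/1.1" 200 512 "https://example.com/" "Mozilla/5.0"')
entry = parse_log(sample)
print(entry['remote_host'])             # '198.51.100.4'
print(entry['status'])                  # '200'
print(entry['request_url'])             # '/about'
print(entry['request_header_referer'])  # 'https://example.com/'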
def parse_log_file(name_file, data):
    line_parser = apache_log_parser.make_parser("%h %l %u %t \"%r\" %>s %b")
    data_parse = []
    with open(name_file, 'w', newline='') as file:
        writer = csv.writer(file)
        for record in data:
            record_parse = line_parser(record)
            if (record_parse['request_method'] == 'GET' and
                    record_parse['status'] == '200' and
                    record_parse['request_url_path'].lower().find('jpg') < 0 and
                    record_parse['request_url_path'].lower().find('gif') < 0 and
                    record_parse['request_url_path'].lower().find('png') < 0 and
                    record_parse['request_url_path'].lower().find('bmp') < 0 and
                    record_parse['request_url_path'].lower().find('mpg') < 0 and
                    record_parse['request_url_path'].lower().find('xmb') < 0 and
                    record_parse['request_url_path'].lower().find('jpeg') < 0 and
                    record_parse['request_url_path'].lower().find('xbm') < 0):
                record_parse_data = []
                record_parse_data.append(record_parse['remote_host'])
                record_parse_data.append(record_parse['request_http_ver'])
                record_parse_data.append(record_parse['request_method'])
                record_parse_data.append(record_parse['request_url_path'])
                record_parse_data.append(record_parse['response_bytes_clf'])
                record_parse_data.append(record_parse['status'])
                date_time = datetime.strptime(record_parse['time_received_isoformat'], '%Y-%m-%dT%H:%M:%S')
                record_parse_data.append(str(date_time.date()))
                record_parse_data.append(str(date_time.time()))
                record_parse_data.append(str(date_time.year))
                record_parse_data.append(str(date_time.month))
                record_parse_data.append(str(date_time.day))
                record_parse_data.append(str(date_time.hour))
                record_parse_data.append(str(date_time.minute))
                record_parse_data.append(str(date_time.second))
                data_parse.append(record_parse_data)
                writer.writerow(record_parse_data)
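# The long chain of find() calls above filters image-like URLs; an
# easier-to-maintain check could keep the extensions in a tuple, as in this
# sketch (the helper name is_static_asset is invented here, and it matches on
# the URL suffix rather than on a substring anywhere in the path).
STATIC_EXTENSIONS = ('.jpg', '.jpeg', '.gif', '.png', '.bmp', '.mpg', '.xmb', '.xbm')

def is_static_asset(url_path):
    """Return True when the request path ends with a known static-asset extension."""
    return url_path.lower().endswith(STATIC_EXTENSIONS)

# Example: is_static_asset('/img/logo.PNG') -> True, is_static_asset('/index.html') -> False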
class GrLogLineReader(RegexApacheLineReader):
    line_parser = make_parser('%a %b %B %t %m %q %H %X %P %r %R')
    regexes = [
        (BHS_PATTERN[1:] if BHS_PATTERN.startswith('^') else BHS_PATTERN) + '[\_\-A-za-z0-9]{0,50}\.pdf',
        (CDC1_PATTERN[1:] if CDC1_PATTERN.startswith('^') else CDC1_PATTERN) + '[\_\-A-za-z0-9]{0,50}\.pdf',
        (CDC2_PATTERN[1:] if CDC2_PATTERN.startswith('^') else CDC2_PATTERN) + '[\_\-A-za-z0-9]{0,50}\.pdf',
    ]
def __init__(self, filename):
    self.parser = apache_log_parser.make_parser(
        "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" %v")
    self.filename = filename.split(".")[0]
    self.bots_filtered = False
    self.is_html_only = False
    self.staff_filtered = False
    self.parsed_log = []
    self.add_logs(filename)
    self.filter_non_page_requests()
    self.filter_bots()
def parse_logs(self, logs):
    line_parser = apache_log_parser.make_parser(
        "%v %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"")
    dictList = []
    for log in logs:
        try:
            dictList.append(line_parser(log))
            print '-- Appended! --'
        except Exception as e:
            pass
    # print logs[-3]
    # print dictList[-3]
    return dictList
def reader(args):
    line_parser = apache_log_parser.make_parser(args.format)
    if args.auth is not None:
        credentials = args.auth.split(':')
        args.auth = requests.auth.HTTPBasicAuth(credentials[0], credentials[1])
    pool = Pool(args.workers)
    gt = GeventTail(file_name=args.log_file)
    for line in gt.readline():
        pool.spawn(worker, args, line, line_parser)
    pool.join()
def get_gulp_vars(self, APACHE_LOG_FILEPATH=None, pattern=None):
    """ Initializes vars.
        Called by gulp() """
    url_lst = []
    extracted_lst = []
    discounted_lst = []
    if APACHE_LOG_FILEPATH is None:
        APACHE_LOG_FILEPATH = self.APACHE_LOG_FILEPATH
    if pattern is None:
        pattern = self.APACHE_COMBINED_PATTERN
        # pattern = self.APACHE_COMMON_PATTERN
    line_parser = apache_log_parser.make_parser(pattern)
    logging.debug('path, ```{}```'.format(APACHE_LOG_FILEPATH))
    return (url_lst, extracted_lst, discounted_lst, APACHE_LOG_FILEPATH, line_parser)
def connections(self, linha):
    try:
        line_parser = apache_log_parser.make_parser(self.conf['apache_mask'])
        log = line_parser(linha)
        if self.conf['vhost_enable']:
            log['vhost'] = linha.split(' ')[0]
        else:
            log['vhost'] = None
        log['owasp'] = self.owasp(log['request_url'])
        if log['owasp']:
            log['cef_date'] = log['time_received_datetimeobj'].strftime('%b %d %Y %H:%M:%S')
            self.send_all(log)
    except:
        pass
def eval_data(self, time_frame, analyzed_time, given_time, confirmed_ips):
    """
    Return a list with one element for each of the last 'check_interval'
    minutes before 'given_time'. Each element of the returned list holds
    the accumulated value of the detections during the preceding
    'time_frame' minutes.
    """
    eval_time = time_frame + analyzed_time
    detect_list = [0] * eval_time
    acum_list = [0] * analyzed_time
    if not self.enabled:
        return acum_list
    time_now_utc = datetime(given_time.year, given_time.month, given_time.day,
                            given_time.hour, given_time.minute)
    # Get the name of the events file.
    addr = urlparse.urlparse(self.__access_log)
    filename = "data/mc-" + addr.hostname + '-' + self.person + "-events.log"
    line_parser = apache_log_parser.make_parser(self.__access_log_format.decode('string_escape'))
    with open(filename, 'r') as f:
        # Detect the time zone from the first line of the log.
        linea = f.readline()
        if linea:
            p = re.compile(r"[\+|-]\d\d\d\d\]")
            tz = p.findall(linea)[0]
            timezone = timedelta(hours=int(tz[0:3]), minutes=int(tz[0] + tz[3:5]))
        while linea:
            log_line_data = line_parser(linea)
            if confirmed_ips.count(log_line_data['remote_host']):
                l = log_line_data['time_received_datetimeobj']
                line_time_utc = datetime(l.year, l.month, l.day, l.hour, l.minute) - timezone
                if line_time_utc > time_now_utc:
                    break
                # Convert the timestamp to a list index.
                i = int((time_now_utc - line_time_utc).total_seconds() / 60)
                if i < eval_time:
                    # List of detection weights.
                    detect_list[eval_time - i - 1] += self.__weight
            linea = f.readline()
    # print "Detect list:", detect_list
    # Accumulate the detection weights for the given ranges.
    for i in range(1, analyzed_time + 1):
        acum_list[analyzed_time - i] = sum(detect_list[eval_time - time_frame - i:eval_time - i + 1])
    return acum_list
def make_csv(filepath):
    # log_format = '%h %l %u %t \"%r\" %>s %b'
    log_format = r'%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"'
    parser = apache_log_parser.make_parser(log_format)
    read = open(filepath)
    # this is only for our files
    write = open(str(filepath[:28]) + str(filepath[-12:-4]) + '.csv', 'w')
    writer = csv.writer(write, lineterminator="\n")
    for line in read:
        log_data = parser(line)
        if log_data['status'] == '200' \
                and log_data['request_method'] == 'GET' \
                and (re.search(r'\.(js|css|gif|jpg|jpeg|png|JPG|ico)', log_data['request_url_path'])) is None:
            temp = log_data['remote_host'], log_data['time_received'], log_data['request_url']
            writer.writerow(temp)
def main(SERVER_NAME, FILE_PATH, SEEK_FILE): logging.basicConfig(filename='out.log',level=logging.DEBUG) line_parser = apache_log_parser.make_parser("%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"") # MONGODB_HOST = os.environ.get('DB_PORT_27017_TCP_ADDR', '127.0.0.1') client = MongoClient("mongodb://db:27017/") # client = MongoClient(MONGODB_HOST) db = client.secfilter1 f = open(FILE_PATH, 'r') last = 0 try: sf = open(SEEK_FILE, 'r') last = sf.read().strip() last = int(last) sf.close() except: last = 0 pass f.seek(last) try: while True: line = f.readline() if line: last = f.tell() out = line_parser(line) out["server"] = SERVER_NAME out["analyzed"] = False # pprint(out) db.requests.insert_one(out) logging.info(last) logging.debug(str(out)+"\n----\n") with open(SEEK_FILE, 'w+') as sf: sf.write(str(last)) sf.close() else: time.sleep(1) except: traceback.print_exc() finally: f.close() logging.info("BYE")
def main():
    # Create the parser.
    # The format passed in must match the LogFormat directive in httpd.conf.
    parser = apache_log_parser.make_parser('%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"')
    # Connect to MongoDB.
    conn = pymongo.MongoClient('localhost', 27017)
    db = conn.apache
    collection = db.log
    # Get the list of access logs whose names start with access_log.
    access_log_list = get_access_log_list()
    for file in access_log_list:
        for line in open('apache_log/' + file):
            # parse
            log_data = parser(line)
            # load to db
            load_to_db(collection, log_data)
def parse(log_file_path, log_format=LogFormats.APACHE_COMBINED):
    """ import and parse log files using the apache log parser """
    log_data = []
    line_parser = apache_log_parser.make_parser(log_format)
    for file_name in glob.glob(log_file_path):
        sys.stdout.write("\nFile name: %s\n" % file_name)
        with open(file_name, 'r') as f:
            # Count the lines first so progress can be reported, then rewind.
            for i, l in enumerate(f):
                pass
            total = i + 1
            f.seek(0)
            for counter, line in enumerate(f):
                percent = 100.0 * (counter + 1) / total
                log_data.append(line_parser(line))
                sys.stdout.write("\rProcessed %i of %i entries (%i%%)" % (counter, total, percent))
                sys.stdout.flush()
            sys.stdout.write("\n")
    return log_data
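# A brief usage sketch for parse() above, relying on the module's
# LogFormats.APACHE_COMBINED default; the glob pattern and the tallying with
# collections.Counter are illustrative only.
from collections import Counter

entries = parse("/var/log/apache2/access.log*")
status_counts = Counter(entry['status'] for entry in entries)
print(status_counts.most_common(5))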
def get_log_urls(logfiles, logformat, strip_qs=False, strip_anchors=False, verbose=False): """ Parse apache log files, return a dict of distinct URLs (keys) and their most recent HTTP response code (values). :param logfiles: list of absolute paths to access logs to parse :type logfiles: list of strings :param verbose: whether or not to print verbose output :type verbose: boolean :returns: dict of request path => latest response code :rtype: dict, string keys to int values """ temp = {} p = apache_log_parser.make_parser(logformat) for fpath in logfiles: parsefail = 0 lcount = 0 if verbose: print("++ Parsing %s" % fpath) for line in open(fpath): line = str(line).strip() lcount = lcount + 1 try: data = p(line) if data['request_method'] != 'GET': continue data['request_url'] = url_strip(data['request_url'], strip_qs, strip_anchors) if data['request_url'] not in temp: temp[data['request_url']] = {'datetime': data['time_recieved_datetimeobj'], 'status': int(data['status'])} else: if temp[data['request_url']]['datetime'] < data['time_recieved_datetimeobj']: temp[data['request_url']] = {'datetime': data['time_recieved_datetimeobj'], 'status': int(data['status'])} except Exception, e: if verbose: print("Parse Exception: %s for line '%s'" % (str(e), line)) parsefail = parsefail + 1 sys.stderr.write("++ Failed parsing %d of %d lines from %s\n" % (parsefail, lcount, fpath))
def get_ips(self, time_frame, given_time):
    """
    Return a list with each of the IPs detected during the 'time_frame'
    minutes prior to 'given_time'. Each IP is accompanied by a verification
    flag (True/False) indicating whether the detection of that IP is
    conclusive or whether the positive needs to be confirmed by another
    plugin.
    """
    ip_list = []
    if not self.enabled:
        return ip_list
    delta_frame = timedelta(minutes=time_frame)
    # Get the name of the events file.
    addr = urlparse.urlparse(self.__access_log)
    filename = "data/mc-" + addr.hostname + '-' + self.person + "-events.log"
    line_parser = apache_log_parser.make_parser(self.__access_log_format.decode('string_escape'))
    with open(filename, 'r') as f:
        # Detect the time zone from the first line of the log.
        linea = f.readline()
        if linea:
            p = re.compile(r"[\+|-]\d\d\d\d\]")
            tz = p.findall(linea)[0]
            timezone = timedelta(hours=int(tz[0:3]), minutes=int(tz[0] + tz[3:5]))
        while True:
            if not linea:
                break
            log_line_data = line_parser(linea)
            line_time_utc = log_line_data['time_received_datetimeobj'] - timezone
            if line_time_utc > given_time:
                break
            if line_time_utc > given_time - delta_frame:
                ip = log_line_data['remote_host']
                if not ip_list.count([ip, False]):
                    ip_list.append([ip, False])
            linea = f.readline()
    return ip_list
def get_vclients(): fileName = '/var/log/apache2/access.log' line_parser = apache_log_parser.make_parser("%h %l %u %t \"%r\" %>s %O") line_count = 100 curLn = 0 for line in reversed(open(fileName).readlines()): if curLn <= line_count: ## print(line) log_line_data = line_parser(line) ## pprint(log_line_data) cur_ip = log_line_data['remote_host'] ## Get name for the client cur_name_obj = socket.gethostbyaddr(cur_ip) if len(cur_name_obj) > 0: cur_name = cur_name_obj[0] else: cur_name = cur_ip ## Get recent client to database cur_request = log_line_data['request_url'] cur_time = log_line_data['time_received_datetimeobj'].replace(tzinfo=utc) if "m4f" in cur_request: # print(cur_ip + ", " + cur_request + ", " + str(cur_time)) num_results = VClient.objects.filter(ip=cur_ip).count() if num_results > 0: cur_obj = VClient.objects.filter(ip=cur_ip)[0] if cur_obj.last_visit < cur_time: cur_obj.last_visit = cur_time cur_obj.name = cur_name print("[UPDATE]:" + cur_ip + ", " + cur_request + ", " + str(cur_time)) else: cur_obj = VClient(name=cur_name, ip=cur_ip, last_visit=cur_time) cur_obj.save() else: break curLn = curLn + 1
def configure(self, conf):
    """
    Receive and process configuration block from collectd
    """
    for node in conf.children:
        key = node.key.lower()
        val = node.values[0]
        if key == 'accesslog':
            self.access_log = val
            if not access(self.access_log, R_OK):
                self.err('AccessLog %s is not readable!' % self.access_log)
        elif key == 'accesslogformat':
            self.access_log_format = val
            try:
                self.parser = make_parser(self.access_log_format)
            except LineDoesntMatchException:
                self.err('Couldn\'t parse AccessLogFormat: %s' % (
                    self.access_log_format))
                return
        elif key == 'name':
            self.plugin_name = val
        elif key == 'interval':
            self.interval = val
        else:
            self.warn('Unknown config key: %s.' % key)
rqst_type = get_type(rqst_method, rqst_url) doc = {'user_id': user_id, 'user_ip': user_ip, 'server': server, 'timestamp': timestamp, 'request': { 'type': rqst_type, 'method': rqst_method, 'url': rqst_url, 'categ': rqst_categ, }, } # Inserts in collection logs from logapache database db.logs.insert_one(doc) # Log progression if index % 2000 == 0: print 'Progress {}...'.format(index) print 'skipped {} logs', skipped print 'Done inserting logs..' if __name__ == "__main__": # Setup pymongo connection client = MongoClient(MONGO_HOST) db = client.get_database(MONGO_DB) parser = apache_log_parser.make_parser(PARSER_LOG_FORMAT) log_file_path = os.path.join(PARSER_WORK_DIR, PARSER_LOG_FILE_NAME) read_and_insert(log_file_path,db,parser)