Example #1
    def test_pr8(self):
        parser = apache_log_parser.make_parser('%h %{remote}p %v %{local}p %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" %P %D %{number}n %{SSL_PROTOCOL}x %{SSL_CIPHER}x %k %{UNIQUE_ID}e ')
        data = parser('127.0.0.1 50153 mysite.co.uk 443 [28/Nov/2014:10:03:40 +0000] "GET /mypage/this/that?stuff=all HTTP/1.1" 200 5129 "-" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.120 Safari/537.36" 18572 363701 0 TLSv1.01 MY-CYPHER 0 VHhIfKwQGCMAAEiMUIAAAAF ')
        self.assertEqual(data, {
            'status': '200', 'extension_ssl_protocol': 'TLSv1.01', 'request_header_user_agent__browser__family': 'Chrome',
            'time_us': '363701', 'num_keepalives': '0', 'request_first_line': 'GET /mypage/this/that?stuff=all HTTP/1.1',
            'pid': '18572', 'response_bytes_clf': '5129', 'request_header_user_agent__os__family': u'Windows 7',
            'request_url': '/mypage/this/that?stuff=all', 'request_http_ver': '1.1',
            'request_header_referer': '-', 'server_name': 'mysite.co.uk', 'request_header_user_agent__is_mobile': False,
            'request_header_user_agent__browser__version_string': '37.0.2062',
            'request_header_user_agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.120 Safari/537.36',
            'note_number': '0', 'request_header_user_agent__os__version_string': '',
            'server_port_local': '443', 'request_method': 'GET',
            'server_port_remote': '50153', 'env_unique_id': 'VHhIfKwQGCMAAEiMUIAAAAF',
            'time_received_datetimeobj': datetime.datetime(2014, 11, 28, 10, 3, 40),
            'time_received_isoformat': '2014-11-28T10:03:40', 'remote_host': '127.0.0.1',
            'time_received': '[28/Nov/2014:10:03:40 +0000]',
            'time_received_tz_datetimeobj': datetime.datetime(2014, 11, 28, 10, 3, 40, tzinfo=apache_log_parser.FixedOffset("0000")),
            'time_received_tz_isoformat': '2014-11-28T10:03:40+00:00',
            'time_received_utc_datetimeobj': datetime.datetime(2014, 11, 28, 10, 3, 40, tzinfo=apache_log_parser.FixedOffset("0000")),
            'time_received_utc_isoformat': '2014-11-28T10:03:40+00:00',
            'extension_ssl_cipher': 'MY-CYPHER',
        })

        parser = apache_log_parser.make_parser('%A %V %p %P %a \"%r\" \"%{main_call}n\" %{some_time}t %b %>s %D %{UNIQUE_ID}e ')
        data = parser('127.0.0.1 othersite 80 25572 192.168.1.100 "GET /Class/method/ HTTP/1.1" "-" 20141128155031 2266 200 10991 VHiZx6wQGCMAAEiBE8kAAAAA:VHiZx6wQGiMAAGPkBnMAAAAH:VHiZx6wQGiMAAGPkBnMAAAAH ')
        self.assertEqual(data, {
            'status': '200', 'note_main_call': '-', 'time_some_time': '20141128155031',
            'time_us': '10991', 'request_http_ver': '1.1', 'local_ip': '127.0.0.1',
            'pid': '25572', 'request_first_line': 'GET /Class/method/ HTTP/1.1', 'request_method': 'GET',
            'server_port': '80', 'response_bytes_clf': '2266', 'server_name2': 'othersite',
            'request_url': '/Class/method/',
            'env_unique_id': 'VHiZx6wQGCMAAEiBE8kAAAAA:VHiZx6wQGiMAAGPkBnMAAAAH:VHiZx6wQGiMAAGPkBnMAAAAH',
            'remote_ip': '192.168.1.100'})
Example #2
    def __init__(self, config):
        super().__init__()

        self.config = config

        # Open the log file for reading and seek to the end of it.
        self.logHandle = open(self.config.logFilePath)
        self.logHandle.seek(0, 2)

        # This parser is used to parse every log line.
        self.logParser = apache_log_parser.make_parser(LogStats.LOG_FORMAT)
        # If the first parser fails, we try this one.
        self.logParserAlt = apache_log_parser.make_parser(
            LogStats.LOG_FORMAT_ALT)

        # This lock grants exclusive access to data structures below.
        self.lock = Lock()

        # Various statistics.
        self.numHits = 0  # Total number of requests.
        self.numBadLines = 0  # Number of log lines that could not be parsed.
        self.responseBytesTot = 0  # Total response bytes sent.
        self.retCode2count = defaultdict(int)  # Count for each status code.
        self.method2count = defaultdict(int)  # Count for each request method.

        # This heap keeps track of all sections we have seen so far and their counts.
        self.heap = Heap()

        # Create the alerter and start its event loop in a separate process.
        self.alerter = Alerter(self.config.numHitsToGenAlert,
                               self.config.alertWinLenSecs)
        self.alerterProc = multiprocessing.Process(
            target=self.alerter.runAlerter)
        self.alerterProc.start()
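
LogStats.LOG_FORMAT and LOG_FORMAT_ALT are referenced above but not shown; a plausible sketch, assuming a combined-format primary and a common-format fallback (the project's real strings may differ):

# Hypothetical values for the class constants used in __init__ above.
LOG_FORMAT = '%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"'  # combined
LOG_FORMAT_ALT = '%h %l %u %t \"%r\" %>s %b'  # common, tried if the first parser fails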
Example #3
 def __init__(self, pidFile, socketFile, netrcFile):
     super(LogSocketDaemon, self).__init__(pidfile=pidFile)
     self.socketFile = socketFile
     self.logger = logging.getLogger("LogSocketDaemon")
     self.logDBConn = None
     # netrc file with logins and passwords
     self.netrc = netrc.netrc(netrcFile)
     self.onlyLogGeoserver = True
     # This format has to match the Apache LogFormat directive.
     self.parser = apache_log_parser.make_parser('%v:%p %a %l %u %t \"%r\" %>s %O \"%{Referer}i\" \"%{User-Agent}i\"')
     self.parserAlternative = apache_log_parser.make_parser('%v:%p %a %l %u %t \"%r\" %>s %O')
Example #4
 def __init__(self, line, server):
     self._log_line_dict = None
     if (server == 'catalina'):
         line_parser = apache_log_parser.make_parser(
             "%h - - %t \"%r\" %>s %b")
     elif (server == 'apache'):
         line_parser = apache_log_parser.make_parser(
             "%h <<%P>> %t %Dus \"%r\" %>s %b  \"%{Referer}i\" \"%{User-Agent}i\" %l %u"
         )
     else:
         line_parser = apache_log_parser.make_parser(
             "%h - - %t \"%r\" %>s %b")
     self._log_line_dict = line_parser(line)
Example #5
 def connections(self, linha):
     try:
         line_parser = apache_log_parser.make_parser(
             self.conf['apache_mask'])
         log = line_parser(linha)
         if self.conf['vhost_enable']:
             log['vhost'] = linha.split(' ')[0]
         else:
             log['vhost'] = None
         log['owasp'] = self.owasp(log['request_url'])
         if log['owasp']:
             self.send_all(log)
     except Exception:
         pass
     try:
         # Fallback: 'linha' may already be a parsed dict (e.g. a JSON log).
         log = linha
         if "body" in log:
             if log['body'] == "Bad Request":
                 url = log['body']['url']
             else:
                 url = log['url']
             if "statusCode" not in log:
                 log['statusCode'] = "Null"
             if "url" not in log and "url" not in log['body']:
                 url = "Null"
             url = url.replace("[masked_session_id]",
                               "").replace("[masked_api_key]", "")
             log['url'] = url
             log['owasp'] = self.owasp(log['url'])
             if log['owasp']:
                 self.send_all(log)
     except Exception as e:
         print(e, "error")
         pass
Example #6
def second(s):
    file_path = path.relpath(s)
    with open(file_path) as f:
        new = ' ' + f.read()
        line_parser = apache_log_parser.make_parser("%h %a %v %U %u %f %H %m %X %l %r %p %P %q %R %T ")
        log_line_data = line_parser(new)
        pprint(log_line_data)
Example #7
    def eval_data(self, time_frame, analyzed_time, given_time, confirmed_ips):
        """
        Returns a list with one element for each of the last 'check_interval' minutes
        before 'given_time'. Each element of the returned list holds the accumulated
        value of the detections during the preceding 'time_frame' minutes.
        """
        eval_time = time_frame + analyzed_time
        detect_list = [0] * eval_time
        acum_list = [0] * analyzed_time
        if not self.enabled:
            return acum_list

        time_now_utc = datetime(given_time.year, given_time.month,
                                given_time.day, given_time.hour,
                                given_time.minute)

        # Extract the host name for the events file
        addr = urlparse.urlparse(self.__access_log)
        filename = "data/bl-" + addr.hostname + '-' + self.person + "-events.log"
        line_parser = apache_log_parser.make_parser(
            self.__access_log_format.decode('string_escape'))

        with open(filename, 'r') as f:

            # Detect the timezone from the first line of the log
            linea = f.readline()
            if linea:
                p = re.compile(r"[+-]\d\d\d\d\]")
                tz = p.findall(linea)[0]
                timezone = timedelta(hours=int(tz[0:3]),
                                     minutes=int(tz[0] + tz[3:5]))

            while linea:
                log_line_data = line_parser(linea)
                if confirmed_ips.count(log_line_data['remote_host']):

                    l = log_line_data['time_received_datetimeobj']
                    line_time_utc = datetime(l.year, l.month, l.day, l.hour,
                                             l.minute) - timezone

                    if line_time_utc > time_now_utc:
                        break

                    # Convert the time difference to a list index
                    i = int((time_now_utc - line_time_utc).total_seconds() / 60)
                    if i < eval_time:
                        # Detection weight list
                        detect_list[eval_time - i - 1] += self.__weight

                linea = f.readline()

            # Accumulate detection weights over the given ranges
            for i in range(1, analyzed_time + 1):
                acum_list[analyzed_time - i] = sum(
                    detect_list[eval_time - time_frame - i:eval_time - i + 1])

        return acum_list
Example #8
def create_log_parser():
    "Create parser for apache log entries (webfaction default)"
    # apache config:
    # %{X-Forwarded-For}i %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"
    format_string = r'%a %l %u %t "%r" %>s %b "%{Referer}i" "%{User-Agent}i"'
    log_parser = apache_log_parser.make_parser(format_string)
    return log_parser
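
A minimal usage sketch for the parser returned above; the log line is invented for illustration:

log_parser = create_log_parser()
entry = log_parser('203.0.113.7 - - [16/Aug/2013:15:45:34 +0000] "GET / HTTP/1.1" 200 512 "-" "curl/7.64.1"')
print(entry['remote_ip'], entry['status'], entry['request_url'])  # 203.0.113.7 200 /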
Example #9
def file_parser(filename):
    """file_parser

       Opens the log file, parses each line into a log entry, and sorts the
       entries based on the timestamp of when the server received the
       request, so the caller can split them into per-minute chunks.

       The log format is assumed to be "%a %l %u %t \"%r\" %>s %b %D"

       :param filename: The filename of the log file to parse.

       :returns: A list containing the parsed entries sorted by the time
                 they were received by the server.
    """
    LOG_FORMAT = "%a %l %u %t \"%r\" %>s %b %D"

    line_parser = apache_log_parser.make_parser(LOG_FORMAT)

    parsed_entries = []

    with open(filename) as f:
        for line in f:
            parsed_entries.append(line_parser(line))

    # Sort the parsed log entries by timestamp. Some of the log entries in the
    # provided example take a long time to process so they are not in order,
    # this messes up splitting the entries into minute chunks for processing.
    parsed_entries.sort(key=lambda x: x.get('time_received_utc_datetimeobj'))

    return parsed_entries
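
The per-minute grouping mentioned in the docstring is left to the caller; a minimal sketch of that step, using itertools.groupby on the sorted entries (the helper name is an assumption):

from itertools import groupby

def entries_by_minute(parsed_entries):
    # Bucket the already-sorted entries by the minute they were received.
    minute = lambda e: e['time_received_utc_datetimeobj'].replace(second=0, microsecond=0)
    return [(m, list(chunk)) for m, chunk in groupby(parsed_entries, key=minute)]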
Example #10
    def test_simple(self):
        format_string = "%h <<%P>> %t %Dus \"%r\" %>s %b  \"%{Referer}i\" \"%{User-Agent}i\" %l %u"
        parser = apache_log_parser.make_parser(format_string)
        sample = '127.0.0.1 <<6113>> [16/Aug/2013:15:45:34 +0000] 1966093us "GET / HTTP/1.1" 200 3478  "https://example.com/" "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18)" - -'
        log_data = parser(sample)
        self.assertNotEqual(log_data, None)
        self.assertEqual(log_data['status'], '200')
        self.assertEqual(log_data['pid'], '6113')
        self.assertEqual(log_data['request_first_line'], 'GET / HTTP/1.1')
        self.assertEqual(log_data['request_method'], 'GET')
        self.assertEqual(log_data['request_url'], '/')
        self.assertEqual(log_data['request_header_referer'],
                         'https://example.com/')

        self.assertEqual(
            log_data['request_header_user_agent'],
            'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18)')

        self.assertEqual(log_data['request_header_user_agent__os__family'],
                         'Linux')

        self.assertEqual(
            apache_log_parser.get_fieldnames(format_string),
            ('remote_host', 'pid', 'time_received', 'time_us',
             'request_first_line', 'status', 'response_bytes_clf',
             'request_header_referer', 'request_header_user_agent',
             'remote_logname', 'remote_user'))
Example #11
    def get_point(self):
        parsers = {}
        for log_format in self.logformats.keys():
            parsers[log_format] = apache_log_parser.make_parser(self.logformats[log_format])

        last_linecount = {}
        while True:
            result = {}
            for file in self.accesslogs:
                filename = file[0]
                fileformat = file[1]
                with open(filename) as logfile:
                    lines = logfile.readlines()
                    start = 0
                    if filename in last_linecount and len(lines) > last_linecount[filename]:
                        start = last_linecount[filename]
                    # Always record the current count so new files are tracked too.
                    last_linecount[filename] = len(lines)
                    new_lines = lines[start:]

                    for line in new_lines:
                        parsed = parsers[fileformat](line[:-1])
                        status = int(parsed['status'])
                        if status in result.keys():
                            result[status] += 1
                        else:
                            result[status] = 1
            yield result
            sleep(60)
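
A sketch of consuming this generator, assuming monitor is an instance of the enclosing class (its constructor is not shown above); each yielded dict maps a status code to its hit count for the last interval:

for snapshot in monitor.get_point():
    print(snapshot)  # e.g. {200: 1432, 404: 7, 500: 1}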
Example #12
def read_apache_log(fn,P,area,logformat='%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"'):
    parser = apache_log_parser.make_parser(logformat)
    cou = area["line_num"]
    count = 0
    with open(fn) as f:
        for line in f:
            if(cou > 0):
                cou -=1
                continue
            count += 1
            try:
                parsed_line = parser(line)
                host, time = parsed_line["remote_host"], parsed_line["time_received_tz_datetimeobj"]
                if(area["flag"] == "n"):
                    P.append([host,time])
                elif(area["from"] <= time <= area["to"]):
                    P.append([host,time])
            except ValueError:
                pass
            mem = psutil.virtual_memory() 
            if(mem.percent >= area["memory_usage"]):
                area["line_num"] += count            
                area["memory_flag"] = "on"
                break
        else:
            area["memory_flag"] = "ok"
Example #13
 def on_modified(self, event):
     with open(event.src_path, 'r') as log_source:
         log_string = ' ' + log_source.read()
         line_parser = apache_log_parser.make_parser(
             "%h %a %v %U %u %f %H %m %X %l %r %p %P %q %R %T ")
         log_line_data = line_parser(log_string)
         print(log_line_data)
Example #14
    def eval_data(self, time_frame, analyzed_time, given_time, confirmed_ips):
        """
        Returns a list with one element for each of the last 'check_interval' minutes
        before 'given_time'. Each element of the returned list holds the accumulated
        value of the detections during the preceding 'time_frame' minutes.
        """
        eval_time = time_frame + analyzed_time
        detect_list = [0] * eval_time
        acum_list = [0] * analyzed_time
        if not self.enabled:
            return acum_list

        time_now_utc = datetime(given_time.year, given_time.month, given_time.day, given_time.hour, given_time.minute)

        line_parser = apache_log_parser.make_parser(self.__webbug_log_format.decode('string_escape'))
        for remoteaddr in self.__webbug_log:

            addr = urlparse.urlparse(remoteaddr)  # Extract the host name for the events file
            filename = "data/wb-" + addr.hostname + '-' + self.person + "-events.log"

            with open(filename, 'r') as f:

                linea = f.readline()  # Detect the timezone from the first log line
                if linea:
                    p = re.compile(r"[+-]\d\d\d\d\]")
                    tz = p.findall(linea)[0]
                    timezone = timedelta(hours=int(tz[0:3]), minutes=int(tz[0]+tz[3:5]))

                visiting_ips = []
                while linea:
                    log_line_data = line_parser(linea)
                    current_ip = log_line_data['remote_host']
                    if confirmed_ips.count(current_ip):

                        l = log_line_data['time_received_datetimeobj']
                        line_time_utc = datetime(l.year, l.month, l.day, l.hour, l.minute) - timezone

                        if line_time_utc > time_now_utc:
                            break

                        i = int((time_now_utc - line_time_utc).total_seconds()/60)  # Convert time to an index
                        if i < eval_time:
                            ref = Referer(log_line_data['request_header_referer'])
                            origin = urlparse.urlparse(log_line_data['request_first_line'])
                            if (ref.medium == 'search') and (ref.search_term is not None):  # A search with terms
                                detect_list[eval_time - i - 1] += self.__weight
                            elif (ref.medium == 'search') and (ref.search_term is None):  # A search without terms
                                detect_list[eval_time - i - 1] += self.__weight_no_search_terms
                            elif (self.__weight_visit > 0) and \
                                    (not visiting_ips.count([current_ip, origin.hostname])):  # A plain visit
                                visiting_ips.append([current_ip, origin.hostname])  # Each ip/origin pair scores only once
                                detect_list[eval_time - i - 1] += self.__weight_visit

                    linea = f.readline()

                # Accumulate detection weights over the given ranges
                for i in range(1, analyzed_time + 1):
                    acum_list[analyzed_time - i] = sum(detect_list[eval_time - time_frame - i:eval_time - i + 1])

        return acum_list
Example #15
def parse_log_line(line):
    line_parser = apache_log_parser.make_parser(
        "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"")
    log_line_data = line_parser(line)

    user_agent = log_line_data['request_header_user_agent__browser__family']
    user = log_line_data['remote_user']
    method = log_line_data['request_method']
    status = log_line_data['status']
    timestamp = log_line_data['time_received_isoformat']
    #pprint(log_line_data)
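    # syslog PRI value = facility * 8 + severity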
    pri = 3 * 8 + 2
    if status.startswith("5"):
        pri = 1 * 8 + 2

    if status.startswith("4"):
        pri = 2 * 8 + 2

    version = 1
    hostname = socket.gethostname()
    appname = "Server"
    procid = "-"
    msgid = "-"
    sd = "[%s %s=\"%s\" %s=\"%s\" %s=\"%s\" %s=\"%s\"]" % \
         ("SSID", "User-Agent", user_agent, "user", user, "req-method", method, "status", status)
    msg = "-"
    return "<%s>%s %s %s %s %s %s %s %s\n" % (
        pri, version, timestamp, hostname, appname, procid, msgid, sd, msg)
Example #16
def connections(linha):
    line_parser = apache_log_parser.make_parser(conf['apache_mask'])
    log = line_parser(linha)
    if conf['vhost_enable'] == 'True':
        log['vhost'] = linha.split(' ')[0]
    else:
        log['vhost'] = None
    log['owasp'] = owasp(log['request_url'])
    if log['owasp']:
        msg ='''[+] - Intrusion Attempt - [+]
Date: %s
Vhost: %s
IP: %s
Path: %s
User-Agent: %s
Browser: %s
S.O: %s
Description: %s
Impact: %s
Category: %s''' %(log['time_received'],log['vhost'],log['remote_host'],log['request_url'],
        log['request_header_user_agent'],log['request_header_user_agent__browser__family']+' '+log['request_header_user_agent__browser__version_string'],
        log['request_header_user_agent__os__family'],log['owasp']['description'],
        log['owasp']['impact'],','.join(log['owasp']['tags']['tag']))
        print msg
        print
        if conf['zabbix_enable'] == 'True':
            send_zabbix(msg)

        if conf['telegram_enable'] == 'True':
            time.sleep(3)
            bot.sendMessage(conf['group_id'], msg)
Example #17
class LogEntryMonitoring(Thread):
    PARSER = apache_log_parser.make_parser('%h %u %l %t "%r" %s %B')

    def __init__(self, publisher, file_path):
        Thread.__init__(self)
        self.daemon = True

        self.publisher = publisher
        self.file_path = file_path

    def run(self):
        file = open(self.file_path, 'r')
        file.seek(0, 2)

        while True:
            line = file.readline()

            if not line.strip():
                time.sleep(0.25)
                continue

            try:
                entry = self.parse_line(line)
                self._fire_new_entry_event(entry)
            except LineDoesntMatchException:
                pass

    @staticmethod
    def parse_line(line):
        parts = LogEntryMonitoring.PARSER(line)

        return Entry.factory(parts)

    def _fire_new_entry_event(self, entry):
        self.publisher.sendMessage(TopicUtils.NEW_ENTRY_TOPIC, entry=entry)
Example #18
def parse_url_and_time(base, format):
    """
    Given a list of files/directories, parse each line with apache-log-parser, and
    extract the URL and time data and yield that.
    """
    parser = apache_log_parser.make_parser(format)

    for filename in files(base):

        for line in open_anything(filename):
            try:
                match = parser(line)
            except apache_log_parser.LineDoesntMatchException as ex:
                # Ignore it
                pass

            else:
                # Extract matching data from regex
                results = {
                    'url': match['request_url'],
                    'microsec': int(match['time_us']),
                    'method': match['request_method'],
                    'ipaddr': match['remote_host'],
                    'datetime': match['time_received_isoformat']
                }

                yield results
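
A hedged usage sketch; the path and LogFormat below are illustrative, and files() / open_anything() come from the same module as the function above:

fmt = '%h %l %u %t \"%r\" %>s %b %D'
for row in parse_url_and_time(['/var/log/apache2'], fmt):
    print(row['datetime'], row['method'], row['url'], row['microsec'])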
Example #20
def main(**kwargs):

    print('Converting, please wait...')

    line_parser = apache_log_parser.make_parser(kwargs['format'])
    header = True

    with open(kwargs['input'], 'rb') as inFile, open(kwargs['output'], 'w') as outFile:

        lines = inFile.readlines()
        writer = csv.writer(outFile, delimiter=';')

        for line in lines:
            try:
                log_line_data = line_parser(line)
            except apache_log_parser.LineDoesntMatchException as ex:
                print(bcolors.FAIL + 'The format specified does not match the log file. Aborting...' + bcolors.ENDC)
                print('Line: ' + ex.log_line + ' RegEx: ' + ex.regex)
                exit()

            if header:
                writer.writerow(list(log_line_data.keys()))
                header = False
            else:
                writer.writerow(list(log_line_data.values()))

    print(bcolors.OKGREEN + 'Conversion finished.' + bcolors.ENDC)
Example #21
    def get_ips(self, time_frame, given_time):
        """
        Returns a list with each of the IPs detected during the 'time_frame' minutes
        before 'given_time'. Each IP is accompanied by a verification flag (True/False)
        indicating whether the detection of that IP is conclusive or whether the positive
        still needs to be confirmed by another plugin.
        """
        ip_list = []
        if not self.enabled:
            return ip_list
        delta_frame = timedelta(minutes=time_frame)

        line_parser = apache_log_parser.make_parser(
            self.__webbug_log_format.decode('string_escape'))
        for remoteaddr in self.__webbug_log:

            # Extract the host name for the events file
            addr = urlparse.urlparse(remoteaddr)
            filename = "data/wb-" + addr.hostname + '-' + self.person + "-events.log"

            with open(filename, 'r') as f:

                # Detect the timezone from the first line of the log
                linea = f.readline()
                if linea:
                    p = re.compile(r"[+-]\d\d\d\d\]")
                    tz = p.findall(linea)[0]
                    timezone = timedelta(hours=int(tz[0:3]),
                                         minutes=int(tz[0] + tz[3:5]))

                while True:
                    if not linea:
                        break
                    log_line_data = line_parser(linea)
                    line_time_utc = log_line_data['time_received_datetimeobj'] - timezone

                    if line_time_utc > given_time:
                        break

                    if line_time_utc > given_time - delta_frame:
                        ip = log_line_data['remote_host']
                        ip_only_list = [a[0] for a in ip_list]
                        origin = urlparse.urlparse(
                            log_line_data['request_first_line']).hostname
                        ref = Referer(log_line_data['request_header_referer'])
                        if not ip_only_list.count(ip):
                            if (ref.medium == 'search') and (ref.search_term is not None):
                                ip_list.append([ip, True, origin])  # New IP, add it
                            else:
                                ip_list.append([ip, False, origin])
                        elif origin != ip_list[ip_only_list.index(ip)][2]:
                            # Same IP on a different page: a conclusive positive
                            ip_list[ip_only_list.index(ip)][1] = True

                    linea = f.readline()

        return [c[0:2] for c in ip_list]
Example #22
def parse_apache_log(log):
	# The default parsers here are for Apache access logs.
	parsed = None

	# If the identified file name is an Apache access log:
	# if 'access_log' == str(id_helper[:10]):
	parser_regex = ["%h <<%P>> %t %Dus \"%r\" %>s %b  \"%{Referer}i\" \"%{User-Agent}i\" %l %u", "%h %l %u %t \"%r\" %>s %b", "%t %h %{SSL_PROTOCOL}x %{SSL_CIPHER}x \"%r\" %b"]
	for regex in parser_regex:
		
		try:
			parser = apache_log_parser.make_parser(regex)
			parsed = parser(log)
		except Exception:
			parsed = None

		# if parser has something in it
		if parsed: break

	# else:
	# print('\n\nUnidentified file name "{0}" in get_parser\n\n'.format(id_helper))
	print('\n\nParsed Apache Log \n"{0}" to: \n"{1}"\n\n'.format(log, parsed))
	if not parsed:
		parsed = parse_on_your_own_error_log(log)
	
	if not parsed:
		print('Unable to parse: "{0}"'.format(log))
		# input()
		with open('act-out.txt', 'a') as o: o.write('\nUnable to parse log:\n{0}\n\n'.format(log))

	return parsed
Example #23
def main(**kwargs):

    print('Converting, please wait...')

    line_parser = apache_log_parser.make_parser(kwargs['format'])
    header = True

    with open(kwargs['input'], 'rb') as inFile, open(kwargs['output'],
                                                     'w') as outFile:

        lines = inFile.readlines()
        writer = csv.writer(outFile, delimiter=';')

        for line in lines:
            try:
                log_line_data = line_parser(line)
            except apache_log_parser.LineDoesntMatchException as ex:
                print(
                    bcolors.FAIL +
                    'The format specified does not match the log file. Aborting...'
                    + bcolors.ENDC)
                print('Line: ' + ex.log_line + ' RegEx: ' + ex.regex)
                exit()

            if header:
                writer.writerow(list(log_line_data.keys()))
                header = False
            else:
                writer.writerow(list(log_line_data.values()))

    print(bcolors.OKGREEN + 'Conversion finished.' + bcolors.ENDC)
Example #24
    def run(self):
        """ Main run method. """
        quiet = self.args.quiet
        lines = self.args.file.read()
        if not lines:
            self.stderr("Log file is empty")
            sys.exit(1)

        # Work line by line
        lines = lines.split('\n')

        # Build our parser
        line_parser = apache_log_parser.make_parser(self.args.format)

        if not quiet:
            self.stderr(f"Parsing {len(lines)} lines...")

        # Hold the stats per value
        stats = defaultdict(int)
        # Use dynamic fields for fun and profit
        fields = self.args.fields.split(',')

        # Parse each line
        for line in lines:
            if not line or not line.strip():
                # Ignore completely blank lines
                continue

            # Print a dot when we parse a line
            if not quiet:
                # Print dots to follow progress on very large files
                sys.stderr.write('.')

            # Do the parsing using our handy library
            try:
                values = line_parser(line)
            except apache_log_parser.LineDoesntMatchException:
                self.stderr(f"Could not parse line: {line}")
                stats['-'] += 1
                continue

            # Find a field that has a good value - this lets us handle both
            # X-Forwarded-For (the IP of a remote client) and remote_host (the
            # IP of a load balancer doing health checks)
            for field in fields:
                value = values.get(field, None)
                if value and value != '-':
                    stats[value] += 1
                    break

            # If we didn't find a good IP, log the line and count it anyway
            if not value or value == '-':
                self.stderr(f"Could not parse line: {line}")
                stats['-'] += 1

        if not quiet:
            # Print a newline to end the dots if we printed them
            self.stderr('')
        self.print_stats(stats)
Example #25
    def test_issue22_http2(self):
        line_parser = apache_log_parser.make_parser(
            "%h %l %u %t \"%r\" %>s %O \"%{Referer}i\" \"%{User-Agent}i\"")
        sample = '''137.226.113.25 - - [31/Dec/2017:03:14:19 +0100] "GET / HTTP/2" 200 0 "-" "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:50.0) Gecko/20100101 Firefox/50.0"'''
        log_data = line_parser(sample)
        expected_data = {
            'bytes_tx': '0',
            'remote_host': '137.226.113.25',
            'remote_logname': '-',
            'remote_user': '******',
            'request_first_line': 'GET / HTTP/2',
            'request_header_referer': '-',
            'request_header_user_agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:50.0) Gecko/20100101 Firefox/50.0',
            'request_header_user_agent__browser__family': 'Firefox',
            # user_agents sometimes returns this with a trailing '.', e.g. '50.0.'
            # 'request_header_user_agent__browser__version_string': '50.0',
            'request_header_user_agent__is_mobile': False,
            'request_header_user_agent__os__family': 'Ubuntu',
            'request_header_user_agent__os__version_string': '',
            'request_http_ver': '2',
            'request_method': 'GET',
            'request_url': '/',
            'status': '200',
            'time_received': '[31/Dec/2017:03:14:19 +0100]',
        }
        for k, v in expected_data.items():
            self.assertEqual(log_data[k], v)

        log_data = line_parser(
            '''165.226.7.238 - - [18/Dec/2020:14:54:27 +0000] "GET / HTTP/1.0" 200 19245 "-" "-"'''
        )
        self.assertEqual(log_data['request_http_ver'], '1.0')

        log_data = line_parser(
            '''17.103.15.13 - - [17/Dec/2020:00:45:26 +0000] "GET /feed.xml HTTP/1.1" 304 244 "-" "Tiny Tiny RSS/UNKNOWN (Unsupported) (http://tt-rss.org/)"'''
        )
        self.assertEqual(log_data['request_http_ver'], '1.1')

        log_data = line_parser(
            '''0.0.0.0 - - [13/Dec/2020:14:49:49 +0000] "GET /static/favicon.png HTTP/2.0" 200 2628 "-" "-"'''
        )
        self.assertEqual(log_data['request_http_ver'], '2.0')
Example #26
 def _get_apache_key_count(self, key):
     d = dict()
     line_parser = apache_log_parser.make_parser(config.APACHE_LOGS_FORMAT)
     for log_line in self.apache_malware_dl:
         log_line_data = line_parser(log_line)
         key_value = log_line_data[key]
         d[key_value] = d.get(key_value, 0) + 1
     return d
Example #27
 def test_issue11(self):
     format_string = "%h <<%P>> %t %Dus \"%r\" %>s %b  \"%{Referer}i\" \"%{User-Agent}i\" %l %u"
     parser = apache_log_parser.make_parser(format_string)
     sample = '127.0.0.1 <<6113>> [16/Aug/2013:15:45:34 +0000] 1966093us "DELETE / HTTP/1.1" 200 3478  "https://example.com/" "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18)" - -'
     log_data = parser(sample)
     self.assertNotEqual(log_data, None)
     self.assertEqual(log_data['request_first_line'], 'DELETE / HTTP/1.1')
     self.assertEqual(log_data['request_method'], 'DELETE')
Example #29
 def test_issue10_host(self):
     # hostname lookup should work
     format_string = "%h %l %u %t \"%r\" %>s %b"
     parser = apache_log_parser.make_parser(format_string)
     sample = '2001:0db8:85a3:0000:0000:8a2e:0370:7334 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326'
     log_data = parser(sample)
     self.assertNotEqual(log_data, None)
     self.assertEqual(log_data['remote_host'], '2001:0db8:85a3:0000:0000:8a2e:0370:7334')
Example #30
 def parse_messages(self):
     f = open(self.file_name)
     raw_messages = [line.rstrip('\n') for line in f.readlines()]
     return_array = []
     line_parser = apache_log_parser.make_parser("%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"")
     for raw_message in raw_messages:
         parsed_message = line_parser(raw_message)
         return_array.append({"raw_message": raw_message, "parsed_message": parsed_message})
     return return_array
Example #31
    def __init__(self, log_entry_queue, domains, global_stats, log_format):
        threading.Thread.__init__(self)
        self.daemon = True

        self.log_entry_queue = log_entry_queue
        self.domains = domains
        self.global_stats = global_stats

        self.parser = apache_log_parser.make_parser(log_format)
Example #32
 def __init__(self, file, format=None):
     import apache_log_parser
     self.file = self.rough_filter(file)
     if not format:
         # self.format = '%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"'
         self.format = CLFParser.DEFAULT_FORMAT
     else:
         self.format = format
     self.parser = apache_log_parser.make_parser(self.format)
Example #33
 def _parse_file_content(self, content):
     assert content
     if self.line_parser is None:
         self.line_parser = apache_log_parser.make_parser("%h <<%P>> %t %Dus \"%r\" %>s %b  \"%{Referer}i\" \"%{User-Agent}i\" %l %u")
     self.access_events = []
     for line in content:
         access_event = self.line_parser(line)
         self.access_events.append(access_event)
     return self.access_events
Example #34
    def test_issue12_nonnum_status(self):
        # In case status is - as opposed to a number
        format_string = "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\""
        parser = apache_log_parser.make_parser(format_string)
        sample1 = '002:52ee:xxxx::x - - [11/Jun/2014:22:55:45 +0000] "GET /X230_2.51_g2uj10us.iso HTTP/1.1" - 3414853 "refer" "Mozilla/5.0 (X11; Linux x86_64; rv:29.0) Gecko/20100101 Firefox/29.0"'

        log_data1 = parser(sample1)
        self.assertNotEqual(log_data1, None)
        self.assertEqual(log_data1['status'], '-')
Example #36
def parse_log(request):
    site_obj = Site.objects.order_by("-id")
    site_list = list(site_obj)
    log_format_id = int(request.POST.get('log_format_id'))
    site_id = int(request.POST.get('site_id'))
    log_format_model = LogFormats.objects.get(id=log_format_id)
    log_format = str(log_format_model.log_format)
    line_parser = apache_log_parser.make_parser(log_format)

    uploaded_file = request.FILES.get('uploaded_file')

    parsed_log_list = []
    log_lines = []
    for line in uploaded_file.file:
        try:
            line = line.strip()
            if bool(line) and line not in log_lines:
                data = line_parser(line)
                apl = ApacheLog(**data)
                apl.full_line = line
                apl.site_id = site_id
                apl.log_format_id = log_format_id
                parsed_log_list.append(apl)
                log_lines.append(line)
        except Exception as e:
            return render(
                request, 'upload_log.html', {
                    'msg': "Invalid file or Log format!",
                    'site_id': site_id,
                    'sites': site_list
                })

    try:
        from itertools import islice
        start = 0
        batch_size = 10
        stop = batch_size
        while stop <= len(parsed_log_list):
            batch = list(islice(parsed_log_list, start, stop))
            if not batch:
                break
            ApacheLog.objects.bulk_create(batch, batch_size)
            start = stop
            stop += batch_size
            if stop > len(parsed_log_list):
                stop = len(parsed_log_list)
    except IntegrityError as ie:
        # should not happen as duplicates should be removed before..
        print "Duplicates found!"
        # return render(request, 'upload_log.html', {'msg': "Uniqueness failed! Most probably file uploaded before!", 'site_id': site_id, 'sites': site_list})
    except Exception, e:
        return render(request, 'upload_log.html', {
            'msg': e.message,
            'site_id': site_id,
            'sites': site_list
        })
Example #37
    def load(self, filename, tag="default"):
        line_parser = apache_log_parser.make_parser( "%h %{X-Forwarded-For}i %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" )
        count = 0
        with open(filename) as f:
            content = f.readlines()
            for line in content:
                # some lines in our access_log are missing the remote_user column
                # because there are apparently two different CustomLog lines on some
                # of our servers
                line = re.sub("- [[]", '- "" [', line)
                try:
                    data = line_parser(line)
                except ValueError:
                    self.log.error("Couldn't parse this line: {0}".format(line))
                else:
                    (bare_url, query_string) = self.split_url(data['request_url'])
                    self.cursor.execute("""
INSERT INTO access_log (
    tag,
    timestamp,
    remote_ip,
    x_forwarded_for,
    remote_user,
    method,
    url,
    bare_url,
    query_string,
    status,
    bytes,
    referrer,
    user_agent,
    is_mobile)
VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)
""", [
    tag,
    data['time_received_datetimeobj'].strftime('%s'),
    data['remote_host'],
    data['request_header_x_forwarded_for'],
    data['remote_user'],
    data['request_method'],
    data['request_url'],
    bare_url,
    query_string,
    data['status'],
    data['response_bytes_clf'],
    data['request_header_referer'],
    data['request_header_user_agent'],
    data['request_header_user_agent__is_mobile']
    ]
) 
                    count +=1
                    if count % 1000 == 0:
                        self.log.info("Processed {0} lines ({1}%)".format(count, float(count)/float(len(content)) * 100))
            self.db.commit()
            self.db.close()
        return(len(content))
Example #38
def parse_log(log):
    ''' Parses a single server log.

        log: a line from the server log file
        Returns: parsed log
    '''
    apache_combined_format = "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\""
    line_parser = make_parser(apache_combined_format)
    log_data = line_parser(log)
    return log_data
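
A short usage sketch with an invented combined-format line:

sample = '198.51.100.4 - - [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "-" "Mozilla/5.0"'
print(parse_log(sample)['request_url'])  # /apache_pb.gif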
Example #39
def parse_log_file(name_file, data):
    line_parser = apache_log_parser.make_parser("%h %l %u %t \"%r\" %>s %b")

    data_parse = []

    with open(name_file, 'w', newline='') as file:
        writer = csv.writer(file)

        for record in data:
            record_parse = line_parser(record)

            # Keep only successful GETs that are not static image/media assets.
            url_path = record_parse['request_url_path'].lower()
            excluded = ('jpg', 'jpeg', 'gif', 'png', 'bmp', 'mpg', 'xmb', 'xbm')
            if (record_parse['request_method'] == 'GET'
                    and record_parse['status'] == '200'
                    and not any(ext in url_path for ext in excluded)):

                record_parse_data = []

                record_parse_data.append(record_parse['remote_host'])
                record_parse_data.append(record_parse['request_http_ver'])
                record_parse_data.append(record_parse['request_method'])
                record_parse_data.append(record_parse['request_url_path'])
                record_parse_data.append(record_parse['response_bytes_clf'])
                record_parse_data.append(record_parse['status'])

                date_time = datetime.strptime(
                    record_parse['time_received_isoformat'],
                    '%Y-%m-%dT%H:%M:%S')

                record_parse_data.append(str(date_time.date()))
                record_parse_data.append(str(date_time.time()))

                record_parse_data.append(str(date_time.year))
                record_parse_data.append(str(date_time.month))
                record_parse_data.append(str(date_time.day))
                record_parse_data.append(str(date_time.hour))
                record_parse_data.append(str(date_time.minute))
                record_parse_data.append(str(date_time.second))

                data_parse.append(record_parse_data)

                writer.writerow(record_parse_data)
Example #40
class GrLogLineReader(RegexApacheLineReader):

    line_parser = make_parser('%a %b %B %t %m %q %H %X %P %r %R')
    regexes = [
        (BHS_PATTERN[1:] if BHS_PATTERN.startswith('^') else BHS_PATTERN) +
        r'[_\-A-Za-z0-9]{0,50}\.pdf',
        (CDC1_PATTERN[1:] if CDC1_PATTERN.startswith('^') else CDC1_PATTERN) +
        r'[_\-A-Za-z0-9]{0,50}\.pdf',
        (CDC2_PATTERN[1:] if CDC2_PATTERN.startswith('^') else CDC2_PATTERN) +
        r'[_\-A-Za-z0-9]{0,50}\.pdf',
    ]
Example #41
    def __init__(self, filename):
        self.parser = apache_log_parser.make_parser("%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" %v")
        self.filename = filename.split(".")[0]

        self.bots_filtered = False
        self.is_html_only = False
        self.staff_filtered = False

        self.parsed_log = []

        self.add_logs(filename)
        self.filter_non_page_requests()
        self.filter_bots()
Example #42
    def parse_logs(self, logs):
        line_parser = apache_log_parser.make_parser("%v %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"")

        dictList = []
        for log in logs:
            try:
                dictList.append(line_parser(log))
                print '-- Appended! --'
            except Exception:
                pass
        # print logs[-3]
        # print dictList[-3]
        return dictList
Example #43
def reader(args):
    line_parser = apache_log_parser.make_parser(args.format)

    if args.auth is not None:
        credentials = args.auth.split(':')
        args.auth = requests.auth.HTTPBasicAuth(credentials[0], credentials[1])

    pool = Pool(args.workers)

    gt = GeventTail(file_name=args.log_file)
    for line in gt.readline():
        pool.spawn(worker, args, line, line_parser)
    pool.join()
Example #44
 def get_gulp_vars( self, APACHE_LOG_FILEPATH=None, pattern=None ):
     """ Initializes vars.
         Called by gulp() """
     url_lst = []
     extracted_lst = []
     discounted_lst = []
     if APACHE_LOG_FILEPATH is None:
         APACHE_LOG_FILEPATH = self.APACHE_LOG_FILEPATH
     if pattern is None:
         pattern = self.APACHE_COMBINED_PATTERN
         # pattern = self.APACHE_COMMON_PATTERN
     line_parser=apache_log_parser.make_parser( pattern )
     logging.debug( 'path, ```{}```'.format(APACHE_LOG_FILEPATH) )
     return ( url_lst, extracted_lst, discounted_lst, APACHE_LOG_FILEPATH, line_parser )
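
APACHE_COMBINED_PATTERN and APACHE_COMMON_PATTERN are class attributes defined elsewhere; judging from the commented-out alternative above, they are presumably the stock LogFormat strings, along these lines (an assumption, not the project's verbatim constants):

APACHE_COMBINED_PATTERN = '%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"'
APACHE_COMMON_PATTERN = '%h %l %u %t \"%r\" %>s %b'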
Example #45
    def get_ips(self, time_frame, given_time):
        """
        Returns a list with each of the IPs detected during the 'time_frame' minutes
        before 'given_time'. Each IP is accompanied by a verification flag (True/False)
        indicating whether the detection of that IP is conclusive or whether the positive
        still needs to be confirmed by another plugin.
        """
        ip_list = []
        if not self.enabled:
            return ip_list
        delta_frame = timedelta(minutes=time_frame)

        line_parser = apache_log_parser.make_parser(self.__webbug_log_format.decode('string_escape'))
        for remoteaddr in self.__webbug_log:

            addr = urlparse.urlparse(remoteaddr)  # Extract the host name for the events file
            filename = "data/wb-" + addr.hostname + '-' + self.person + "-events.log"

            with open(filename, 'r') as f:

                linea = f.readline()  # Detect the timezone from the first log line
                if linea:
                    p = re.compile(r"[+-]\d\d\d\d\]")
                    tz = p.findall(linea)[0]
                    timezone = timedelta(hours=int(tz[0:3]), minutes=int(tz[0]+tz[3:5]))

                while True:
                    if not linea:
                        break
                    log_line_data = line_parser(linea)
                    line_time_utc = log_line_data['time_received_datetimeobj'] - timezone

                    if line_time_utc > given_time:
                        break

                    if line_time_utc > given_time - delta_frame:
                        ip = log_line_data['remote_host']
                        ip_only_list = [a[0] for a in ip_list]
                        origin = urlparse.urlparse(log_line_data['request_first_line']).hostname
                        ref = Referer(log_line_data['request_header_referer'])
                        if not ip_only_list.count(ip):
                            if (ref.medium == 'search') and (ref.search_term is not None):
                                ip_list.append([ip, True, origin])  # New IP, add it
                            else:
                                ip_list.append([ip, False, origin])
                        elif origin != ip_list[ip_only_list.index(ip)][2]:
                            ip_list[ip_only_list.index(ip)][1] = True  # Same IP on a different page: a conclusive positive

                    linea = f.readline()

        return [c[0:2] for c in ip_list]
Example #46
	def connections(self, linha):
		try:
			line_parser = apache_log_parser.make_parser(self.conf['apache_mask'])
			log = line_parser(linha)
			if self.conf['vhost_enable']:
				log['vhost'] = linha.split(' ')[0]
			else:
				log['vhost'] = None
			log['owasp'] = self.owasp(log['request_url'])
			if log['owasp']:
				log['cef_date'] = log['time_received_datetimeobj'].strftime('%b %d %Y %H:%M:%S')
				self.send_all(log)
		except Exception:
			pass
Example #47
    def eval_data(self, time_frame, analyzed_time, given_time, confirmed_ips):
        """
        Returns a list with one element for each of the last 'check_interval' minutes
        before 'given_time'. Each element of the returned list holds the accumulated
        value of the detections during the preceding 'time_frame' minutes.
        """
        eval_time = time_frame + analyzed_time
        detect_list = [0] * eval_time
        acum_list = [0] * analyzed_time
        if not self.enabled:
            return acum_list

        time_now_utc = datetime(given_time.year, given_time.month, given_time.day, given_time.hour, given_time.minute)

        addr = urlparse.urlparse(self.__access_log)  # Extract the host name for the events file
        filename = "data/mc-" + addr.hostname + '-' + self.person + "-events.log"
        line_parser = apache_log_parser.make_parser(self.__access_log_format.decode('string_escape'))

        with open(filename, 'r') as f:

            linea = f.readline()  # Detect the timezone from the first log line
            if linea:
                p = re.compile(r"[+-]\d\d\d\d\]")
                tz = p.findall(linea)[0]
                timezone = timedelta(hours=int(tz[0:3]), minutes=int(tz[0]+tz[3:5]))

            while linea:
                log_line_data = line_parser(linea)
                if confirmed_ips.count(log_line_data['remote_host']):

                    l = log_line_data['time_received_datetimeobj']
                    line_time_utc = datetime(l.year, l.month, l.day, l.hour, l.minute) - timezone

                    if line_time_utc > time_now_utc:
                        break

                    i = int((time_now_utc - line_time_utc).total_seconds()/60)  # Convert time to a list index
                    if i < eval_time:
                        detect_list[eval_time - i - 1] += self.__weight  # Detection weight list

                linea = f.readline()
            #print "Detect list:", detect_list
            for i in range(1, analyzed_time + 1):  # Acumulacción de pesos de detección para los rangos dados
                #print "acumulado", analyzed_time - i, "= suma desde",  eval_time - time_frame - i, "hasta", eval_time - i, "=", detect_list[eval_time - time_frame - i:eval_time - i + 1], "=", sum(detect_list[eval_time - time_frame - i:eval_time - i])
                acum_list[analyzed_time - i] = sum(detect_list[eval_time - time_frame - i:eval_time - i + 1])

        return acum_list
Example #48
def make_csv(filepath):
    #log_format = '%h %l %u %t \"%r\" %>s %b'
    #format = r'%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"'
    log_format = r'%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"'
    parser = apache_log_parser.make_parser(log_format)
    read = open(filepath)
    # this is only for our files
    write = open(str(filepath[:28]) + str(filepath[-12:-4]) + '.csv', 'w')
    writer = csv.writer(write, lineterminator="\n")

    for line in read:
        log_data = parser(line)
        if log_data['status'] == '200' \
                and log_data['request_method'] == 'GET' \
                and (re.search(r'\.(js|css|gif|jpg|jpeg|png|JPG|ico)', log_data['request_url_path'])) is None:
            temp = log_data['remote_host'], log_data['time_received'], log_data['request_url']
            writer.writerow(temp)
Example #49
    def test_simple(self):
        format_string = "%h <<%P>> %t %Dus \"%r\" %>s %b  \"%{Referer}i\" \"%{User-Agent}i\" %l %u"
        parser = apache_log_parser.make_parser(format_string)
        sample = '127.0.0.1 <<6113>> [16/Aug/2013:15:45:34 +0000] 1966093us "GET / HTTP/1.1" 200 3478  "https://example.com/" "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18)" - -'
        log_data = parser(sample)
        self.assertNotEqual(log_data, None)
        self.assertEqual(log_data['status'], '200')
        self.assertEqual(log_data['pid'], '6113')
        self.assertEqual(log_data['request_first_line'], 'GET / HTTP/1.1')
        self.assertEqual(log_data['request_method'], 'GET')
        self.assertEqual(log_data['request_url'], '/')
        self.assertEqual(log_data['request_header_referer'], 'https://example.com/')

        self.assertEqual(log_data['request_header_user_agent'], 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18)')

        self.assertEqual(log_data['request_header_user_agent__os__family'], 'Linux')

        self.assertEqual(apache_log_parser.get_fieldnames(format_string), ('remote_host', 'pid', 'time_received', 'time_us', 'request_first_line', 'status', 'response_bytes_clf', 'request_header_referer', 'request_header_user_agent', 'remote_logname', 'remote_user'))
Example #50
import logging
import time
import traceback

import apache_log_parser
from pymongo import MongoClient


def main(SERVER_NAME, FILE_PATH, SEEK_FILE):
    logging.basicConfig(filename='out.log', level=logging.DEBUG)
    line_parser = apache_log_parser.make_parser("%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"")
    client = MongoClient("mongodb://db:27017/")
    db = client.secfilter1
    f = open(FILE_PATH, 'r')
    # Resume from the byte offset recorded in the seek file, if any
    try:
        with open(SEEK_FILE, 'r') as sf:
            last = int(sf.read().strip())
    except (IOError, ValueError):
        last = 0
    f.seek(last)
    try:
        while True:
            line = f.readline()
            if line:
                last = f.tell()
                out = line_parser(line)
                out["server"] = SERVER_NAME
                out["analyzed"] = False
                db.requests.insert_one(out)
                logging.info(last)
                logging.debug(str(out) + "\n----\n")
                # Persist the current offset so a restart can resume here
                with open(SEEK_FILE, 'w+') as sf:
                    sf.write(str(last))
            else:
                # No new data yet; poll again shortly
                time.sleep(1)

    except Exception:
        traceback.print_exc()
    finally:
        f.close()
        logging.info("BYE")
Example #51
import apache_log_parser
import pymongo


def main():

    # Create the parser.
    # The format string is the LogFormat directive from httpd.conf.
    parser = apache_log_parser.make_parser('%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"')

    # Connect to MongoDB
    conn = pymongo.MongoClient('localhost', 27017)
    db = conn.apache
    collection = db.log

    # Get the list of access logs whose names start with access_log
    access_log_list = get_access_log_list()
    for file in access_log_list:
        for line in open('apache_log/' + file):
            # parse
            log_data = parser(line)

            # load to db
            load_to_db(collection, log_data)
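The helpers get_access_log_list() and load_to_db() are assumed by main() but not shown; a minimal sketch of what they might look like (hypothetical, not the original):

import os

def get_access_log_list():
    # List files under apache_log/ whose names start with 'access_log'
    return sorted(f for f in os.listdir('apache_log') if f.startswith('access_log'))

def load_to_db(collection, log_data):
    # Each parsed line becomes one MongoDB document
    collection.insert_one(log_data)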
Example #52
import glob
import sys

import apache_log_parser

# LogFormats is assumed to be defined in the surrounding project.
def parse(log_file_path, log_format=LogFormats.APACHE_COMBINED):
    """ Import and parse log files using the apache log parser. """
    log_data = []
    line_parser = apache_log_parser.make_parser(log_format)

    for file_name in glob.glob(log_file_path):
        sys.stdout.write("\nFile name: %s\n" % file_name)

        with open(file_name, 'r') as f:
            # First pass: count the lines so progress can be reported
            total = sum(1 for _ in f)
            f.seek(0)
            for counter, line in enumerate(f):
                percent = 100.0 * (counter + 1) / total
                log_data.append(line_parser(line))
                sys.stdout.write("\rProcessed %i of %i entries (%i%%)" % (counter + 1, total, percent))
                sys.stdout.flush()

    sys.stdout.write("\n")
    return log_data
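A hedged usage sketch (the glob pattern is hypothetical; LogFormats.APACHE_COMBINED is assumed from the surrounding project):

entries = parse('/var/log/apache2/access.log*')
print("Parsed %d entries" % len(entries))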
Example #53
def get_log_urls(logfiles, logformat, strip_qs=False, strip_anchors=False, verbose=False):
    """
    Parse apache log files, return a dict of distinct URLs (keys)
    and their most recent HTTP response code (values).

    :param logfiles: list of absolute paths to access logs to parse
    :type logfiles: list of strings
    :param logformat: Apache LogFormat string used to build the parser
    :type logformat: string
    :param strip_qs: whether to strip query strings from URLs
    :type strip_qs: boolean
    :param strip_anchors: whether to strip anchors from URLs
    :type strip_anchors: boolean
    :param verbose: whether or not to print verbose output
    :type verbose: boolean
    :returns: dict of request path => latest response code
    :rtype: dict, string keys to int values
    """
    temp = {}
    p = apache_log_parser.make_parser(logformat)
    for fpath in logfiles:
        parsefail = 0
        lcount = 0
        if verbose:
            print("++ Parsing %s" % fpath)
        for line in open(fpath):
            line = str(line).strip()
            lcount = lcount + 1
            try:
                data = p(line)
                if data['request_method'] != 'GET':
                    continue
                data['request_url'] = url_strip(data['request_url'], strip_qs, strip_anchors)
                entry = {'datetime': data['time_received_datetimeobj'],
                         'status': int(data['status'])}
                if (data['request_url'] not in temp
                        or temp[data['request_url']]['datetime'] < entry['datetime']):
                    temp[data['request_url']] = entry
            except Exception as e:
                if verbose:
                    print("Parse Exception: %s for line '%s'" % (str(e), line))
                parsefail = parsefail + 1
        sys.stderr.write("++ Failed parsing %d of %d lines from %s\n" % (parsefail, lcount, fpath))
    # Reduce to the documented shape: request path => latest response code
    return dict((url, info['status']) for url, info in temp.items())
Example #54
    def get_ips(self, time_frame, given_time):
        """
        Return a list with each of the IPs detected during the 'time_frame' minutes prior to
        'given_time'. Each IP is paired with a verification flag (True/False) indicating whether
        the detection of that IP is conclusive or needs to be confirmed by another plugin.
        """
        ip_list = []
        if not self.enabled:
            return ip_list
        delta_frame = timedelta(minutes=time_frame)

        addr = urlparse.urlparse(self.__access_log)  # Derive the event-log file name
        filename = "data/mc-" + addr.hostname + '-' + self.person + "-events.log"
        line_parser = apache_log_parser.make_parser(self.__access_log_format.decode('string_escape'))

        with open(filename, 'r') as f:

            linea = f.readline()  # Detect the time zone from the first line of the log
            if linea:
                p = re.compile(r"[+-]\d{4}\]")
                tz = p.findall(linea)[0]
                # tz looks like '+0200]'; prepending the sign to the minutes keeps the offset signed
                timezone = timedelta(hours=int(tz[0:3]), minutes=int(tz[0]+tz[3:5]))

            while True:
                if not linea:
                    break
                log_line_data = line_parser(linea)
                line_time_utc = log_line_data['time_received_datetimeobj'] - timezone
                if line_time_utc > given_time:
                    break
                if line_time_utc > given_time - delta_frame:
                    ip = log_line_data['remote_host']
                    if not ip_list.count([ip, False]):
                        ip_list.append([ip, False])

                linea = f.readline()

        return ip_list
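The sign-handling trick in the time-zone parsing above can be checked standalone (hypothetical sample line):

import re
from datetime import timedelta

linea = '127.0.0.1 - - [28/Nov/2014:10:03:40 -0530] "GET / HTTP/1.1" 200 5'
tz = re.findall(r"[+-]\d{4}\]", linea)[0]  # '-0530]'
timezone = timedelta(hours=int(tz[0:3]), minutes=int(tz[0] + tz[3:5]))
print(timezone)  # -1 day, 18:30:00, i.e. -5h30m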
Example #55
def get_vclients():
	fileName = '/var/log/apache2/access.log'
	line_parser = apache_log_parser.make_parser("%h %l %u %t \"%r\" %>s %O")
	line_count = 100
	curLn = 0
	## Walk the log backwards, looking only at the most recent lines
	for line in reversed(open(fileName).readlines()):
		if curLn <= line_count:
			log_line_data = line_parser(line)
			cur_ip = log_line_data['remote_host']

			## Get a name for the client; reverse DNS can fail, so fall back to the IP
			try:
				cur_name = socket.gethostbyaddr(cur_ip)[0]
			except socket.herror:
				cur_name = cur_ip

			## Record the recent client in the database
			cur_request = log_line_data['request_url']
			cur_time = log_line_data['time_received_datetimeobj'].replace(tzinfo=utc)
			if "m4f" in cur_request:
				num_results = VClient.objects.filter(ip=cur_ip).count()
				if num_results > 0:
					cur_obj = VClient.objects.filter(ip=cur_ip)[0]
					if cur_obj.last_visit < cur_time:
						cur_obj.last_visit = cur_time
						cur_obj.name = cur_name
						print("[UPDATE]:" + cur_ip + ", " + cur_request + ", " + str(cur_time))
				else:
					cur_obj = VClient(name=cur_name, ip=cur_ip, last_visit=cur_time)
				cur_obj.save()

		else:
			break
		curLn = curLn + 1
Example #56
def parse_url_and_time(base, format):
    """
    Given a list of files/directories, parse each line with apache-log-parser,
    and extract and yield the URL and timing data.
    """
    parser = apache_log_parser.make_parser(format)

    for filename in files(base):

        for line in open_anything(filename):
            try:
                match = parser(line)
            except apache_log_parser.LineDoesntMatchException:
                # Skip lines that don't match the given format
                pass

            else:
                # Extract the fields we care about from the parsed line
                results = {'url': match['request_url'], 'microsec': int(match['time_us']),
                           'method': match['request_method'], 'ipaddr': match['remote_host'],
                           'datetime': match['time_received_isoformat']}

                yield results
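A hedged usage sketch (the format string and base path are hypothetical; %D must be present for time_us to exist, and files()/open_anything() are helpers assumed by the generator above):

fmt = '%h %l %u %t \"%r\" %>s %b %D'
slowest = {}
for r in parse_url_and_time('/var/log/apache2/', fmt):
    slowest[r['url']] = max(slowest.get(r['url'], 0), r['microsec'])
for url, usec in sorted(slowest.items(), key=lambda kv: -kv[1])[:10]:
    print(usec, url)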
Example #57
    def configure(self, conf):
        """ Receive and process configuration block from collectd """
        for node in conf.children:
            key = node.key.lower()
            val = node.values[0]

            if key == 'accesslog':
                self.access_log = val
                if not access(self.access_log, R_OK):
                    self.err('AccessLog %s is not readable!' % self.access_log)
            elif key == 'accesslogformat':
                self.access_log_format = val
                try:
                    self.parser = make_parser(self.access_log_format)
                except LineDoesntMatchException:
                    self.err('Couldn\'t parse AccessLogFormat: %s' % (
                        self.access_log_format))
                    return
            elif key == 'name':
                self.plugin_name = val
            elif key == 'interval':
                self.interval = val
            else:
                self.warn('Unknown config key: %s.' % key)
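collectd hands configure() a config tree whose nodes expose .key and .values; a minimal stand-in to exercise the method outside collectd (hypothetical class and values):

class FakeNode(object):
    def __init__(self, key, *values):
        self.key = key
        self.values = values

class FakeConf(object):
    def __init__(self, children):
        self.children = children

conf = FakeConf([
    FakeNode('AccessLog', '/var/log/apache2/access.log'),
    FakeNode('AccessLogFormat', '%h %l %u %t "%r" %>s %b'),
    FakeNode('Interval', 10.0),
])
# plugin.configure(conf)  # assuming `plugin` is an instance of the class above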
Example #58
			rqst_type = get_type(rqst_method, rqst_url)

			doc = {'user_id': user_id,
				   'user_ip': user_ip,
				   'server': server,
				   'timestamp': timestamp,
				   'request': {
					   'type': rqst_type,
					   'method': rqst_method,
					   'url': rqst_url,
					   'categ': rqst_categ,
					   },
				   }

			# Insert into the logs collection of the logapache database
			db.logs.insert_one(doc)

			# Log progress
			if index % 2000 == 0:
				print 'Progress {}...'.format(index)

		print 'Skipped {} logs'.format(skipped)
		print 'Done inserting logs..'

if __name__ == "__main__":
	# Set up the pymongo connection
	client = MongoClient(MONGO_HOST)
	db = client.get_database(MONGO_DB)
	parser = apache_log_parser.make_parser(PARSER_LOG_FORMAT)
	log_file_path = os.path.join(PARSER_WORK_DIR, PARSER_LOG_FILE_NAME)
	read_and_insert(log_file_path, db, parser)