Python BsonParser примеры, lib.cuckoo.common.netlog.BsonParser Python примеры использования

Пример #1

0

Показать файл

Файл: windows.py Проект: 0xhack/cuckoo

    def parse(self, path):
        """Iterate over the events stored in a BSON behavior log.

        Every event read from the log is yielded unchanged. A "process"
        event additionally registers the process (with a MonitorProcessLog
        attached as its "calls") and a BehaviorReconstructor for its pid.
        An "apicall" event is first fed to that reconstructor and each
        (category, value) pair it reports is yielded as a "generic" event
        ahead of the API call itself.

        @param path: path of the log file to parse.
        """
        # Invoke parsing of current log file.
        stream = BsonParser(open(path, "rb"))
        stream.init()

        for event in stream:
            kind = event["type"]

            if kind == "process":
                process = dict(event)
                process["calls"] = MonitorProcessLog(stream)
                self.processes.append(process)

                self.reconstructors[process["pid"]] = BehaviorReconstructor()

            elif kind == "apicall":
                # Create generic events out of the windows calls.
                tracker = self.reconstructors[event["pid"]]
                found = tracker.process_apicall(event)

                # A lone tuple is treated like a list holding that tuple.
                if found and isinstance(found, tuple):
                    found = [found]

                for category, arg in found or ():
                    yield {
                        "type": "generic",
                        "pid": event["pid"],
                        "category": category,
                        "value": arg,
                    }

                # Flag that API calls were seen. `process` is whichever
                # "process" event was read most recently.
                process["calls"].has_apicalls = True

            yield event

Пример #2

0

Показать файл

Файл: windows.py Проект: 0day29/cuckoo

    def parse(self, path):
        """Iterate over the events stored in a BSON behavior log.

        Every event read from the log is yielded unchanged. A "process"
        event registers the process together with a BehaviorReconstructor
        and a RebootReconstructor for its pid. An "apicall" event is fed to
        both reconstructors first: behavior results are yielded as
        "generic" events and reboot results as "reboot" events, before the
        API call itself is yielded.

        @param path: path of the log file to parse.
        """
        # Invoke parsing of current log file.
        stream = BsonParser(open(path, "rb"))
        stream.init()

        for event in stream:
            kind = event["type"]

            if kind == "process":
                process = dict(event)
                process["calls"] = MonitorProcessLog(stream, process["modules"])
                self.processes.append(process)

                self.behavior[process["pid"]] = BehaviorReconstructor()
                self.reboot[process["pid"]] = RebootReconstructor()

            elif kind == "apicall":
                # Both reconstructors are looked up before anything is
                # yielded, so an unknown pid fails up front.
                behavior = self.behavior[event["pid"]]
                reboot = self.reboot[event["pid"]]

                # Create generic events out of the windows calls.
                for category, arg in behavior.process_apicall(event):
                    generic = {
                        "type": "generic",
                        "pid": event["pid"],
                        "category": category,
                        "value": arg,
                    }
                    yield generic

                # Process the reboot reconstructor.
                for category, args in reboot.process_apicall(event):
                    # TODO Improve this where we have to calculate the "real"
                    # time again even though we already do this in
                    # MonitorProcessLog.
                    offset = datetime.timedelta(
                        microseconds=event["time"] * 1000
                    )
                    ts = process["first_seen"] + offset

                    # NOTE(review): "%d" is the day of the month, so "time"
                    # is 1..31 rather than a full timestamp - confirm this
                    # is what the consumers of "reboot" events expect.
                    yield {
                        "type": "reboot",
                        "category": category,
                        "args": args,
                        "time": int(ts.strftime("%d")),
                    }

                # Flag that API calls were seen. `process` is whichever
                # "process" event was read most recently.
                process["calls"].has_apicalls = True

            yield event

Пример #3

0

Показать файл

Файл: resultserver.py Проект: wwwiretap/cuckoo

    def negotiate_protocol(self):
        """Pick the protocol handler named in the client's first line.

        Reads one line via read_newline() and stores a BsonParser,
        FileUpload or LogHandler in self.protocol, depending on which
        keyword the line contains (substring match, checked in that order).

        @raise CuckooOperationalError: if no known keyword is present.
        """
        # Read until newline.
        header = self.read_newline()

        # Order matters: the first keyword found in the line wins.
        known_protocols = (
            ("BSON", BsonParser),
            ("FILE", FileUpload),
            ("LOG", LogHandler),
        )
        for keyword, handler_cls in known_protocols:
            if keyword in header:
                self.protocol = handler_cls(self)
                return

        raise CuckooOperationalError("Netlog failure, unknown "
                                     "protocol requested.")

Пример #4

0

Показать файл

    def parse_first_and_reset(self):
        """Open the log, read up to the first process record, then rewind.

        The file is always opened first. If its name does not end in
        ".bson" it is closed again, self.fd is set back to None and no
        parser is created.
        """
        log_path = self._log_path
        self.fd = open(log_path, "rb")

        if not log_path.endswith(".bson"):
            # Unsupported log type: release the handle and give up.
            self.fd.close()
            self.fd = None
            return

        self.parser = BsonParser(self)

        # Get the process information from file
        # Note that we have to read in all messages until we
        # get all the information we need, so the invariant below
        # should involve the last process-related bit of
        # information logged
        # Environment info will be filled in as the log is read
        # and will be stored by reference into the results dict
        while not self.process_id:
            self.parser.read_next_message()

        # Rewind so that later reads start at the first message again.
        self.fd.seek(0)

Пример #5

0

Показать файл

Файл: resultserver.py Проект: 5l1v3r1/CAPE-1

    def negotiate_protocol(self):
        """Pick the protocol handler named in the client's first line.

        Reads one line via read_newline() and stores the matching handler
        in self.protocol. Keywords are matched as substrings in a fixed
        order, so "DUPLICATEBINARY" is tested before "BINARY".

        @raise CuckooOperationalError: if no known keyword is present.
        """
        # Read until newline.
        header = self.read_newline()

        # (keyword, handler class, extra keyword arguments); first match wins.
        known_protocols = (
            ("BSON", BsonParser, {}),
            ("FILE", FileUpload, {"is_binary": False, "duplicate": False}),
            ("DUPLICATEBINARY", FileUpload, {"is_binary": True, "duplicate": True}),
            ("BINARY", FileUpload, {"is_binary": True, "duplicate": False}),
            ("LOG", LogHandler, {}),
        )
        for keyword, handler_cls, options in known_protocols:
            if keyword in header:
                self.protocol = handler_cls(self, **options)
                return

        raise CuckooOperationalError("Netlog failure, unknown "
                                     "protocol requested.")

Пример #6

0

Показать файл

Файл: behavior.py Проект: HazemAlbezreh/cuckoo

    def parse_first_and_reset(self):
        """Open the log, read up to the first process record, then rewind.

        ".bson" logs are read with BsonParser and ".raw" logs with
        NetlogParser. For any other file name the freshly opened handle is
        closed again, self.fd is set back to None and no parser is created.
        """
        log_path = self._log_path
        self.fd = open(log_path, "rb")

        parser_cls = None
        if log_path.endswith(".bson"):
            parser_cls = BsonParser
        elif log_path.endswith(".raw"):
            parser_cls = NetlogParser

        if parser_cls is None:
            # Unsupported log type: release the handle and give up.
            self.fd.close()
            self.fd = None
            return

        self.parser = parser_cls(self)

        # Get the process information from file to determine
        # process id (file names.)
        while not self.process_id:
            self.parser.read_next_message()

        # Rewind so that later reads start at the first message again.
        self.fd.seek(0)

Пример #7

0

Показать файл

Файл: behavior.py Проект: KillerInstinct/elastic-cuckoo-modified

    def parse_first_and_reset(self):
        """Open the log, read up to the first process record, then rewind.

        The file is always opened first. If its name does not end in
        ".bson" it is closed again, self.fd is set back to None and no
        parser is created.
        """
        log_path = self._log_path
        self.fd = open(log_path, "rb")

        if not log_path.endswith(".bson"):
            # Unsupported log type: release the handle and give up.
            self.fd.close()
            self.fd = None
            return

        self.parser = BsonParser(self)

        # Get the process information from file to determine
        # process id (file names.)
        while not self.process_id:
            self.parser.read_next_message()

        # Rewind so that later reads start at the first message again.
        self.fd.seek(0)

Пример #8

0

Показать файл

Файл: test_netlog.py Проект: hariomenkel/CAPEv2

    def test_read_next_message(self, bson_file):
        """The second message read fills in the handler's process_log."""
        parser = BsonParser(bson_file)

        # Nothing is recorded after the first message.
        parser.read_next_message()
        assert len(bson_file.process_log) == 0

        expected_process = (
            [0, 0, 1, 0, 2360, 0, 0, 0],
            datetime.datetime(2020, 11, 6, 10, 34, 36, 359375),
            1976,
            476,
            b'C:\\Windows\\sysnative\\lsass.exe',
            b"lsass.exe",
        )

        parser.read_next_message()
        assert bson_file.process_log == expected_process

Пример #9

0

Показать файл

Файл: behavior.py Проект: agilemobiledev/cuckoo-modified

    def parse_first_and_reset(self):
        """Open the log, read until environment info is seen, then rewind.

        The file is always opened first. If its name does not end in
        ".bson" it is closed again, self.fd is set back to None and no
        parser is created.
        """
        log_path = self._log_path
        self.fd = open(log_path, "rb")

        if not log_path.endswith(".bson"):
            # Unsupported log type: release the handle and give up.
            self.fd.close()
            self.fd = None
            return

        self.parser = BsonParser(self)

        # Get the process and environment information from file
        # Note that we have to read in all messages until we
        # get all the information we need, so the invariant below
        # should involve the last process-related bit of
        # information logged
        while not self.environdict:
            self.parser.read_next_message()

        # Rewind so that later reads start at the first message again.
        self.fd.seek(0)

Пример #10

0

Показать файл

Файл: resultserver.py Проект: zhangshuangjun/cuckoo

    def negotiate_protocol(self):
        """Select and initialise the handler named in the first line.

        The line is either a bare command ("BSON", "FILE" or "LOG") or a
        command followed by an integer version. A line containing a space
        must split into exactly two fields, otherwise the unpacking or the
        int() conversion raises ValueError.

        @raise CuckooOperationalError: if the command is not known.
        """
        protocol = self.read_newline(strip=True)

        # Command with version number.
        command, version = protocol, None
        if " " in protocol:
            command, raw_version = protocol.split()
            version = int(raw_version)

        handlers = {
            "BSON": BsonParser,
            "FILE": FileUpload,
            "LOG": LogHandler,
        }
        handler_cls = handlers.get(command)
        if handler_cls is None:
            raise CuckooOperationalError(
                "Netlog failure, unknown protocol requested.")

        self.protocol = handler_cls(self, version)
        self.protocol.init()

Пример #11

0

Показать файл

Файл: behavior.py Проект: kevoreilly/CAPEv2

class ParseProcessLog(list):
    """Parses process log file.

    The instance acts as the handler object passed to BsonParser: the parser
    pulls bytes through read() and reports what it decoded through the
    log_* callbacks. Iterating the instance yields one dict per API call
    (or anomaly), with consecutive identical calls folded into a single
    entry whose "repeated" counter is increased.
    """

    def __init__(self, log_path):
        """@param log_path: log file path."""
        self._log_path = log_path
        self.fd = None
        self.parser = None

        self.reporting_mode = False
        self.process_id = None
        self.process_name = None
        self.parent_id = None
        self.module_path = None
        # Using an empty initializer here allows the assignment of current_log.threads in the Processes run()
        # method to get a reference to the threads list we eventually build up by fully parsing a log
        # via the behavior analysis that happens later.  By the time the results dict is used later
        # to extract this information, it will finally have valid info.
        self.threads = []
        self.first_seen = None
        self.calls = self
        self.lastcall = None
        self.environdict = {}
        self.api_count = 0
        self.call_id = 0
        self.conversion_cache = {}
        self.api_limit = cfg.processing.analysis_call_limit  # Limit of API calls per process

        if os.path.exists(log_path) and os.stat(log_path).st_size > 0:
            self.parse_first_and_reset()

        if cfg.processing.ram_boost:
            # Read every call up front; a trailing None marks the end of
            # the cache for __next__() and begin_reporting().
            self.api_call_cache = []
            self.api_pointer = 0

            try:
                while True:
                    i = self.cacheless_next()
                    self.api_call_cache.append(i)
            except StopIteration:
                pass
            self.api_call_cache.append(None)

    def parse_first_and_reset(self):
        """Open file and init Bson Parser. Read till first process"""
        if not self._log_path.endswith(".bson"):
            return

        self.fd = open(self._log_path, "rb")
        self.parser = BsonParser(self)

        # Get the process information from file
        # Note that we have to read in all messages until we
        # get all the information we need, so the invariant below
        # should involve the last process-related bit of
        # information logged
        # Environment info will be filled in as the log is read
        # and will be stored by reference into the results dict
        while not self.process_id:
            self.parser.read_next_message()

        self.fd.seek(0)

    def read(self, length):
        """Read data from log file

        @param length: Length in byte to read
        @return: exactly `length` bytes, or b"" for a zero/negative length
        @raise EOFError: if fewer than `length` bytes are available
        """
        if not length or length < 0:
            return b""
        buf = self.fd.read(length)
        if not buf or len(buf) != length:
            raise EOFError()
        return buf

    def __iter__(self):
        # import inspect
        # log.debug("iter called by: %s", inspect.stack()[1])
        # import code; code.interact(local=dict(locals(), **globals()))
        return self

    def __repr__(self):
        return f"<ParseProcessLog log-path: {self._log_path}>"

    # NOTE(review): Python 3 uses __bool__ for truth testing, so this hook is
    # only reached when called explicitly - confirm whether bool(instance) is
    # meant to reflect "has calls" before adding a __bool__ alias.
    def __nonzero__(self):
        return self.wait_for_lastcall()

    def reset(self):
        """Reset fd and the per-iteration counters.

        Safe to call when no log file was opened (self.fd is None), which
        happens for a missing, empty or non-".bson" log.
        """
        if self.fd is not None:
            self.fd.seek(0)
        self.api_count = 0
        self.lastcall = None
        self.call_id = 0
        self.api_pointer = 0

    def compare_calls(self, a, b):
        """Compare two calls for equality. Same implementation as before netlog.

        Entries created by log_anomaly() carry no "status", "arguments" or
        "return" keys; such entries are never considered equal, so distinct
        anomalies are not folded together.

        @param a: call a
        @param b: call b
        @return: True if a == b else False
        """
        try:
            # all() stops at the first mismatch, keeping the original
            # left-to-right short-circuit order.
            return all(a[key] == b[key] for key in ("api", "status", "arguments", "return"))
        except KeyError:
            return False

    def wait_for_lastcall(self):
        """If there is no lastcall, iterate through messages till a call is found or EOF.
        To get the next call, set self.lastcall to None before calling this function

        @return: True if there is a call, False on EOF
        """
        while not self.lastcall:
            try:
                if not self.parser.read_next_message():
                    return False
            except EOFError:
                return False

        return True

    def cacheless_next(self):
        """Return the next call straight from the log file.

        Consecutive calls that compare equal are merged into the returned
        entry by adding to its "repeated" counter.

        @return: call dict with an "id" assigned from self.call_id
        @raise StopIteration: if there is no open log, at end of file, or
            once more than self.api_limit calls have been returned
        """
        if not self.fd:
            raise StopIteration()

        if not self.wait_for_lastcall():
            self.reset()
            raise StopIteration()

        self.api_count += 1
        if self.api_limit and self.api_count > self.api_limit:
            self.reset()
            raise StopIteration()

        nextcall, self.lastcall = self.lastcall, None

        # Look ahead and swallow every directly following duplicate.
        self.wait_for_lastcall()
        while self.lastcall and self.compare_calls(nextcall, self.lastcall):
            nextcall["repeated"] += self.lastcall["repeated"] + 1
            self.lastcall = None
            self.wait_for_lastcall()

        nextcall["id"] = self.call_id
        self.call_id += 1

        return nextcall

    def __next__(self):
        """Just accessing the cache"""

        if not cfg.processing.ram_boost:
            return self.cacheless_next()
        res = self.api_call_cache[self.api_pointer]
        if res is None:
            # The trailing None marks the end of the cache.
            self.reset()
            raise StopIteration()
        self.api_pointer += 1
        return res

    def log_process(self, context, timestring, pid, ppid, modulepath, procname):
        """log process information parsed from data file

        @param context: ignored
        @param timestring: Process first seen time
        @param pid: PID
        @param ppid: Parent PID
        @param modulepath: stored as self.module_path
        @param procname: Process name
        """
        self.process_id, self.parent_id, self.process_name = pid, ppid, procname
        self.module_path = modulepath
        self.first_seen = timestring

    def log_thread(self, context, pid):
        pass

    def log_environ(self, context, environdict):
        """log user/process environment information for later use in behavioral signatures

        @param context: ignored
        @param environdict: dict of the various collected information, which will expand over time
        """
        self.environdict.update(bytes2str(environdict))

    def log_anomaly(self, subcategory, tid, funcname, msg):
        """log an anomaly parsed from data file

        @param subcategory:
        @param tid: Thread ID
        @param funcname:
        @param msg:
        """
        self.lastcall = {
            "thread_id": tid,
            "category": "anomaly",
            "api": "",
            "subcategory": subcategory,
            "funcname": funcname,
            "msg": msg,
        }

    def log_call(self, context, apiname, category, arguments):
        """log an api call from data file
        @param context: containing additional api info
        @param apiname: name of the api
        @param category: win32 function category
        @param arguments: arguments to the api call
        """
        apiindex, repeated, status, returnval, tid, timediff, caller, parentcaller = context

        # timediff is multiplied by 1000 and applied as microseconds.
        current_time = self.first_seen + datetime.timedelta(0, 0, timediff * 1000)
        timestring = logtime(current_time)

        self.lastcall = self._parse(
            [timestring, tid, caller, parentcaller, category, apiname, repeated, status, returnval] + arguments
        )

    def log_error(self, emsg):
        """Log an error"""
        log.warning("ParseProcessLog error condition on log %s: %s", self._log_path, emsg)

    def begin_reporting(self):
        """Switch to reporting mode and strip "raw_value" from cached calls.

        Entries without "arguments" (anomalies) and arguments whose
        "raw_value" is already gone are skipped, so the method can be
        called more than once.
        """
        self.reporting_mode = True
        if cfg.processing.ram_boost:
            for ent in self.api_call_cache:
                if not ent:
                    # Trailing None sentinel: end of the cache.
                    break
                # remove the values we don't want to encode in reports
                for arg in ent.get("arguments", ()):
                    arg.pop("raw_value", None)

    def _parse(self, row):
        """Parse log row.
        @param row: row data.
        @return: parsed information dict.
        """
        arguments = []

        try:
            timestamp = row[0]  # Timestamp of current API call invocation.
            thread_id = row[1]  # Thread ID.
            caller = row[2]  # non-system DLL return address
            parentcaller = row[3]  # non-system DLL parent of non-system-DLL return address
            category = row[4]  # Win32 function category.
            api_name = row[5]  # Name of the Windows API.
            repeated = row[6]  # Times log repeated
            status_value = row[7]  # Success or Failure?
            return_value = row[8]  # Value returned by the function.
        except IndexError as e:
            log.debug("Unable to parse process log row: %s", e)
            return None

        # Now walk through the remaining columns, which will contain API
        # arguments.
        for api_arg in row[9:]:
            # Each argument is expected to be a (name, value) pair.
            try:
                arg_name, arg_value = api_arg
            except ValueError as e:
                log.debug("Unable to parse analysis row argument (row=%s): %s", api_arg, e)
                continue

            argument = {"name": arg_name}
            if isinstance(arg_value, bytes):
                arg_value = bytes2str(arg_value)

            # A non-empty list starting with bytes is joined into one string.
            if isinstance(arg_value, list) and arg_value and isinstance(arg_value[0], bytes):
                arg_value = " ".join(bytes2str(arg_value))

            try:
                argument["value"] = convert_to_printable(arg_value, self.conversion_cache)
            except Exception:
                # Best effort: log the offending value and drop this argument.
                log.error(arg_value, exc_info=True)
                continue
            if not self.reporting_mode:
                argument["raw_value"] = arg_value
            pretty = pretty_print_arg(category, api_name, arg_name, argument["value"])
            if pretty:
                argument["pretty_value"] = pretty
            arguments.append(argument)

        call = {
            "timestamp": timestamp,
            "thread_id": str(thread_id),
            "caller": f"0x{default_converter(caller):08x}",
            "parentcaller": f"0x{default_converter(parentcaller):08x}",
            "category": category,
            "api": api_name,
            "status": bool(int(status_value)),
        }

        if isinstance(return_value, int):
            call["return"] = f"0x{default_converter(return_value):08x}"
        else:
            call["return"] = convert_to_printable(str(return_value), self.conversion_cache)

        prettyret = pretty_print_retval(call["status"], call["return"])
        if prettyret:
            call["pretty_return"] = prettyret

        call["arguments"] = arguments
        call["repeated"] = repeated

        # add the thread id to our thread set
        if call["thread_id"] not in self.threads:
            self.threads.append(call["thread_id"])

        return call

Пример #12

0

Показать файл

Файл: test_netlog.py Проект: hariomenkel/CAPEv2

 def test_init(self, bson_file):
     """A BsonParser built on the fixture is truthy."""
     parser = BsonParser(bson_file)
     assert parser

Пример #13

0

Показать файл

Файл: behavior.py Проект: KillerInstinct/elastic-cuckoo-modified

class ParseProcessLog(list):
    """Parses process log file.

    The instance acts as the handler object passed to BsonParser: the parser
    pulls bytes through read() and reports what it decoded through the
    log_* callbacks. Iterating the instance yields one dict per API call
    (or anomaly), with consecutive identical calls folded into a single
    entry whose "repeated" counter is increased.
    """

    def __init__(self, log_path):
        """@param log_path: log file path."""
        self._log_path = log_path
        self.fd = None
        self.parser = None

        self.reporting_mode = False
        self.process_id = None
        self.process_name = None
        self.parent_id = None
        self.module_path = None
        self.threads = []
        self.first_seen = None
        self.calls = self
        self.lastcall = None
        self.environdict = None
        self.api_count = 0
        self.call_id = 0
        self.conversion_cache = {}
        self.cfg = Config()
        self.api_limit = self.cfg.processing.analysis_call_limit  # Limit of API calls per process

        if os.path.exists(log_path) and os.stat(log_path).st_size > 0:
            self.parse_first_and_reset()

        if self.cfg.processing.ram_boost:
            # Read every call up front; a trailing None marks the end of
            # the cache for next() and begin_reporting().
            self.api_call_cache = []
            self.api_pointer = 0

            try:
                while True:
                    i = self.cacheless_next()
                    self.api_call_cache.append(i)
            except StopIteration:
                pass
            self.api_call_cache.append(None)

    def parse_first_and_reset(self):
        """ Open file and init Bson Parser. Read till first process
        """
        self.fd = open(self._log_path, "rb")

        if self._log_path.endswith(".bson"):
            self.parser = BsonParser(self)
        else:
            # Unsupported log type: release the handle and give up.
            self.fd.close()
            self.fd = None
            return

        # Get the process information from file to determine
        # process id (file names.)
        while not self.process_id:
            self.parser.read_next_message()

        self.fd.seek(0)

    def read(self, length):
        """ Read data from log file

        @param length: Length in byte to read
        @return: exactly `length` bytes, or '' for a zero length
        @raise EOFError: if fewer than `length` bytes are available
        """
        if not length:
            return ''
        buf = self.fd.read(length)
        if not buf or len(buf) != length:
            raise EOFError()
        return buf

    def __iter__(self):
        #import inspect
        #log.debug('iter called by this guy: {0}'.format(inspect.stack()[1]))
        return self

    def __repr__(self):
        return "<ParseProcessLog log-path: %r>" % self._log_path

    def __nonzero__(self):
        return self.wait_for_lastcall()

    def reset(self):
        """ Reset fd and the per-iteration counters.

        Safe to call when no log file was opened (self.fd is None), which
        happens for a missing, empty or non-".bson" log.
        """
        if self.fd is not None:
            self.fd.seek(0)
        self.api_count = 0
        self.lastcall = None
        self.call_id = 0
        self.api_pointer = 0

    def compare_calls(self, a, b):
        """Compare two calls for equality. Same implementation as before netlog.

        Entries created by log_anomaly() carry no "status", "arguments" or
        "return" keys; such entries are never considered equal, so distinct
        anomalies are not folded together.

        @param a: call a
        @param b: call b
        @return: True if a == b else False
        """
        try:
            # all() stops at the first mismatch, keeping the original
            # left-to-right short-circuit order.
            return all(a[key] == b[key]
                       for key in ("api", "status", "arguments", "return"))
        except KeyError:
            return False

    def wait_for_lastcall(self):
        """ If there is no lastcall, iterate through messages till a call is found or EOF.
        To get the next call, set self.lastcall to None before calling this function

        @return: True if there is a call, False on EOF
        """
        while not self.lastcall:
            try:
                if not self.parser.read_next_message():
                    return False
            except EOFError:
                return False

        return True

    def cacheless_next(self):
        """ Return the next call straight from the log file.

        Consecutive calls that compare equal are merged into the returned
        entry by adding to its "repeated" counter.

        @return: call dict with an "id" assigned from self.call_id
        @raise StopIteration: if there is no open log, at end of file, or
            once more than self.api_limit calls have been returned
        """
        if not self.fd:
            raise StopIteration()

        if not self.wait_for_lastcall():
            self.reset()
            raise StopIteration()

        self.api_count += 1
        if self.api_limit and self.api_count > self.api_limit:
            self.reset()
            raise StopIteration()

        nextcall, self.lastcall = self.lastcall, None

        # Look ahead and swallow every directly following duplicate.
        self.wait_for_lastcall()
        while self.lastcall and self.compare_calls(nextcall, self.lastcall):
            nextcall["repeated"] += self.lastcall["repeated"] + 1
            self.lastcall = None
            self.wait_for_lastcall()

        nextcall["id"] = self.call_id
        self.call_id += 1

        return nextcall

    def next(self):
        """ Just accessing the cache
        """

        if self.cfg.processing.ram_boost:
            res = self.api_call_cache[self.api_pointer]
            if res is None:
                # The trailing None marks the end of the cache.
                self.reset()
                raise StopIteration()
            self.api_pointer += 1
            return res
        else:
            return self.cacheless_next()

    def log_process(self, context, timestring, pid, ppid, modulepath, procname):
        """ log process information parsed from data file

        @param context: ignored
        @param timestring: Process first seen time
        @param pid: PID
        @param ppid: Parent PID
        @param modulepath: stored as self.module_path
        @param procname: Process name
        """
        self.process_id, self.parent_id, self.process_name = pid, ppid, procname
        self.module_path = modulepath
        self.first_seen = timestring

    def log_thread(self, context, pid):
        pass

    def log_environ(self, context, environdict):
        """ log user/process environment information for later use in behavioral signatures

        @param context: ignored
        @param environdict: dict of the various collected information, which will expand over time
        """

        self.environdict = environdict

    def log_anomaly(self, subcategory, tid, funcname, msg):
        """ log an anomaly parsed from data file

        @param subcategory:
        @param tid: Thread ID
        @param funcname:
        @param msg:
        """
        self.lastcall = dict(thread_id=tid, category="anomaly", api="",
                             subcategory=subcategory, funcname=funcname,
                             msg=msg)

    def log_call(self, context, apiname, category, arguments):
        """ log an api call from data file
        @param context: containing additional api info
        @param apiname: name of the api
        @param category: win32 function category
        @param arguments: arguments to the api call
        """
        apiindex, repeated, status, returnval, tid, timediff, caller, parentcaller = context

        # timediff is multiplied by 1000 and applied as microseconds.
        current_time = self.first_seen + datetime.timedelta(0, 0, timediff*1000)
        timestring = logtime(current_time)

        self.lastcall = self._parse([timestring,
                                     tid,
                                     caller,
                                     parentcaller,
                                     category,
                                     apiname,
                                     repeated,
                                     status,
                                     returnval] + arguments)

    def log_error(self, emsg):
        """ Log an error
        """
        log.warning("ParseProcessLog error condition on log %s: %s", str(self._log_path), emsg)

    def begin_reporting(self):
        """ Switch to reporting mode and strip "raw_value" from cached calls.

        Entries without "arguments" (anomalies) and arguments whose
        "raw_value" is already gone are skipped, so the method can be
        called more than once.
        """
        self.reporting_mode = True
        if self.cfg.processing.ram_boost:
            for ent in self.api_call_cache:
                if not ent:
                    # Trailing None sentinel: end of the cache.
                    break
                # remove the values we don't want to encode in reports
                for arg in ent.get("arguments", ()):
                    arg.pop("raw_value", None)

    def _parse(self, row):
        """Parse log row.
        @param row: row data.
        @return: parsed information dict.
        """
        call = {}
        arguments = []

        try:
            timestamp = row[0]    # Timestamp of current API call invocation.
            thread_id = row[1]    # Thread ID.
            caller = row[2]       # non-system DLL return address
            parentcaller = row[3]       # non-system DLL parent of non-system-DLL return address
            category = row[4]     # Win32 function category.
            api_name = row[5]     # Name of the Windows API.
            repeated = row[6]     # Times log repeated
            status_value = row[7] # Success or Failure?
            return_value = row[8] # Value returned by the function.
        except IndexError as e:
            log.debug("Unable to parse process log row: %s", e)
            return None

        # Now walk through the remaining columns, which will contain API
        # arguments.
        for api_arg in row[9:]:
            argument = {}

            # Each argument is expected to be a (name, value) pair.
            try:
                arg_name, arg_value = api_arg
            except ValueError as e:
                log.debug("Unable to parse analysis row argument (row=%s): %s", api_arg, e)
                continue

            argument["name"] = arg_name

            argument["value"] = convert_to_printable(str(arg_value), self.conversion_cache)
            if not self.reporting_mode:
                argument["raw_value"] = arg_value
            pretty = pretty_print_arg(category, api_name, arg_name, argument["value"])
            if pretty:
                argument["pretty_value"] = pretty
            arguments.append(argument)

        call["timestamp"] = timestamp
        call["thread_id"] = str(thread_id)
        call["caller"] = "0x%.08x" % caller
        call["parentcaller"] = "0x%.08x" % parentcaller
        call["category"] = category
        call["api"] = api_name
        call["status"] = bool(int(status_value))

        if isinstance(return_value, (int, long)):
            call["return"] = "0x%.08x" % return_value
        else:
            call["return"] = convert_to_printable(str(return_value), self.conversion_cache)

        prettyret = pretty_print_retval(category, api_name, call["status"], call["return"])
        if prettyret:
            call["pretty_return"] = prettyret

        call["arguments"] = arguments
        call["repeated"] = repeated

        # add the thread id to our thread set
        if call["thread_id"] not in self.threads:
            self.threads.append(call["thread_id"])

        return call

Пример #14

0

Показать файл

Файл: behavior.py Проект: KillerInstinct/elastic-cuckoo-modified

class ParseProcessLog(list):
    """Parses a per-process behavior log file.

    The instance is an iterator over the API calls found in a ``.bson`` log
    (``__iter__`` returns ``self``) and is also the handler object passed to
    ``BsonParser``: it exposes ``read()`` and the ``log_*`` callbacks.
    Each ``log_call``/``log_anomaly`` callback stores the parsed entry in
    ``self.lastcall``, from where the iteration methods pick it up.

    NOTE(review): this is Python 2 code (``next``, ``__nonzero__``, ``long``).
    """

    def __init__(self, log_path):
        """@param log_path: log file path."""
        self._log_path = log_path
        self.fd = None
        self.parser = None

        self.reporting_mode = False
        self.process_id = None
        self.process_name = None
        self.parent_id = None
        self.module_path = None
        self.threads = []
        self.first_seen = None
        self.calls = self
        self.lastcall = None
        self.environdict = None
        self.api_count = 0
        self.call_id = 0
        self.conversion_cache = {}
        self.cfg = Config()
        # Limit of API calls per process; a falsy value disables the limit.
        self.api_limit = self.cfg.processing.analysis_call_limit

        # Only non-empty, existing logs are opened; otherwise fd/parser stay
        # None and iteration yields nothing.
        if os.path.exists(log_path) and os.stat(log_path).st_size > 0:
            self.parse_first_and_reset()

        if self.cfg.processing.ram_boost:
            # Pre-parse every call into memory; next() then only walks the
            # cache. A trailing None marks the end of the cache.
            self.api_call_cache = []
            self.api_pointer = 0

            try:
                while True:
                    self.api_call_cache.append(self.cacheless_next())
            except StopIteration:
                pass
            self.api_call_cache.append(None)

    def parse_first_and_reset(self):
        """Open the log, init the BSON parser and read up to the first
        process message, then rewind the file.

        Logs whose path does not end in ``.bson`` are not opened at all.
        """
        # Check the extension first so unsupported files are never opened.
        if not self._log_path.endswith(".bson"):
            return

        self.fd = open(self._log_path, "rb")
        self.parser = BsonParser(self)

        # Get the process information from file to determine
        # process id (file names.)
        while not self.process_id:
            self.parser.read_next_message()

        self.fd.seek(0)

    def read(self, length):
        """Read data from the log file.

        @param length: length in bytes to read
        @return: exactly ``length`` bytes, or '' when ``length`` is falsy
        @raise EOFError: if fewer than ``length`` bytes are available
        """
        if not length:
            return ''
        buf = self.fd.read(length)
        if not buf or len(buf) != length:
            raise EOFError()
        return buf

    def __iter__(self):
        """The object is its own iterator."""
        return self

    def __repr__(self):
        return "<ParseProcessLog log-path: %r>" % self._log_path

    def __nonzero__(self):
        """Truth value: True if a call is buffered or one more can be read.

        Note that evaluating this may advance the parser, since it buffers
        the next call in ``self.lastcall``.
        """
        return self.wait_for_lastcall()

    def reset(self):
        """Rewind the log file (if one is open) and clear iteration state."""
        # fd is None when the log was missing, empty or not a .bson file;
        # there is nothing to rewind in that case.
        if self.fd is not None:
            self.fd.seek(0)
        self.api_count = 0
        self.lastcall = None
        self.call_id = 0
        self.api_pointer = 0

    def compare_calls(self, a, b):
        """Compare two calls for equality. Same implementation as before netlog.

        Entries lacking any of the compared keys (e.g. the anomaly entries
        built by log_anomaly) are never considered equal.

        @param a: call a
        @param b: call b
        @return: True if a == b else False
        """
        try:
            return all(a[key] == b[key]
                       for key in ("api", "status", "arguments", "return"))
        except KeyError:
            return False

    def wait_for_lastcall(self):
        """If there is no lastcall, iterate through messages till a call is
        found or EOF. To get the next call, set self.lastcall to None before
        calling this function.

        @return: True if there is a call, False on EOF or when no parser
                 was set up for this log
        """
        while not self.lastcall:
            # The parser only exists for opened .bson logs.
            if self.parser is None:
                return False
            try:
                if not self.parser.read_next_message():
                    return False
            except EOFError:
                return False

        return True

    def cacheless_next(self):
        """Return the next call parsed straight from the log file.

        Consecutive calls that compare equal are folded into one entry whose
        "repeated" counter is increased accordingly.

        @return: call dict, with a sequential "id" added
        @raise StopIteration: when no file is open, at end of log, or once
                              the configured API call limit is exceeded
        """
        if not self.fd:
            raise StopIteration()

        if not self.wait_for_lastcall():
            self.reset()
            raise StopIteration()

        self.api_count += 1
        if self.api_limit and self.api_count > self.api_limit:
            self.reset()
            raise StopIteration()

        nextcall, self.lastcall = self.lastcall, None

        # Look ahead and merge identical follow-up calls into nextcall.
        self.wait_for_lastcall()
        while self.lastcall and self.compare_calls(nextcall, self.lastcall):
            nextcall["repeated"] += self.lastcall["repeated"] + 1
            self.lastcall = None
            self.wait_for_lastcall()

        nextcall["id"] = self.call_id
        self.call_id += 1

        return nextcall

    def next(self):
        """Return the next call, from the in-memory cache when ram_boost is
        enabled and from the log file otherwise.

        @raise StopIteration: when there are no more calls
        """
        if self.cfg.processing.ram_boost:
            res = self.api_call_cache[self.api_pointer]
            if res is None:
                # End-of-cache sentinel: rewind so iteration can restart.
                self.reset()
                raise StopIteration()
            self.api_pointer += 1
            return res
        else:
            return self.cacheless_next()

    def log_process(self, context, timestring, pid, ppid, modulepath,
                    procname):
        """Log process information parsed from data file.

        @param context: ignored
        @param timestring: Process first seen time
        @param pid: PID
        @param ppid: Parent PID
        @param modulepath: Module path of the process
        @param procname: Process name
        """
        self.process_id, self.parent_id, self.process_name = pid, ppid, procname
        self.module_path = modulepath
        self.first_seen = timestring

    def log_thread(self, context, pid):
        """Thread notifications are ignored."""
        pass

    def log_environ(self, context, environdict):
        """Log user/process environment information for later use in
        behavioral signatures.

        @param context: ignored
        @param environdict: dict of the various collected information, which
                            will expand over time
        """
        self.environdict = environdict

    def log_anomaly(self, subcategory, tid, funcname, msg):
        """Log an anomaly parsed from data file.

        The entry is stored as the pending call; unlike entries produced by
        _parse it has no "status", "arguments", "return" or "repeated" keys.

        @param subcategory: anomaly subcategory
        @param tid: Thread ID
        @param funcname: function name associated with the anomaly
        @param msg: anomaly message
        """
        self.lastcall = dict(thread_id=tid,
                             category="anomaly",
                             api="",
                             subcategory=subcategory,
                             funcname=funcname,
                             msg=msg)

    def log_call(self, context, apiname, category, arguments):
        """Log an api call from data file.

        @param context: containing additional api info
        @param apiname: name of the api
        @param category: win32 function category
        @param arguments: arguments to the api call
        """
        apiindex, repeated, status, returnval, tid, timediff, caller, parentcaller = context

        # The third positional timedelta argument is microseconds, so
        # timediff is applied as an offset in milliseconds.
        current_time = self.first_seen + datetime.timedelta(
            0, 0, timediff * 1000)
        timestring = logtime(current_time)

        self.lastcall = self._parse([
            timestring, tid, caller, parentcaller, category, apiname, repeated,
            status, returnval
        ] + arguments)

    def log_error(self, emsg):
        """Log an error.

        @param emsg: error message
        """
        log.warning("ParseProcessLog error condition on log %s: %s",
                    str(self._log_path), emsg)

    def begin_reporting(self):
        """Switch to reporting mode and strip "raw_value" from cached calls.

        Safe to call more than once, and tolerant of cached entries without
        an "arguments" list (such as anomaly entries).
        """
        self.reporting_mode = True
        if not self.cfg.processing.ram_boost:
            return

        for ent in self.api_call_cache:
            if not ent:
                # None sentinel terminates the cache.
                break
            # remove the values we don't want to encode in reports
            for arg in ent.get("arguments", ()):
                arg.pop("raw_value", None)

    def _parse(self, row):
        """Parse log row.

        @param row: row data: nine fixed columns followed by
                    (name, value) argument pairs.
        @return: parsed information dict, or None if the row is too short.
        """
        call = {}
        arguments = []

        try:
            timestamp = row[0]  # Timestamp of current API call invocation.
            thread_id = row[1]  # Thread ID.
            caller = row[2]  # non-system DLL return address
            # non-system DLL parent of non-system-DLL return address
            parentcaller = row[3]
            category = row[4]  # Win32 function category.
            api_name = row[5]  # Name of the Windows API.
            repeated = row[6]  # Times log repeated
            status_value = row[7]  # Success or Failure?
            return_value = row[8]  # Value returned by the function.
        except IndexError as e:
            log.debug("Unable to parse process log row: %s", e)
            return None

        # Now walk through the remaining columns, which will contain API
        # arguments.
        for entry in row[9:]:
            # Each argument is expected to be a (name, value) pair; skip
            # anything that cannot be unpacked that way.
            try:
                arg_name, arg_value = entry
            except (ValueError, TypeError) as e:
                log.debug("Unable to parse analysis row argument (row=%s): %s",
                          entry, e)
                continue

            argument = {"name": arg_name}
            argument["value"] = convert_to_printable(str(arg_value),
                                                     self.conversion_cache)
            # Raw values are only kept while not in reporting mode.
            if not self.reporting_mode:
                argument["raw_value"] = arg_value
            pretty = pretty_print_arg(category, api_name, arg_name,
                                      argument["value"])
            if pretty:
                argument["pretty_value"] = pretty
            arguments.append(argument)

        call["timestamp"] = timestamp
        call["thread_id"] = str(thread_id)
        call["caller"] = "0x%.08x" % caller
        call["parentcaller"] = "0x%.08x" % parentcaller
        call["category"] = category
        call["api"] = api_name
        call["status"] = bool(int(status_value))

        # Integer return values are rendered as hex; anything else is made
        # printable. (`long` is the Python 2 arbitrary-precision integer.)
        if isinstance(return_value, int) or isinstance(return_value, long):
            call["return"] = "0x%.08x" % return_value
        else:
            call["return"] = convert_to_printable(str(return_value),
                                                  self.conversion_cache)

        prettyret = pretty_print_retval(category, api_name, call["status"],
                                        call["return"])
        if prettyret:
            call["pretty_return"] = prettyret

        call["arguments"] = arguments
        call["repeated"] = repeated

        # add the thread id to our thread list (kept unique)
        if call["thread_id"] not in self.threads:
            self.threads.append(call["thread_id"])

        return call

Python BsonParser примеры использования