def read_next_message(self):
    """Read and dispatch one message from the netlog stream.

    Reads a fixed header (api index, status, return value, thread id,
    time delta), then depending on the api index logs a new process
    (index 0), a new thread (index 1) or a regular API call on
    ``self.handler``.

    Returns True when a message was processed, False when the stream
    appears to be out of sync (unparseable process timestamp).
    """
    apiindex, status = struct.unpack("BB", self.handler.read(2))
    returnval, tid, timediff = struct.unpack("III", self.handler.read(12))
    context = (apiindex, status, returnval, tid, timediff)

    if apiindex == 0:
        # new process message
        timelow = self.read_int32()
        timehigh = self.read_int32()
        # FILETIME is 100-nanoseconds from 1601 :/
        vmtimeunix = (timelow + (timehigh << 32)) / 10000000.0 - 11644473600
        try:
            vmtime = datetime.datetime.fromtimestamp(vmtimeunix)
        except (ValueError, OverflowError, OSError):
            # An out-of-range timestamp most likely means we are no
            # longer in sync with the protocol - stop parsing instead
            # of crashing the whole handler.
            log.critical("vmtime in new-process-message out of range "
                         "(protocol out of sync?)")
            return False

        pid = self.read_int32()
        ppid = self.read_int32()
        modulepath = self.read_string()
        procname = get_filename_from_path(modulepath)

        self.handler.log_process(context, vmtime, pid, ppid,
                                 modulepath, procname)
    elif apiindex == 1:
        # new thread message
        pid = self.read_int32()
        self.handler.log_thread(context, pid)
    else:
        # actual API call
        apiname, modulename, parseinfo = LOGTBL[apiindex]
        formatspecifiers, argnames = expand_format(parseinfo[0]), parseinfo[1:]

        arguments = []
        # Walk the format specifiers in lockstep with the argument names;
        # each specifier selects a reader function from self.formatmap.
        for fs, argname in zip(formatspecifiers, argnames):
            fn = self.formatmap.get(fs, None)
            if fn:
                arguments.append((argname, fn()))
            else:
                log.warning("No handler for format specifier {0} on "
                            "apitype {1}".format(fs, apiname))

        self.handler.log_call(context, apiname, modulename, arguments)

    return True
def read_next_message(self):
    """Read and dispatch one message from the netlog stream.

    Same protocol as the other netlog readers: a fixed header followed
    by a process / thread / API-call payload, forwarded to
    ``self.handler``.  Returns True after each processed message.
    """
    apiindex, status = struct.unpack("BB", self.handler.read(2))
    returnval, tid, timediff = struct.unpack("III", self.handler.read(12))
    context = (apiindex, status, returnval, tid, timediff)

    if apiindex == 0:
        # new process message
        timelow = self.read_int32()
        timehigh = self.read_int32()
        # FILETIME is 100-nanoseconds from 1601 :/
        vmtimeunix = (timelow + (timehigh << 32)) / 10000000.0 - 11644473600
        try:
            vmtime = datetime.datetime.fromtimestamp(vmtimeunix)
        except (ValueError, OverflowError, OSError):
            # fromtimestamp() rejects out-of-range values; keep going
            # with an unknown start time rather than killing the
            # handler.  (Was a bare "except:", which also swallowed
            # KeyboardInterrupt/SystemExit.)
            vmtime = None

        pid = self.read_int32()
        ppid = self.read_int32()
        modulepath = self.read_string()
        procname = get_filename_from_path(modulepath)

        self.handler.log_process(context, vmtime, pid, ppid,
                                 modulepath, procname)
    elif apiindex == 1:
        # new thread message
        pid = self.read_int32()
        self.handler.log_thread(context, pid)
    else:
        # actual API call
        apiname, modulename, parseinfo = LOGTBL[apiindex]
        formatspecifiers, argnames = expand_format(parseinfo[0]), parseinfo[1:]

        arguments = []
        # One reader function per format specifier, paired with its name.
        for fs, argname in zip(formatspecifiers, argnames):
            fn = self.formatmap.get(fs, None)
            if fn:
                arguments.append((argname, fn()))
            else:
                log.warning("No handler for format specifier {0} on "
                            "apitype {1}".format(fs, apiname))

        self.handler.log_call(context, apiname, modulename, arguments)

    return True
def __iter__(self):
    """Yield one parsed event dict per entry of the BSON log in ``self.fd``.

    Every yielded dict carries at least "type", "tid" and "time";
    process/thread/apicall entries add further keys.  "info" and
    "buffer" messages are consumed internally (they populate
    ``self.infomap`` / flag tables or dump buffers to disk) and are
    not yielded.
    """
    # Always parse the log from the very beginning.
    self.fd.seek(0)

    while True:
        # Each message starts with a 32-bit little-endian total length.
        data = self.fd.read(4)
        if not data:
            return

        if not len(data) == 4:
            log.critical("BsonParser lacking data.")
            return

        blen = struct.unpack("I", data)[0]
        if blen > MAX_MESSAGE_LENGTH:
            log.critical("BSON message larger than MAX_MESSAGE_LENGTH, "
                         "stopping handler.")
            return

        # Read the remainder of the message (length prefix included in blen).
        data += self.fd.read(blen-4)
        if len(data) < blen:
            log.critical("BsonParser lacking data.")
            return

        try:
            dec = bson_decode(data)
        except Exception as e:
            log.warning("BsonParser decoding problem {0} on "
                        "data[:50] {1}".format(e, repr(data[:50])))
            return

        mtype = dec.get("type", "none")
        index = dec.get("I", -1)

        if mtype == "info":
            # API call index info message, explaining the argument names, etc.
            name = dec.get("name", "NONAME")
            arginfo = dec.get("args", [])
            category = dec.get("category")

            argnames, converters = check_names_for_typeinfo(arginfo)
            self.infomap[index] = name, arginfo, argnames, converters, category

            # Optional flag translation tables shipped with the info message.
            if dec.get("flags_value"):
                self.flags_value[name] = {}
                for arg, values in dec["flags_value"].items():
                    self.flags_value[name][arg] = dict(values)

            if dec.get("flags_bitmask"):
                self.flags_bitmask[name] = {}
                for arg, values in dec["flags_bitmask"].items():
                    self.flags_bitmask[name][arg] = values
            continue

        # Handle dumped buffers.
        if mtype == "buffer":
            buf = dec.get("buffer")
            sha1 = dec.get("checksum")

            # Why do we pass along a sha1 checksum again?
            if sha1 != hashlib.sha1(buf).hexdigest():
                log.warning("Incorrect sha1 passed along for a buffer.")
                continue

            # If the parent is netlogs ResultHandler then we actually dump
            # it - this should only be the case during the analysis, any
            # after proposing will then be ignored.
            from lib.cuckoo.core.resultserver import ResultHandler

            if isinstance(self.fd, ResultHandler):
                filepath = os.path.join(self.fd.storagepath, "buffer", sha1)
                with open(filepath, "wb") as f:
                    f.write(buf)
            continue

        tid = dec.get("T", 0)
        time = dec.get("t", 0)

        parsed = {
            "type": mtype,
            "tid": tid,
            "time": time,
        }

        if mtype == "debug":
            log.info("Debug message from monitor: {0}".format(dec.get("msg", "")))
            parsed["message"] = dec.get("msg", "")
        else:
            # Regular api call from monitor
            if index not in self.infomap:
                log.warning("Got API with unknown index - monitor needs "
                            "to explain first: {0}".format(dec))
                continue

            apiname, arginfo, argnames, converters, category = self.infomap[index]
            args = dec.get("args", [])

            if len(args) != len(argnames):
                log.warning("Inconsistent arg count (compared to arg names) "
                            "on {2}: {0} names {1}".format(dec, argnames, apiname))
                continue

            argdict = dict((argnames[i], converters[i](args[i]))
                           for i in range(len(args)))

            # Special new process message from the monitor.
            if apiname == "__process__":
                parsed["type"] = "process"

                # Three key layouts are handled: CamelCase with a FILETIME,
                # snake_case with a FILETIME, and a millisecond "TimeStamp".
                if "TimeLow" in argdict:
                    timelow = argdict["TimeLow"]
                    timehigh = argdict["TimeHigh"]

                    parsed["pid"] = pid = argdict["ProcessIdentifier"]
                    parsed["ppid"] = argdict["ParentProcessIdentifier"]
                    modulepath = argdict["ModulePath"]

                    # FILETIME is 100-nanoseconds from 1601 :/
                    vmtimeunix = (timelow + (timehigh << 32))
                    vmtimeunix = vmtimeunix / 10000000.0 - 11644473600
                elif "time_low" in argdict:
                    timelow = argdict["time_low"]
                    timehigh = argdict["time_high"]

                    if "pid" in argdict:
                        parsed["pid"] = pid = argdict["pid"]
                        parsed["ppid"] = argdict["ppid"]
                    else:
                        parsed["pid"] = pid = argdict["process_identifier"]
                        parsed["ppid"] = argdict["parent_process_identifier"]
                    modulepath = argdict["module_path"]

                    # FILETIME is 100-nanoseconds from 1601 :/
                    vmtimeunix = (timelow + (timehigh << 32))
                    vmtimeunix = vmtimeunix / 10000000.0 - 11644473600
                elif "TimeStamp" in argdict:
                    # TimeStamp is in milliseconds since the unix epoch.
                    vmtimeunix = argdict["TimeStamp"] / 1000.0
                    # NOTE(review): vmtime is recomputed identically right
                    # after this if/elif chain; this assignment is
                    # redundant but harmless.
                    vmtime = datetime.datetime.fromtimestamp(vmtimeunix)

                    parsed["pid"] = pid = argdict["ProcessIdentifier"]
                    parsed["ppid"] = argdict["ParentProcessIdentifier"]
                    modulepath = argdict["ModulePath"]
                else:
                    raise CuckooResultError("I don't recognise the bson log contents.")

                vmtime = datetime.datetime.fromtimestamp(vmtimeunix)
                parsed["first_seen"] = vmtime

                procname = get_filename_from_path(modulepath)
                parsed["process_name"] = procname

                # Remember the pid for subsequent apicall entries.
                self.pid = pid
            elif apiname == "__thread__":
                parsed["pid"] = pid = argdict["ProcessIdentifier"]
            # elif apiname == "__anomaly__":
            #     tid = argdict["ThreadIdentifier"]
            #     subcategory = argdict["Subcategory"]
            #     msg = argdict["Message"]
            #     self.handler.log_anomaly(subcategory, tid, msg)
            #     return True
            else:
                parsed["type"] = "apicall"
                parsed["pid"] = self.pid
                parsed["api"] = apiname
                parsed["category"] = category
                parsed["status"] = argdict.pop("is_success", 1)
                parsed["return_value"] = argdict.pop("retval", 0)
                parsed["arguments"] = argdict
                parsed["flags"] = {}

                parsed["stacktrace"] = dec.get("s", [])
                parsed["uniqhash"] = dec.get("h", 0)

                if "e" in dec and "E" in dec:
                    parsed["last_error"] = dec["e"]
                    parsed["nt_status"] = dec["E"]

                # Translate raw flag values using the tables collected from
                # earlier "info" messages.
                if apiname in self.flags_value:
                    self.resolve_flags(apiname, argdict, parsed["flags"])

        yield parsed
def read_next_message(self):
    """Read and dispatch one message from the netlog stream, defensively.

    This hardened variant treats any parse problem (bad timestamp, bad
    string, unknown API index, failing argument reader, oversized
    process name) as a protocol desync: it logs and returns False to
    stop the parser.  Returns True after each successfully processed
    message.
    """
    apiindex, status = struct.unpack("BB", self.handler.read(2))
    returnval, tid, timediff = struct.unpack("III", self.handler.read(12))
    context = (apiindex, status, returnval, tid, timediff)

    if apiindex == 0:
        # new process message
        timelow = self.read_int32()
        timehigh = self.read_int32()
        # FILETIME is 100-nanoseconds from 1601 :/
        vmtimeunix = (timelow + (timehigh << 32))
        vmtimeunix = vmtimeunix / 10000000.0 - 11644473600
        try:
            vmtime = datetime.datetime.fromtimestamp(vmtimeunix)
        except (ValueError, OverflowError, OSError):
            # Narrowed from a bare "except:" - these are the errors
            # fromtimestamp() raises for out-of-range input.
            log.critical("vmtime in new-process-messsage out of range "
                         "(protocol out of sync?)")
            return False

        pid = self.read_int32()
        ppid = self.read_int32()

        try:
            modulepath = self.read_string()
            procname = get_filename_from_path(modulepath)
        except Exception:
            # Narrowed from a bare "except:" so KeyboardInterrupt and
            # SystemExit still propagate.
            log.exception("Exception in netlog protocol, stopping parser.")
            return False

        if len(procname) > 255:
            log.critical("Huge process name (>255), assuming netlog "
                         "protocol out of sync.")
            log.debug("Process name: %s", repr(procname))
            return False

        self.handler.log_process(context, vmtime, pid, ppid,
                                 modulepath, procname)
    elif apiindex == 1:
        # new thread message
        pid = self.read_int32()
        self.handler.log_thread(context, pid)
    else:
        # actual API call
        try:
            apiname, modulename, parseinfo = LOGTBL[apiindex]
        except IndexError:
            # pid is not known at this point, hence the literal None.
            log.error("Netlog LOGTBL lookup error for API index {0} "
                      "(pid={1}, tid={2})".format(apiindex, None, tid))
            return False

        formatspecifiers = expand_format(parseinfo[0])
        argnames = parseinfo[1:]
        arguments = []
        for pos in range(len(formatspecifiers)):
            fs = formatspecifiers[pos]
            argname = argnames[pos]
            fn = self.formatmap.get(fs, None)
            if fn:
                try:
                    r = fn()
                except Exception:
                    # Narrowed from a bare "except:".
                    log.exception("Exception in netlog protocol, "
                                  "stopping parser.")
                    return False

                arguments.append((argname, r))
            else:
                log.warning("No handler for format specifier {0} on "
                            "apitype {1}".format(fs, apiname))

        self.handler.log_call(context, apiname, modulename, arguments)

    return True
def read_next_message(self):
    """Read and dispatch one BSON message from ``self.handler``.

    In addition to the regular process/thread/apicall logging this
    variant keeps a running list of recent APIs on the handler (via
    ``set_apis``/``get_apis``) and fires trigger callbacks
    (``execute_trigger_parameters``) for suspicious API sequences such
    as WriteProcessMemory -> CreateRemoteThread.  Returns True after
    each processed message, False on a framing/decoding problem.
    """
    # 32-bit little-endian total message length prefix.
    data = self.handler.read(4)
    blen = struct.unpack("I", data)[0]
    if blen > MAX_MESSAGE_LENGTH:
        log.critical("BSON message larger than MAX_MESSAGE_LENGTH, "
                     "stopping handler.")
        return False

    data += self.handler.read(blen-4)

    try:
        dec = bson_decode(data)
    except Exception as e:
        log.warning("BsonParser decoding problem {0} on "
                    "data[:50] {1}".format(e, repr(data[:50])))
        return False

    mtype = dec.get("type", "none")
    index = dec.get("I", -1)
    tid = dec.get("T", 0)
    time = dec.get("t", 0)

    #context = (apiindex, status, returnval, tid, timediff)
    context = [index, 1, 0, tid, time]

    if mtype == "info":
        # API call index info message, explaining the argument names, etc
        name = dec.get("name", "NONAME")
        arginfo = dec.get("args", [])
        category = dec.get("category")

        # Bson dumps that were generated before cuckoomon exported the
        # "category" field have to get the category using the old method.
        if not category:
            # Try to find the entry/entries with this api name.
            category = [_ for _ in LOGTBL if _[0] == name]

            # If we found an entry, take its category, otherwise we take
            # the default string "unknown."
            category = category[0][1] if category else "unknown"

        argnames, converters = check_names_for_typeinfo(arginfo)
        self.infomap[index] = name, arginfo, argnames, converters, category
    elif mtype == "debug":
        log.info("Debug message from monitor: "
                 "{0}".format(dec.get("msg", "")))
    elif mtype == "new_process":
        # new_process message from VMI monitor
        vmtime = datetime.datetime.fromtimestamp(dec.get("starttime", 0))
        procname = dec.get("name", "NONAME")
        ppid = 0
        modulepath = "DUMMY"

        self.handler.log_process(context, vmtime, None, ppid,
                                 modulepath, procname)
    else:
        # regular api call
        if not index in self.infomap:
            log.warning("Got API with unknown index - monitor needs "
                        "to explain first: {0}".format(dec))
            return True

        apiname, arginfo, argnames, converters, category = self.infomap[index]
        args = dec.get("args", [])

        if len(args) != len(argnames):
            log.warning("Inconsistent arg count (compared to arg names) "
                        "on {2}: {0} names {1}".format(dec, argnames, apiname))
            return True

        argdict = dict((argnames[i], converters[i](args[i]))
                       for i in range(len(args)))

        # CHANGED: Save all APIs to keep track of them and enable triggering.
        if hasattr(self.handler, "set_apis"):
            self.handler.set_apis([(apiname, argdict)])

        if apiname == "__process__":
            # special new process message from cuckoomon
            timelow = argdict["TimeLow"]
            timehigh = argdict["TimeHigh"]
            # FILETIME is 100-nanoseconds from 1601 :/
            vmtimeunix = (timelow + (timehigh << 32))
            vmtimeunix = vmtimeunix / 10000000.0 - 11644473600
            vmtime = datetime.datetime.fromtimestamp(vmtimeunix)

            pid = argdict["ProcessIdentifier"]
            ppid = argdict["ParentProcessIdentifier"]
            modulepath = argdict["ModulePath"]
            procname = get_filename_from_path(modulepath)

            self.handler.log_process(context, vmtime, pid, ppid,
                                     modulepath, procname)
            self.execute_trigger_parameters(apiname, argdict)
            return True
        elif apiname == "__thread__":
            pid = argdict["ProcessIdentifier"]
            self.handler.log_thread(context, pid)
            return True
        elif apiname == "NtResumeThread":
            # Fire a trigger when the resumed thread previously had its
            # context set (NtSetContextThread on the same ThreadHandle).
            if hasattr(self.handler, "server"):
                set_thread_args = None
                for api in self.handler.get_apis():
                    if type(api) == tuple:
                        name = api[0]
                        args = api[1]
                        if name == 'NtSetContextThread':
                            set_thread_args = args
                            break
                if set_thread_args is not None:
                    if set_thread_args['ThreadHandle'] == argdict['ThreadHandle']:
                        self.execute_trigger_parameters("NtSetContextThread -> NtResumeThread", argdict)
        elif apiname == "CreateRemoteThread":
            # Fire a trigger when a WriteProcessMemory was seen earlier
            # (classic remote code injection pattern).
            write_proc_args = None
            if hasattr(self.handler, "server"):
                for api in self.handler.get_apis():
                    if type(api) == tuple:
                        name = api[0]
                        args = api[1]
                        if name == 'WriteProcessMemory':
                            write_proc_args = args
                            break
            if write_proc_args is not None:
                self.execute_trigger_parameters("WriteProcessMemory -> CreateRemoteThread", argdict)
        elif apiname == "LdrLoadDll":
            # Full injection chain: WriteProcessMemory followed by
            # CreateRemoteThread on the same process handle, followed by
            # this LoadLibrary in the target process.
            if hasattr(self.handler, "server"):
                write_proc_args = None
                apis = self.handler.get_apis()
                for api in apis:
                    if type(api) == tuple:
                        name = api[0]
                        args = api[1]
                        if name == 'WriteProcessMemory':
                            write_proc_args = args
                            break
                if write_proc_args is not None:
                    for api in apis:
                        if type(api) == tuple:
                            name = api[0]
                            args = api[1]
                            if name == "CreateRemoteThread":
                                # NOTE(review): reconstructed nesting - the
                                # matched pair is removed from the tracked
                                # API list once the trigger has fired;
                                # confirm break placement against history.
                                if args['ProcessHandle'] == write_proc_args['ProcessHandle']:
                                    if argdict['ProcessId'] == args['ProcessId']:
                                        self.execute_trigger_parameters("WriteProcessMemory -> CreateRemoteThread -> LoadLibrary", argdict)
                                        self.handler.remove_from_apis(('WriteProcessMemory', write_proc_args))
                                        self.handler.remove_from_apis((name, args))
                                        break
        else:
            self.execute_trigger_parameters(apiname, argdict)

        context[1] = argdict.pop("is_success", 1)
        context[2] = argdict.pop("retval", 0)
        # NOTE(review): relies on dict.items() returning a list
        # (Python 2 semantics); under Python 3 the += would raise.
        arguments = argdict.items()
        arguments += dec.get("aux", {}).items()

        self.handler.log_call(context, apiname, category, arguments)

    return True
def __iter__(self):
    """Yield one parsed event dict per entry of the BSON log in ``self.fd``.

    "info" messages populate ``self.infomap`` and are not yielded;
    all other messages produce a dict with at least "type", "tid" and
    "time" keys.
    """
    # Always parse the log from the very beginning.
    self.fd.seek(0)

    while True:
        # 32-bit little-endian total message length prefix.
        data = self.fd.read(4)
        if not data:
            return
        if not len(data) == 4:
            log.critical("BsonParser lacking data.")
            return
        blen = struct.unpack("I", data)[0]
        if blen > MAX_MESSAGE_LENGTH:
            log.critical("BSON message larger than MAX_MESSAGE_LENGTH, "
                         "stopping handler.")
            return
        data += self.fd.read(blen-4)
        if len(data) < blen:
            log.critical("BsonParser lacking data.")
            return
        try:
            dec = bson_decode(data)
        except Exception as e:
            log.warning("BsonParser decoding problem {0} on "
                        "data[:50] {1}".format(e, repr(data[:50])))
            return

        mtype = dec.get("type", "none")
        index = dec.get("I", -1)

        if mtype == "info":
            # API call index info message, explaining the argument names, etc.
            name = dec.get("name", "NONAME")
            arginfo = dec.get("args", [])
            category = dec.get("category")

            argnames, converters = check_names_for_typeinfo(arginfo)
            self.infomap[index] = name, arginfo, argnames, converters, category
            continue

        tid = dec.get("T", 0)
        time = dec.get("t", 0)

        parsed = {
            "type": mtype,
            "tid": tid,
            "time": time,
        }

        if mtype == "debug":
            log.info("Debug message from monitor: {0}".format(dec.get("msg", "")))
            parsed["message"] = dec.get("msg", "")
        else:
            # Regular api call from monitor
            if index not in self.infomap:
                log.warning("Got API with unknown index - monitor needs "
                            "to explain first: {0}".format(dec))
                continue

            apiname, arginfo, argnames, converters, category = self.infomap[index]
            args = dec.get("args", [])

            if len(args) != len(argnames):
                log.warning("Inconsistent arg count (compared to arg names) "
                            "on {2}: {0} names {1}".format(dec, argnames, apiname))
                continue

            argdict = dict((argnames[i], converters[i](args[i]))
                           for i in range(len(args)))

            # Special new process message from the monitor.
            if apiname == "__process__":
                parsed["type"] = "process"

                # Two argument-name layouts are supported: CamelCase and
                # snake_case.
                if "TimeLow" in argdict:
                    timelow = argdict["TimeLow"]
                    timehigh = argdict["TimeHigh"]

                    parsed["pid"] = pid = argdict["ProcessIdentifier"]
                    parsed["ppid"] = argdict["ParentProcessIdentifier"]
                    modulepath = argdict["ModulePath"]
                elif "time_low" in argdict:
                    timelow = argdict["time_low"]
                    timehigh = argdict["time_high"]

                    if "pid" in argdict:
                        parsed["pid"] = pid = argdict["pid"]
                        parsed["ppid"] = argdict["ppid"]
                    else:
                        parsed["pid"] = pid = argdict["process_identifier"]
                        parsed["ppid"] = argdict["parent_process_identifier"]
                    modulepath = argdict["module_path"]
                else:
                    raise CuckooResultError("I don't recognise the bson log contents.")

                # FILETIME is 100-nanoseconds from 1601 :/
                vmtimeunix = (timelow + (timehigh << 32))
                vmtimeunix = vmtimeunix / 10000000.0 - 11644473600
                vmtime = datetime.datetime.fromtimestamp(vmtimeunix)
                parsed["first_seen"] = vmtime

                procname = get_filename_from_path(modulepath)
                parsed["process_name"] = procname

                # Remember the pid for subsequent apicall entries.
                self.pid = pid
            elif apiname == "__thread__":
                parsed["pid"] = pid = argdict["ProcessIdentifier"]
            # elif apiname == "__anomaly__":
            #     tid = argdict["ThreadIdentifier"]
            #     subcategory = argdict["Subcategory"]
            #     msg = argdict["Message"]
            #     self.handler.log_anomaly(subcategory, tid, msg)
            #     return True
            else:
                parsed["type"] = "apicall"
                parsed["pid"] = self.pid
                parsed["api"] = apiname
                parsed["category"] = category
                parsed["status"] = argdict.pop("is_success", 1)
                parsed["return_value"] = argdict.pop("retval", 0)
                parsed["arguments"] = argdict

                parsed["stacktrace"] = dec.get("s", [])
                parsed["uniqhash"] = dec.get("h", 0)

                if "e" in dec and "E" in dec:
                    parsed["last_error"] = dec["e"]
                    parsed["nt_status"] = dec["E"]

                # Add stringified flag representations alongside the raw
                # argument values (stored under "<flag>_s").
                if apiname in self.flags:
                    for flag in self.flags[apiname].keys():
                        argdict[flag + "_s"] = self._flag_represent(apiname, flag, argdict[flag])

        yield parsed
def read_next_message(self):
    """Read BSON messages from ``self.fd`` until one has been dispatched.

    Consumes framing/info/debug messages in a loop; logs processes,
    threads, environment blocks and API calls on ``self.fd`` (the
    result handler).  Returns True after a dispatched message, False
    on oversized/undecodable messages, and None when the stream ends.
    """
    #self.fd.seek(0)
    while True:
        # 32-bit little-endian total message length prefix.
        data = self.fd.read(4)
        if not data:
            return

        if len(data) != 4:
            log.critical("BsonParser lacking data.")
            return

        blen = struct.unpack("I", data)[0]
        if blen > MAX_MESSAGE_LENGTH:
            log.critical("BSON message larger than MAX_MESSAGE_LENGTH, "
                         "stopping handler.")
            return False

        data += self.fd.read(blen - 4)
        if len(data) < blen:
            log.critical("BsonParser lacking data.")
            return

        try:
            dec = bson_decode(data)
        except Exception as e:
            log.warning("BsonParser decoding problem {0} on "
                        "data[:50] {1}".format(e, repr(data[:50])))
            return False

        mtype = dec.get("type", "none")
        index = dec.get("I", -1)
        tid = dec.get("T", 0)
        time = dec.get("t", 0)
        caller = dec.get("R", 0)
        parentcaller = dec.get("P", 0)
        repeated = dec.get("r", 0)

        context = [index, repeated, 1, 0, tid, time, caller, parentcaller]

        if mtype == "info":
            # API call index info message, explaining the argument names, etc.
            name = dec.get("name", "NONAME")
            arginfo = dec.get("args", [])
            category = dec.get("category")

            # Bson dumps that were generated before cuckoomon exported the
            # "category" field have to get the category using the old method.
            if not category:
                # Try to find the entry/entries with this api name.
                category = [_ for _ in LOGTBL if _[0] == name]

                # If we found an entry, take its category, otherwise we take
                # the default string "unknown."
                category = category[0][1] if category else "unknown"

            argnames, converters = check_names_for_typeinfo(arginfo)
            #self.determine_unserializers(arginfo)
            self.infomap[index] = name, arginfo, argnames, converters, category

            # Optional flag translation tables shipped with the info message.
            if dec.get("flags_value"):
                self.flags_value[name] = {}
                for arg, values in dec["flags_value"].items():
                    self.flags_value[name][arg] = dict(values)

            if dec.get("flags_bitmask"):
                self.flags_bitmask[name] = {}
                for arg, values in dec["flags_bitmask"].items():
                    self.flags_bitmask[name][arg] = values
            continue
        elif mtype == "debug":
            log.info("Debug message from monitor: "
                     "{0}".format(dec.get("msg", "")))
        elif mtype == "new_process":
            # new_process message from VMI monitor.
            vmtime = datetime.datetime.fromtimestamp(dec.get("starttime", 0))
            procname = dec.get("name", "NONAME")
            ppid = 0
            modulepath = "DUMMY"

            self.fd.log_process(context, vmtime, None, ppid,
                                modulepath, procname)
        else:
            # Regular api call.
            if index not in self.infomap:
                log.warning("Got API with unknown index - monitor needs "
                            "to explain first: {0}".format(dec))
                return True

            apiname, arginfo, argnames, converters, category = self.infomap[index]
            args = dec.get("args", [])

            if len(args) != len(argnames):
                # BUGFIX: the arguments were previously passed in the
                # order (dec, argnames, apiname), printing the whole
                # message where the api name belongs.
                log.warning("Inconsistent arg count (compared to arg names) "
                            "on %s: %s names %s", apiname, dec, argnames)
                continue

            argdict = dict((argnames[i], converters[i](args[i]))
                           for i in range(len(args)))

            if apiname == "__process__":
                # Special new process message from cuckoomon.
                timelow = argdict["TimeLow"] & 0xFFFFFFFF
                timehigh = argdict["TimeHigh"] & 0xFFFFFFFF
                # FILETIME is 100-nanoseconds from 1601 :/
                vmtimeunix = (timelow + (timehigh << 32))
                vmtimeunix = vmtimeunix / 10000000.0 - 11644473600
                vmtime = datetime.datetime.fromtimestamp(vmtimeunix)

                pid = argdict["ProcessIdentifier"]
                ppid = argdict["ParentProcessIdentifier"]
                modulepath = argdict["ModulePath"]
                procname = get_filename_from_path(modulepath)

                self.fd.log_process(context, vmtime, pid, ppid,
                                    modulepath, procname)
                return True
            elif apiname == "__thread__":
                pid = argdict["ProcessIdentifier"]
                self.fd.log_thread(context, pid)
                return True
            elif apiname == "__environ__":
                self.fd.log_environ(context, argdict)
                return True
            # elif apiname == "__anomaly__":
            #     tid = argdict["ThreadIdentifier"]
            #     subcategory = argdict["Subcategory"]
            #     msg = argdict["Message"]
            #     self.fd.log_anomaly(subcategory, tid, msg)
            #     return True

            context[2] = argdict.pop("is_success", 1)
            context[3] = argdict.pop("retval", 0)
            arguments = list(argdict.items())
            arguments += list(dec.get("aux", {}).items())

            self.fd.log_call(context, apiname, category, arguments)
        return True
def __iter__(self):
    """Yield one parsed event dict per entry of the BSON log in ``self.fd``.

    Consumes "info" messages (argument metadata and flag tables) and
    "buffer" messages (dropped payloads) internally; everything else
    is yielded as a dict with at least "type", "tid" and "time" keys.
    A buffer message's sha1 is attached to the immediately following
    apicall event via ``self.buffer_sha1``.
    """
    # Always parse the log from the very beginning.
    self.fd.seek(0)

    while True:
        # 32-bit little-endian total message length prefix.
        data = self.fd.read(4)
        if not data:
            return

        if len(data) != 4:
            log.critical("BsonParser lacking data.")
            return

        blen = struct.unpack("I", data)[0]
        if blen > MAX_MESSAGE_LENGTH:
            log.critical("BSON message larger than MAX_MESSAGE_LENGTH, "
                         "stopping handler.")
            return

        data += self.fd.read(blen - 4)
        if len(data) < blen:
            log.critical("BsonParser lacking data.")
            return

        try:
            dec = bson_decode(data)
        except Exception as e:
            log.warning("BsonParser decoding problem {0} on "
                        "data[:50] {1}".format(e, repr(data[:50])))
            return

        mtype = dec.get("type", "none")
        index = dec.get("I", -1)

        if mtype == "info":
            # API call index info message, explaining the argument names, etc.
            name = dec.get("name", "NONAME")
            arginfo = dec.get("args", [])
            category = dec.get("category")

            argnames, converters = self.determine_unserializers(arginfo)
            self.infomap[index] = name, arginfo, argnames, converters, category

            # Optional flag translation tables shipped with the info message.
            if dec.get("flags_value"):
                self.flags_value[name] = {}
                for arg, values in dec["flags_value"].items():
                    self.flags_value[name][arg] = dict(values)

            if dec.get("flags_bitmask"):
                self.flags_bitmask[name] = {}
                for arg, values in dec["flags_bitmask"].items():
                    self.flags_bitmask[name][arg] = values
            continue

        # Handle dumped buffers.
        if mtype == "buffer":
            buf = dec.get("buffer")
            sha1 = dec.get("checksum")
            self.buffer_sha1 = hashlib.sha1(buf).hexdigest()

            # Why do we pass along a sha1 checksum again?
            if sha1 != self.buffer_sha1:
                log.warning("Incorrect sha1 passed along for a buffer.")

            # If the parent is netlogs ResultHandler then we actually dump
            # it - this should only be the case during the analysis, any
            # after proposing will then be ignored.
            from lib.cuckoo.core.resultserver import ResultHandler

            if isinstance(self.fd, ResultHandler):
                filepath = os.path.join(self.fd.storagepath,
                                        "buffer", self.buffer_sha1)
                with open(filepath, "wb") as f:
                    f.write(buf)
            continue

        tid = dec.get("T", 0)
        time = dec.get("t", 0)

        parsed = {
            "type": mtype,
            "tid": tid,
            "time": time,
        }

        if mtype == "debug":
            log.info("Debug message from monitor: {0}".format(
                dec.get("msg", "")))
            parsed["message"] = dec.get("msg", "")
        else:
            # Regular api call from monitor
            if index not in self.infomap:
                log.warning("Got API with unknown index - monitor needs "
                            "to explain first: {0}".format(dec))
                continue

            apiname, arginfo, argnames, converters, category = self.infomap[index]
            args = dec.get("args", [])

            if len(args) != len(argnames):
                log.warning(
                    "Inconsistent arg count (compared to arg names) "
                    "on {2}: {0} names {1}".format(dec, argnames,
                                                   apiname))
                continue

            argdict = {}
            for idx, value in enumerate(args):
                argdict[argnames[idx]] = converters[idx](value)

            # Special new process message from the monitor.
            if apiname == "__process__":
                parsed["type"] = "process"

                # Two argument-name layouts are supported: CamelCase and
                # snake_case.
                if "TimeLow" in argdict:
                    timelow = argdict["TimeLow"]
                    timehigh = argdict["TimeHigh"]

                    parsed["pid"] = pid = argdict["ProcessIdentifier"]
                    parsed["ppid"] = argdict["ParentProcessIdentifier"]
                    modulepath = argdict["ModulePath"]
                elif "time_low" in argdict:
                    timelow = argdict["time_low"]
                    timehigh = argdict["time_high"]

                    if "pid" in argdict:
                        parsed["pid"] = pid = argdict["pid"]
                        parsed["ppid"] = argdict["ppid"]
                    else:
                        parsed["pid"] = pid = argdict["process_identifier"]
                        parsed["ppid"] = argdict[
                            "parent_process_identifier"]
                    modulepath = argdict["module_path"]
                else:
                    raise CuckooResultError(
                        "I don't recognise the bson log contents.")

                # FILETIME is 100-nanoseconds from 1601 :/
                vmtimeunix = (timelow + (timehigh << 32))
                vmtimeunix = vmtimeunix / 10000000.0 - 11644473600
                vmtime = datetime.datetime.fromtimestamp(vmtimeunix)
                parsed["first_seen"] = vmtime

                procname = get_filename_from_path(modulepath)
                parsed["process_path"] = modulepath
                parsed["process_name"] = procname
                parsed["command_line"] = argdict.get("command_line")

                # Is this a 64-bit process?
                if argdict.get("is_64bit"):
                    self.is_64bit = True

                # Is this process being "tracked"?
                parsed["track"] = bool(argdict.get("track", 1))

                # Remember the pid for subsequent apicall entries.
                self.pid = pid
            elif apiname == "__thread__":
                parsed["pid"] = pid = argdict["ProcessIdentifier"]
            # elif apiname == "__anomaly__":
            #     tid = argdict["ThreadIdentifier"]
            #     subcategory = argdict["Subcategory"]
            #     msg = argdict["Message"]
            #     self.handler.log_anomaly(subcategory, tid, msg)
            #     return True
            else:
                parsed["type"] = "apicall"
                parsed["pid"] = self.pid
                parsed["api"] = apiname
                parsed["category"] = category
                parsed["status"] = argdict.pop("is_success", 1)
                parsed["return_value"] = argdict.pop("retval", 0)
                parsed["arguments"] = argdict
                parsed["flags"] = {}

                parsed["stacktrace"] = dec.get("s", [])
                parsed["uniqhash"] = dec.get("h", 0)

                if "e" in dec and "E" in dec:
                    parsed["last_error"] = dec["e"]
                    parsed["nt_status"] = dec["E"]

                # Translate raw flag values using the collected tables.
                if apiname in self.flags_value:
                    self.resolve_flags(apiname, argdict, parsed["flags"])

                # Attach the sha1 of the buffer dumped just before this
                # call, then reset it so it is only used once.
                if self.buffer_sha1:
                    parsed["buffer"] = self.buffer_sha1
                    self.buffer_sha1 = None

        yield parsed
def read_next_message(self):
    """Read and dispatch one BSON message from ``self.handler``.

    The context list additionally carries caller ("R"), parent caller
    ("P") and repeat count ("r") fields from the message.  Returns
    True after each processed message, False on framing/decoding
    problems.
    """
    # 32-bit little-endian total message length prefix.
    data = self.handler.read(4)
    blen = struct.unpack("I", data)[0]
    if blen > MAX_MESSAGE_LENGTH:
        log.critical("BSON message larger than MAX_MESSAGE_LENGTH, "
                     "stopping handler.")
        return False

    data += self.handler.read(blen-4)

    try:
        dec = bson_decode(data)
    except Exception as e:
        log.warning("BsonParser decoding problem {0} on "
                    "data[:50] {1}".format(e, repr(data[:50])))
        return False

    mtype = dec.get("type", "none")
    index = dec.get("I", -1)
    tid = dec.get("T", 0)
    time = dec.get("t", 0)
    caller = dec.get("R", 0)
    parentcaller = dec.get("P", 0)
    repeated = dec.get("r", 0)

    context = [index, repeated, 1, 0, tid, time, caller, parentcaller]

    if mtype == "info":
        # API call index info message, explaining the argument names, etc.
        name = dec.get("name", "NONAME")
        arginfo = dec.get("args", [])
        category = dec.get("category")

        # Bson dumps that were generated before cuckoomon exported the
        # "category" field have to get the category using the old method.
        if not category:
            # Try to find the entry/entries with this api name.
            category = [_ for _ in LOGTBL if _[0] == name]

            # If we found an entry, take its category, otherwise we take
            # the default string "unknown."
            category = category[0][1] if category else "unknown"

        argnames, converters = check_names_for_typeinfo(arginfo)
        self.infomap[index] = name, arginfo, argnames, converters, category
    elif mtype == "debug":
        log.info("Debug message from monitor: "
                 "{0}".format(dec.get("msg", "")))
    elif mtype == "new_process":
        # new_process message from VMI monitor.
        vmtime = datetime.datetime.fromtimestamp(dec.get("starttime", 0))
        procname = dec.get("name", "NONAME")
        ppid = 0
        modulepath = "DUMMY"

        self.handler.log_process(context, vmtime, None, ppid,
                                 modulepath, procname)
    else:
        # Regular api call.
        if index not in self.infomap:
            log.warning("Got API with unknown index - monitor needs "
                        "to explain first: {0}".format(dec))
            return True

        apiname, arginfo, argnames, converters, category = self.infomap[index]
        args = dec.get("args", [])

        if len(args) != len(argnames):
            log.warning("Inconsistent arg count (compared to arg names) "
                        "on {2}: {0} names {1}".format(dec, argnames, apiname))
            return True

        argdict = dict((argnames[i], converters[i](args[i]))
                       for i in range(len(args)))

        if apiname == "__process__":
            # Special new process message from cuckoomon.
            timelow = argdict["TimeLow"]
            timehigh = argdict["TimeHigh"]
            # FILETIME is 100-nanoseconds from 1601 :/
            vmtimeunix = (timelow + (timehigh << 32))
            vmtimeunix = vmtimeunix / 10000000.0 - 11644473600
            vmtime = datetime.datetime.fromtimestamp(vmtimeunix)

            pid = argdict["ProcessIdentifier"]
            ppid = argdict["ParentProcessIdentifier"]
            modulepath = argdict["ModulePath"]
            procname = get_filename_from_path(modulepath)

            self.handler.log_process(context, vmtime, pid, ppid,
                                     modulepath, procname)
            return True
        elif apiname == "__thread__":
            pid = argdict["ProcessIdentifier"]
            self.handler.log_thread(context, pid)
            return True
        elif apiname == "__environ__":
            self.handler.log_environ(context, argdict)
            return True
        # elif apiname == "__anomaly__":
        #     tid = argdict["ThreadIdentifier"]
        #     subcategory = argdict["Subcategory"]
        #     msg = argdict["Message"]
        #     self.handler.log_anomaly(subcategory, tid, msg)
        #     return True

        context[2] = argdict.pop("is_success", 1)
        context[3] = argdict.pop("retval", 0)
        # NOTE(review): relies on dict.items() returning a list
        # (Python 2 semantics); under Python 3 the += would raise.
        arguments = argdict.items()
        arguments += dec.get("aux", {}).items()

        self.handler.log_call(context, apiname, category, arguments)

    return True
def __iter__(self):
    """Yield one parsed event dict per entry of the BSON log in ``self.fd``.

    "info" messages populate ``self.infomap`` and are skipped; debug,
    process, thread and apicall messages are yielded as dicts with at
    least "type", "tid" and "time" keys.
    """
    # Always parse the log from the very beginning.
    self.fd.seek(0)

    while True:
        # 32-bit little-endian total message length prefix.
        data = self.fd.read(4)
        if not data:
            return

        if not len(data) == 4:
            log.critical("BsonParser lacking data.")
            return

        blen = struct.unpack("I", data)[0]
        if blen > MAX_MESSAGE_LENGTH:
            log.critical("BSON message larger than MAX_MESSAGE_LENGTH, "
                         "stopping handler.")
            return

        data += self.fd.read(blen - 4)
        if len(data) < blen:
            log.critical("BsonParser lacking data.")
            return

        try:
            dec = bson_decode(data)
        except Exception as e:
            log.warning("BsonParser decoding problem {0} on "
                        "data[:50] {1}".format(e, repr(data[:50])))
            return

        mtype = dec.get("type", "none")
        index = dec.get("I", -1)

        if mtype == "info":
            # API call index info message, explaining the argument names, etc.
            name = dec.get("name", "NONAME")
            arginfo = dec.get("args", [])
            category = dec.get("category")

            argnames, converters = check_names_for_typeinfo(arginfo)
            self.infomap[
                index] = name, arginfo, argnames, converters, category
            continue

        tid = dec.get("T", 0)
        time = dec.get("t", 0)

        parsed = {
            "type": mtype,
            "tid": tid,
            "time": time,
        }

        if mtype == "debug":
            log.info("Debug message from monitor: {0}".format(
                dec.get("msg", "")))
            parsed["message"] = dec.get("msg", "")
        else:
            # Regular api call from monitor
            if index not in self.infomap:
                log.warning("Got API with unknown index - monitor needs "
                            "to explain first: {0}".format(dec))
                continue

            apiname, arginfo, argnames, converters, category = self.infomap[
                index]
            args = dec.get("args", [])

            if len(args) != len(argnames):
                log.warning(
                    "Inconsistent arg count (compared to arg names) "
                    "on {2}: {0} names {1}".format(dec, argnames, apiname))
                continue

            argdict = dict((argnames[i], converters[i](args[i]))
                           for i in range(len(args)))

            # Special new process message from the monitor.
            if apiname == "__process__":
                parsed["type"] = "process"

                # Two argument-name layouts are supported: CamelCase and
                # snake_case.
                if "TimeLow" in argdict:
                    timelow = argdict["TimeLow"]
                    timehigh = argdict["TimeHigh"]

                    parsed["pid"] = pid = argdict["ProcessIdentifier"]
                    parsed["ppid"] = argdict["ParentProcessIdentifier"]
                    modulepath = argdict["ModulePath"]
                elif "time_low" in argdict:
                    timelow = argdict["time_low"]
                    timehigh = argdict["time_high"]

                    if "pid" in argdict:
                        parsed["pid"] = pid = argdict["pid"]
                        parsed["ppid"] = argdict["ppid"]
                    else:
                        parsed["pid"] = pid = argdict["process_identifier"]
                        parsed["ppid"] = argdict[
                            "parent_process_identifier"]
                    modulepath = argdict["module_path"]
                else:
                    raise CuckooResultError(
                        "I don't recognise the bson log contents.")

                # FILETIME is 100-nanoseconds from 1601 :/
                vmtimeunix = (timelow + (timehigh << 32))
                vmtimeunix = vmtimeunix / 10000000.0 - 11644473600
                vmtime = datetime.datetime.fromtimestamp(vmtimeunix)
                parsed["first_seen"] = vmtime

                procname = get_filename_from_path(modulepath)
                parsed["process_name"] = procname

                # Remember the pid for subsequent apicall entries.
                self.pid = pid
            elif apiname == "__thread__":
                parsed["pid"] = pid = argdict["ProcessIdentifier"]
            # elif apiname == "__anomaly__":
            #     tid = argdict["ThreadIdentifier"]
            #     subcategory = argdict["Subcategory"]
            #     msg = argdict["Message"]
            #     self.handler.log_anomaly(subcategory, tid, msg)
            #     return True
            else:
                parsed["type"] = "apicall"
                parsed["pid"] = self.pid
                parsed["api"] = apiname
                parsed["category"] = category
                parsed["status"] = argdict.pop("is_success", 1)
                parsed["return_value"] = argdict.pop("retval", 0)
                parsed["arguments"] = argdict

                parsed["stacktrace"] = dec.get("s", [])
                parsed["uniqhash"] = dec.get("h", 0)

                if "e" in dec and "E" in dec:
                    parsed["last_error"] = dec["e"]
                    parsed["nt_status"] = dec["E"]

                # Add stringified flag representations alongside the raw
                # argument values (stored under "<flag>_s").
                if apiname in self.flags:
                    for flag in self.flags[apiname].keys():
                        argdict[flag + "_s"] = self._flag_represent(
                            apiname, flag, argdict[flag])

        yield parsed