def test_basename(self):
    assert Storage.get_filename_from_path("C:\\a.txt") == "a.txt"
    assert Storage.get_filename_from_path("C:/a.txt") == "a.txt"
    assert Storage.get_filename_from_path("C:\\\x00a.txt") == "\x00a.txt"
    assert Storage.get_filename_from_path("/tmp/a.txt") == "a.txt"
    assert Storage.get_filename_from_path("../../b.txt") == "b.txt"
    assert Storage.get_filename_from_path("..\\..\\c.txt") == "c.txt"
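# A minimal sketch of the behavior the assertions above pin down: keep only
# the last path component, treating both "\\" and "/" as separators. This is
# an illustration, not the actual implementation of
# Storage.get_filename_from_path; the helper name is hypothetical.
import re

def get_filename_from_path_sketch(path):
    # "C:\\a.txt", "C:/a.txt" and "../../b.txt" all reduce to their final
    # component; embedded NUL bytes ("\x00") are preserved as-is.
    return re.split(r"[\\/]", path)[-1]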
def get_files(self, submit_id, password=None, astree=False):
    """
    Returns files or URLs from a submitted analysis.
    @param submit_id: The id of the submit whose entries to return
    @param password: The password to unlock container archives with
    @param astree: sflock option; determines the format in which the
        files are returned
    @return: A tree of files
    """
    submit = db.view_submit(submit_id)

    files, duplicates = [], []
    for data in submit.data["data"]:
        if data["type"] == "file":
            filename = Storage.get_filename_from_path(data["data"])
            filepath = os.path.join(submit.tmp_path, filename)
            unpacked = sflock.unpack(
                filepath=filepath, password=password, duplicates=duplicates
            )
            if astree:
                unpacked = unpacked.astree(sanitize=True)
            files.append(unpacked)
        elif data["type"] == "url":
            files.append({
                "filename": data["data"],
                "filepath": "",
                "relapath": "",
                "selected": True,
                "size": 0,
                "type": "url",
                "package": "ie",
                "extrpath": [],
                "duplicate": False,
                "children": [],
                "mime": "text/html",
                "finger": {
                    "magic_human": "url",
                    "magic": "url",
                },
            })
        else:
            raise RuntimeError(
                "Unknown data entry type: %s" % data["type"]
            )

    return files, submit.data["errors"], submit.data["options"]
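# Hypothetical usage sketch for get_files(); "manager" and "submit_id" stand
# in for a SubmitManager instance and the id returned by an earlier pre()
# call. With astree=True, file entries are the dicts produced by sflock's
# astree(sanitize=True), assumed here to carry "type" and "filename" keys
# just like the url entries built above.
files, errors, options = manager.get_files(submit_id, astree=True)
for entry in files:
    print(entry["type"], entry["filename"])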
def pre(self, submit_type, data, options=None):
    """
    The first step to submitting a new analysis.
    @param submit_type: "files" or "strings"
    @param data: a list of dicts containing "name" (file name) and
        "data" (file data), or a list of strings (urls or hashes)
    @return: submit id
    """
    if submit_type not in ("strings", "files"):
        log.error("Bad parameter '%s' for submit_type", submit_type)
        return False

    path_tmp = Folders.create_temp()
    submit_data = {
        "data": [],
        "errors": [],
        "options": options or {},
    }

    if submit_type == "strings":
        for line in data:
            self._handle_string(submit_data, path_tmp, line.strip())

    if submit_type == "files":
        for entry in data:
            filename = Storage.get_filename_from_path(entry["name"])
            filepath = Files.create(path_tmp, filename, entry["data"])
            submit_data["data"].append({
                "type": "file",
                "data": filepath,
                "options": self.translate_options_to(
                    entry.get("options", {})
                ),
            })

    return db.add_submit(path_tmp, submit_type, submit_data)
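# Hypothetical usage sketch for pre(); "manager" stands in for a
# SubmitManager instance. A "files" submit takes name/data dicts, a
# "strings" submit takes raw lines (urls or hashes); both persist to a temp
# directory and return a submit id usable with get_files().
with open("sample.exe", "rb") as f:
    file_submit_id = manager.pre(
        submit_type="files",
        data=[{"name": "sample.exe", "data": f.read()}],
    )

url_submit_id = manager.pre(
    submit_type="strings",
    data=["http://example.com/payload"],
)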
def __iter__(self):
    self.fd.seek(0)

    while True:
        data = self.fd.read(4)
        if not data:
            return

        if len(data) != 4:
            log.critical("BsonParser lacking data.")
            return

        blen = struct.unpack("I", data)[0]
        if blen > MAX_MESSAGE_LENGTH:
            log.critical(
                "BSON message larger than MAX_MESSAGE_LENGTH, "
                "stopping handler."
            )
            return

        data += self.fd.read(blen-4)
        if len(data) < blen:
            log.critical("BsonParser lacking data.")
            return

        try:
            dec = bson_decode(data)
        except Exception as e:
            log.warning(
                "BsonParser decoding problem %s on data[:50] %s",
                e, repr(data[:50])
            )
            return

        mtype = dec.get("type", "none")
        index = dec.get("I", -1)

        if mtype == "info":
            # API call index info message, explaining the argument
            # names, etc.
            name = dec.get("name", "NONAME")
            arginfo = dec.get("args", [])
            category = dec.get("category")

            argnames, converters = self.determine_unserializers(arginfo)
            self.infomap[index] = name, arginfo, argnames, converters, category

            if dec.get("flags_value"):
                self.flags_value[name] = {}
                for arg, values in dec["flags_value"].items():
                    self.flags_value[name][arg] = dict(values)

            if dec.get("flags_bitmask"):
                self.flags_bitmask[name] = {}
                for arg, values in dec["flags_bitmask"].items():
                    self.flags_bitmask[name][arg] = values
            continue

        # Handle dumped buffers.
        if mtype == "buffer":
            buf = dec.get("buffer")
            sha1 = dec.get("checksum")
            self.buffer_sha1 = hashlib.sha1(buf).hexdigest()

            # TODO Reimplement storing of buffers. This has not been
            # done yet in the new resultserver.

            # Why do we pass along a sha1 checksum again?
            if sha1 != self.buffer_sha1:
                log.warning("Incorrect sha1 passed along for a buffer.")

            filepath = cwd(
                "buffer", self.buffer_sha1, analysis=self.task_id
            )
            with open(filepath, "wb") as f:
                f.write(buf)
            continue

        tid = dec.get("T", 0)
        time = dec.get("t", 0)

        parsed = {
            "type": mtype,
            "tid": tid,
            "time": time,
        }

        if mtype == "debug":
            parsed["message"] = dec.get("msg", "")
            log.info("Debug message from monitor: %s", parsed["message"])
        else:
            # Regular api call from monitor.
            if index not in self.infomap:
                log.warning(
                    "Got API with unknown index - monitor needs "
                    "to explain first: {0}".format(dec)
                )
                continue

            apiname, arginfo, argnames, converters, category = \
                self.infomap[index]
            args = dec.get("args", [])

            if len(args) != len(argnames):
                log.warning(
                    "Inconsistent arg count (compared to arg names) "
                    "on %s: %s names %s", dec, argnames, apiname
                )
                continue

            argdict = {}
            for idx, value in enumerate(args):
                argdict[argnames[idx]] = converters[idx](value)

            # Special new process message from the monitor.
            if apiname == "__process__":
                parsed["type"] = "process"

                if "TimeLow" in argdict:
                    timelow = argdict["TimeLow"]
                    timehigh = argdict["TimeHigh"]

                    parsed["pid"] = pid = argdict["ProcessIdentifier"]
                    parsed["ppid"] = argdict["ParentProcessIdentifier"]
                    modulepath = argdict["ModulePath"]
                elif "time_low" in argdict:
                    timelow = argdict["time_low"]
                    timehigh = argdict["time_high"]

                    if "pid" in argdict:
                        parsed["pid"] = pid = argdict["pid"]
                        parsed["ppid"] = argdict["ppid"]
                    else:
                        parsed["pid"] = pid = argdict["process_identifier"]
                        parsed["ppid"] = argdict["parent_process_identifier"]
                    modulepath = argdict["module_path"]
                else:
                    raise CuckooResultError(
                        "I don't recognize the bson log contents."
                    )

                # FILETIME is 100-nanoseconds from 1601 :/
                vmtimeunix = (timelow + (timehigh << 32))
                vmtimeunix = vmtimeunix / 10000000.0 - 11644473600
                vmtime = datetime.datetime.fromtimestamp(vmtimeunix)
                parsed["first_seen"] = vmtime

                procname = Storage.get_filename_from_path(modulepath)
                parsed["process_path"] = modulepath
                parsed["process_name"] = procname
                parsed["command_line"] = argdict.get("command_line")

                # Is this a 64-bit process?
                if argdict.get("is_64bit"):
                    self.is_64bit = True

                # Is this process being "tracked"?
                parsed["track"] = bool(argdict.get("track", 1))
                parsed["modules"] = argdict.get("modules", {})
                self.pid = pid
            elif apiname == "__thread__":
                parsed["pid"] = pid = argdict["ProcessIdentifier"]
            # elif apiname == "__anomaly__":
            #     tid = argdict["ThreadIdentifier"]
            #     subcategory = argdict["Subcategory"]
            #     msg = argdict["Message"]
            #     self.handler.log_anomaly(subcategory, tid, msg)
            #     return True
            elif apiname == "__action__":
                parsed["type"] = "action"
                parsed["action"] = argdict["action"]
            else:
                parsed["type"] = "apicall"
                parsed["pid"] = self.pid
                parsed["api"] = apiname
                parsed["category"] = category
                parsed["status"] = argdict.pop("is_success", 1)
                parsed["return_value"] = argdict.pop("retval", 0)
                parsed["arguments"] = argdict
                parsed["flags"] = {}

                parsed["stacktrace"] = dec.get("s", [])
                parsed["uniqhash"] = dec.get("h", 0)

                if "e" in dec and "E" in dec:
                    parsed["last_error"] = dec["e"]
                    parsed["nt_status"] = dec["E"]

                if apiname in self.flags_value:
                    self.resolve_flags(apiname, argdict, parsed["flags"])

            if self.buffer_sha1:
                parsed["buffer"] = self.buffer_sha1
                self.buffer_sha1 = None

        yield parsed
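# Standalone restatement of the FILETIME conversion performed for
# "__process__" events above: FILETIME counts 100-nanosecond intervals since
# 1601-01-01, and 11644473600 is the number of seconds between 1601-01-01
# and the Unix epoch (1970-01-01). The helper name is ours, not part of the
# parser.
import datetime

def filetime_to_datetime(time_low, time_high):
    # Recombine the two 32-bit halves into the 64-bit FILETIME value, scale
    # to seconds, then shift from the 1601 epoch to the Unix epoch.
    filetime = time_low + (time_high << 32)
    unix_ts = filetime / 10000000.0 - 11644473600
    return datetime.datetime.fromtimestamp(unix_ts)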
def __iter__(self):
    self.fd.seek(0)

    while True:
        data = self.fd.read(4)
        if not data:
            return

        if len(data) != 4:
            log.critical("BsonParser lacking data.")
            return

        blen = struct.unpack("I", data)[0]
        if blen > MAX_MESSAGE_LENGTH:
            log.critical(
                "BSON message larger than MAX_MESSAGE_LENGTH, "
                "stopping handler."
            )
            return

        data += self.fd.read(blen-4)
        if len(data) < blen:
            log.critical("BsonParser lacking data.")
            return

        try:
            dec = bson_decode(data)
        except Exception as e:
            log.warning(
                "BsonParser decoding problem %s on data[:50] %s",
                e, repr(data[:50])
            )
            return

        mtype = dec.get("type", "none")
        index = dec.get("I", -1)

        if mtype == "info":
            # API call index info message, explaining the argument
            # names, etc.
            name = dec.get("name", "NONAME")
            arginfo = dec.get("args", [])
            category = dec.get("category")

            argnames, converters = self.determine_unserializers(arginfo)
            self.infomap[index] = name, arginfo, argnames, converters, category

            if dec.get("flags_value"):
                self.flags_value[name] = {}
                for arg, values in dec["flags_value"].items():
                    self.flags_value[name][arg] = dict(values)

            if dec.get("flags_bitmask"):
                self.flags_bitmask[name] = {}
                for arg, values in dec["flags_bitmask"].items():
                    self.flags_bitmask[name][arg] = values
            continue

        # Handle dumped buffers.
        if mtype == "buffer":
            buf = dec.get("buffer")
            sha1 = dec.get("checksum")
            self.buffer_sha1 = hashlib.sha1(buf).hexdigest()

            # Why do we pass along a sha1 checksum again?
            if sha1 != self.buffer_sha1:
                log.warning("Incorrect sha1 passed along for a buffer.")

            # If the parent is netlogs ResultHandler then we actually
            # dump it - this should only be the case during the
            # analysis, any after processing will then be ignored.
            from cuckoo.core.resultserver import ResultHandler
            if isinstance(self.fd, ResultHandler):
                filepath = os.path.join(
                    self.fd.storagepath, "buffer", self.buffer_sha1
                )
                with open(filepath, "wb") as f:
                    f.write(buf)
            continue

        tid = dec.get("T", 0)
        time = dec.get("t", 0)

        parsed = {
            "type": mtype,
            "tid": tid,
            "time": time,
        }

        if mtype == "debug":
            parsed["message"] = dec.get("msg", "")
            log.info("Debug message from monitor: %s", parsed["message"])
        else:
            # Regular api call from monitor.
            if index not in self.infomap:
                log.warning(
                    "Got API with unknown index - monitor needs "
                    "to explain first: {0}".format(dec)
                )
                continue

            apiname, arginfo, argnames, converters, category = \
                self.infomap[index]
            args = dec.get("args", [])

            if len(args) != len(argnames):
                log.warning(
                    "Inconsistent arg count (compared to arg names) "
                    "on %s: %s names %s", dec, argnames, apiname
                )
                continue

            argdict = {}
            for idx, value in enumerate(args):
                argdict[argnames[idx]] = converters[idx](value)

            # Special new process message from the monitor.
            if apiname == "__process__":
                parsed["type"] = "process"

                if "TimeLow" in argdict:
                    timelow = argdict["TimeLow"]
                    timehigh = argdict["TimeHigh"]

                    parsed["pid"] = pid = argdict["ProcessIdentifier"]
                    parsed["ppid"] = argdict["ParentProcessIdentifier"]
                    modulepath = argdict["ModulePath"]
                elif "time_low" in argdict:
                    timelow = argdict["time_low"]
                    timehigh = argdict["time_high"]

                    if "pid" in argdict:
                        parsed["pid"] = pid = argdict["pid"]
                        parsed["ppid"] = argdict["ppid"]
                    else:
                        parsed["pid"] = pid = argdict["process_identifier"]
                        parsed["ppid"] = argdict["parent_process_identifier"]
                    modulepath = argdict["module_path"]
                else:
                    raise CuckooResultError(
                        "I don't recognize the bson log contents."
                    )

                # FILETIME is 100-nanoseconds from 1601 :/
                vmtimeunix = (timelow + (timehigh << 32))
                vmtimeunix = vmtimeunix / 10000000.0 - 11644473600
                vmtime = datetime.datetime.fromtimestamp(vmtimeunix)
                parsed["first_seen"] = vmtime

                procname = Storage.get_filename_from_path(modulepath)
                parsed["process_path"] = modulepath
                parsed["process_name"] = procname
                parsed["command_line"] = argdict.get("command_line")

                # Is this a 64-bit process?
                if argdict.get("is_64bit"):
                    self.is_64bit = True

                # Is this process being "tracked"?
                parsed["track"] = bool(argdict.get("track", 1))
                parsed["modules"] = argdict.get("modules", {})
                self.pid = pid
            elif apiname == "__thread__":
                parsed["pid"] = pid = argdict["ProcessIdentifier"]
            # elif apiname == "__anomaly__":
            #     tid = argdict["ThreadIdentifier"]
            #     subcategory = argdict["Subcategory"]
            #     msg = argdict["Message"]
            #     self.handler.log_anomaly(subcategory, tid, msg)
            #     return True
            elif apiname == "__action__":
                parsed["type"] = "action"
                parsed["action"] = argdict["action"]
            else:
                parsed["type"] = "apicall"
                parsed["pid"] = self.pid
                parsed["api"] = apiname
                parsed["category"] = category
                parsed["status"] = argdict.pop("is_success", 1)
                parsed["return_value"] = argdict.pop("retval", 0)
                parsed["arguments"] = argdict
                parsed["flags"] = {}

                parsed["stacktrace"] = dec.get("s", [])
                parsed["uniqhash"] = dec.get("h", 0)

                if "e" in dec and "E" in dec:
                    parsed["last_error"] = dec["e"]
                    parsed["nt_status"] = dec["E"]

                if apiname in self.flags_value:
                    self.resolve_flags(apiname, argdict, parsed["flags"])

            if self.buffer_sha1:
                parsed["buffer"] = self.buffer_sha1
                self.buffer_sha1 = None

        yield parsed
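# Hypothetical usage sketch: BsonParser is iterable and yields one parsed
# dict per monitor event. Constructing it from a plain file object is an
# assumption here; any seekable file-like object holding bson log data
# should do. The keys used below match the parsed dicts built above.
with open("logs/1.bson", "rb") as fd:
    for event in BsonParser(fd):
        if event["type"] == "apicall":
            print(event["api"], event["arguments"])
        elif event["type"] == "process":
            print("process", event["pid"], event["process_name"])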