def get_parser(data, streamdata, sessid):
    """Guess or retrieve the parser based on the stream.

    Streams are retrieved from the "data" persistent storage variable,
    from the "streams" key. The parser for the main stream
    ((None, None, filename) in data['streams']) is cached for efficiency
    reasons in data['parser_cache'].
    """
    # must remake the parser EVERY TIME because parsers can't be pickled
    # (they contain generators, which are currently not pickleable);
    # the best we can do is cache the parser class, so at least we're not
    # taking time to re-guess the parser...
    if streamdata[0] is None:
        # original file
        stream = FileInputStream(data['filename'],
                                 real_filename=unicode(tmp_dir + sessid + '.file'))
        if 'parser_cache' in data:
            parser = data['parser_cache'](stream)
        else:
            parser = guessParser(stream)
            if not parser:
                print_parse_error()
                return (None, None)
            data['parser_cache'] = parser.__class__
            save_data(data, sessid)
    elif isinstance(streamdata[0], tuple):
        # substream: rebuild the parent parser, then take the field's substream
        prevstream, prevparser = get_parser(data, streamdata[0], sessid)
        stream = prevparser[streamdata[1]].getSubIStream()
        parser = guessParser(stream)
    else:
        # in-memory stream rebuilt from saved tags and bytes
        stream = StringInputStream(streamdata[1])
        stream.tags = streamdata[0]
        parser = guessParser(stream)
    return stream, parser
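# A usage sketch for get_parser, with hypothetical session values: the
# surrounding web app is assumed to provide tmp_dir/save_data and to populate
# `data` with 'filename' and 'streams'. (None, None, filename) selects the
# uploaded file itself; a (parent_streamdata, field_path, name) entry selects
# a substream extracted from a field of the parent parser.
#
#   data = {'filename': u'upload.bin',
#           'streams': [(None, None, u'upload.bin')]}
#   stream, parser = get_parser(data, data['streams'][0], 'sess42')
#   if parser:
#       print parser.mime_type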
def subfile(self, filePath):
    # hachoir-subfile is a tool based on hachoir-parser to find subfiles
    # in any binary stream.
    # Website: http://bitbucket.org/haypo/hachoir/wiki/hachoir-subfile

    # bypass sys.stdout, sys.stderr
    oldStdOut = sys.stdout
    oldStdErr = sys.stderr
    outputStdErr = StringIO.StringIO()
    outputStdOut = StringIO.StringIO()
    sys.stdout = outputStdOut
    sys.stderr = outputStdErr

    stream = FileInputStream(unicodeFilename(filePath), real_filename=filePath)

    # search for subfiles
    subfile = SearchSubfile(stream, 0, None)
    subfile.loadParsers(categories=None, parser_ids=None)
    subfile.main()

    # reset sys.stdout, sys.stderr
    sys.stdout = oldStdOut
    sys.stderr = oldStdErr

    # parse stdout, stderr from SearchSubfile
    return self.parse(outputStdOut.getvalue(), outputStdErr.getvalue())
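# A standalone sketch of the SearchSubfile flow used above and in the
# save_response_* methods below, assuming the hachoir 1.x module layout
# (hachoir_core, hachoir_subfile); "evidence.bin" and "carved/" are
# placeholder paths.
from hachoir_core.stream import FileInputStream
from hachoir_core.cmd_line import unicodeFilename
from hachoir_subfile.search import SearchSubfile

def carve(path, out_dir):
    stream = FileInputStream(unicodeFilename(path), real_filename=path)
    subfile = SearchSubfile(stream, 0, None)  # offset 0, scan to end of stream
    subfile.loadParsers()
    subfile.setOutput(out_dir)  # extract recognized subfiles into out_dir
    # main() runs the scan; its return value is the one captured as `ok`
    # in save_response_binaries below
    return subfile.main()

# carve("evidence.bin", "carved/")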
def save_response_binaries(self, path, hash_value):
    try:
        flow = Flow.objects.get(hash_value=hash_value)
        flow_details = flow.details
        for detail in flow_details:
            # build the response-content file name,
            # ex: contents_192.168.1.5:42825-62.212.84.227:80_resp.dat
            source_str = ":".join([detail.src_ip, str(detail.sport)])
            destination_str = ":".join([detail.dst_ip, str(detail.dport)])
            flow_str = "-".join([source_str, destination_str])
            resp_file = "_".join(["contents", flow_str, "resp.dat"])
            file_path = "/".join([path, resp_file])
            # path is created as unicode; hachoir needs a plain str
            file_path = str(file_path)
            try:
                stream = FileInputStream(unicodeFilename(file_path),
                                         real_filename=file_path)
            except NullStreamError:
                continue
            subfile = SearchSubfile(stream, 0, None)
            subfile.loadParsers()
            root = "/".join([path, "html-files"])
            if not os.path.exists(root):
                os.makedirs(root)
            output = "/".join([root, flow_str])
            output = str(output)
            if not os.path.exists(output):
                os.mkdir(output)
            subfile.setOutput(output)
            ok = subfile.main()
            # save the files info to the db as well
        return True
    except Exception, ex:
        return False
def createParser(filename, real_filename=None):
    """
    Create a parser from a file, or return None on error.

    Options:
    - filename (unicode): Input file name;
    - real_filename (str|unicode): Real file name.
    """
    return guessParser(FileInputStream(filename, real_filename))
def createParser(filename, real_filename=None, tags=None):
    """
    Create a parser from a file, or return None on error.

    Options:
    - filename (unicode): Input file name;
    - real_filename (str|unicode): Real file name;
    - tags: optional list of (key, value) tags to attach to the stream.
    """
    if not tags:
        tags = []
    stream = FileInputStream(filename, real_filename, tags=tags)
    return guessParser(stream)
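# A minimal, runnable sketch of the wrappers above, assuming the hachoir 1.x
# packages (hachoir-core, hachoir-parser); "sample.bin" is a placeholder path.
from hachoir_core.stream import FileInputStream
from hachoir_parser import guessParser

def describe_file(path):
    # guessParser returns None when no parser recognizes the stream
    parser = guessParser(FileInputStream(unicode(path), real_filename=path))
    # every hachoir parser exposes mime_type (see _verify_download below)
    return parser.mime_type if parser else None

# print describe_file("sample.bin")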
class Handler:
    def __init__(self):
        self.file_path = None
        self.file_name = None
        self.stream = None
        self.data = None
        self.log = Logger("File Handler", "DEBUG")

    def create_dir(self):
        now = datetime.datetime.now()
        self.log.message("Now is: %s:" % now)
        directory_name = now.strftime("%d-%m-%y")
        self.log.message("Directory name: %s:" % directory_name)
        directory_path = "/".join([settings.PROJECT_ROOT, "uploads", directory_name])
        self.log.message("Directory path: %s" % directory_path)
        if not os.path.exists(directory_path):
            os.mkdir(directory_path)
            self.log.message("Directory created")
        # we also need to create another directory for each upload
        new_dir = generate_name_from_timestame()
        new_dir_path = "/".join([directory_path, new_dir])
        if not os.path.exists(new_dir_path):
            os.mkdir(new_dir_path)
            self.log.message("Directory created")
        self.upload_dir = new_dir_path

    def save_file(self, f):
        self.file_name = f.name
        self.file_path = "/".join([self.upload_dir, self.file_name])
        destination = open(self.file_path, 'wb+')
        for chunk in f.chunks():
            destination.write(chunk)
        destination.close()

    def search(self, file_path, strings=None):
        try:
            self.stream = FileInputStream(unicodeFilename(file_path),
                                          real_filename=file_path)
        except NullStreamError:
            return False
        patterns = PatternMatching()
        for s in strings:
            patterns.addString(s)
        start = 0
        end = self.stream.size  # hachoir stream sizes are in bits
        self.data = self.stream.readBytes(start, end // 8)
        return patterns.search(self.data)

    def reset_data(self):
        self.data = None
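# A hedged sketch of the PatternMatching call behind Handler.search, assuming
# hachoir-regex's API: addString() registers a literal pattern and search()
# yields (start, end, pattern) tuples; the item[0] offsets consumed in
# save_response_files below depend on that shape.
from hachoir_regex import PatternMatching

def find_offsets(data, needles):
    patterns = PatternMatching()
    for needle in needles:
        patterns.addString(needle)
    # keep only the start offsets, as Handler.search's callers do
    return [match[0] for match in patterns.search(data)]

# find_offsets("HTTP/1.1 200 OK\r\n\r\nbody", ["\r\n\r\n"]) -> [15]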
def main():
    usage = "usage: %prog <file_name>"
    op = OptionParser(usage)
    (options, args) = op.parse_args()
    if len(args) != 1:
        op.print_help()
        sys.exit(1)
    inputFileName = unicode(args[0])
    try:
        stream = FileInputStream(inputFileName)
    except InputStreamError, err:
        exit("Unable to open file: %s" % err)
def convert_gzip_files(self, path, hash_value):
    try:
        flow = Flow.objects.get(hash_value=hash_value)
        flow_details = flow.details
        for detail in flow_details:
            # build the response-content file name,
            # ex: contents_192.168.1.5:42825-62.212.84.227:80_resp.dat
            source_str = ":".join([detail.src_ip, str(detail.sport)])
            destination_str = ":".join([detail.dst_ip, str(detail.dport)])
            flow_str = "-".join([source_str, destination_str])
            resp_file = "_".join(["contents", flow_str, "resp.dat"])
            file_path = "/".join([path, resp_file])
            # path is created as unicode; convert it to a regular string
            # for the hachoir operation
            file_path = str(file_path)
            try:
                stream = FileInputStream(unicodeFilename(file_path),
                                         real_filename=file_path)
            except NullStreamError:
                continue
            subfile = SearchSubfile(stream, 0, None)
            subfile.loadParsers()
            root = "/".join([path, "html-files"])
            if not os.path.exists(root):
                os.makedirs(root)
            output = "/".join([root, flow_str])
            output = str(output)
            subfile.setOutput(output)
            http_details = filter(lambda x: x.flow_details.id == detail.id,
                                  HTTPDetails.objects.filter(http_type="response"))
            file_ext = ".txt"
            for http in http_details:
                if http.content_type:
                    filename = subfile.output.createFilename(file_ext)
                    if http.content_encoding == "gzip":
                        # gunzip the saved response body and store it next to
                        # the original with an .html extension
                        r = open("/".join([output, filename]), "r")
                        body = r.read()
                        r.close()
                        data = StringIO.StringIO(body)
                        gzipper = gzip.GzipFile(fileobj=data)
                        html = gzipper.read()
                        filename = filename.split(".")[0] + ".html"
                        w = open("/".join([output, filename]), "w")
                        w.write(html)
                        w.close()
        return True
    except Exception, ex:
        print ex
        return False
def _verify_download(self, file_name=None):
    """
    Checks the saved file to see if it was actually valid; if not,
    consider the download a failure.
    """
    result = True

    # primitive verification of torrents: just make sure we didn't get
    # a text file or something
    if GenericProvider.TORRENT == self.providerType:
        parser = stream = None
        try:
            stream = FileInputStream(file_name)
            parser = guessParser(stream)
        except:
            pass
        result = parser and 'application/x-bittorrent' == parser.mime_type
        try:
            stream._input.close()
        except:
            pass

    return result
def save_response_files(self, path, hash_value):
    try:
        flow = Flow.objects.get(hash_value=hash_value)
        flow_details = flow.details
        for detail in flow_details:
            # build the response-content file name,
            # ex: contents_192.168.1.5:42825-62.212.84.227:80_resp.dat
            source_str = ":".join([detail.src_ip, str(detail.sport)])
            destination_str = ":".join([detail.dst_ip, str(detail.dport)])
            flow_str = "-".join([source_str, destination_str])
            resp_file = "_".join(["contents", flow_str, "resp.dat"])
            file_path = "/".join([path, resp_file])
            # path is created as unicode; convert it to a regular string
            # for the hachoir operation
            file_path = str(file_path)

            # offsets of the Content-Type headers we care about
            strings = ["Content-Type: text/html",
                       "Content-Type: application/x-javascript",
                       "Content-Type: text/css"]
            file_handler = FileHandler()
            responses = []
            search_li = file_handler.search(file_path, strings)
            if not search_li:
                continue
            for item in search_li:
                responses.append(item[0])

            # offsets of the blank lines separating headers from bodies
            empty_lines = []
            strings = ["\r\n\r\n"]
            search_li = file_handler.search(file_path, strings)
            if not search_li:
                continue
            for item in search_li:
                empty_lines.append(item[0])

            # offsets of the response status lines
            http_lines = []
            strings = ["HTTP/1.1"]
            search_li = file_handler.search(file_path, strings)
            if not search_li:
                continue
            for item in search_li:
                http_lines.append(item[0])

            try:
                stream = FileInputStream(unicodeFilename(file_path),
                                         real_filename=file_path)
            except NullStreamError:
                continue
            subfile = SearchSubfile(stream, 0, None)
            subfile.loadParsers()
            root = "/".join([path, "html-files"])
            if not os.path.exists(root):
                os.makedirs(root)
            output = "/".join([root, flow_str])
            output = str(output)
            subfile.setOutput(output)

            for x in range(len(responses)):
                # here we have the response header data
                data = file_handler.data
                #f = data[empty_lines[x]:http_lines[x+1]]
                file_ext = ".txt"
                #if ("html" in f or "body" in f):
                #    file_ext = ".html"
                #elif ("script" in f):
                #    file_ext = ".js"
                #else:
                # select the closest empty line after this response header
                empty_lines.append(responses[x])
                empty_lines.sort()
                index = empty_lines.index(responses[x])
                offset = empty_lines[index + 1]
                size = None
                try:
                    size = http_lines[x + 1] - 2
                except IndexError:
                    size = stream.size  # bits, but the slice below clamps to len(data)
                f = data[offset + 4:size]
                filename = subfile.output.createFilename(file_ext)
                w = open("/".join([output, filename]), "w")
                w.write(f)
                w.close()

            # save the hachoir-extracted binaries to the db along with
            # the created txt files
            if detail.protocol == "http":
                http_files = os.listdir(output)
                #http_files = filter(lambda x: x.split(".")[-1] != 'txt', http_files)  # no need to take the txt files
                if len(http_files) > 0:
                    http_li = filter(lambda x: x.flow_details.id == detail.id,
                                     HTTPDetails.objects.all())
                    for http in http_li:
                        http.files = http_files
                        http.save()
        return True
    except Exception, ex:
        print ex
        return False
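# Worked example of the offset arithmetic above (hypothetical offsets).
# Suppose the searches over one resp.dat returned:
#   responses   = [120]        # "Content-Type: ..." header offset
#   empty_lines = [100, 180]   # "\r\n\r\n" offsets
#   http_lines  = [0, 500]     # "HTTP/1.1" status-line offsets
# Inserting 120 into empty_lines and sorting gives [100, 120, 180]; the entry
# after 120 is 180, so the body starts at 180 + 4 (just past "\r\n\r\n") and
# ends at http_lines[1] - 2 = 498, i.e. f = data[184:498].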
def file_subfiles(filename):
    if filename and filename != "":
        offset, size, memorylimit, filemaxsize = \
            0, 999999, 50 * 1024 * 1024, 100 * 1024 * 1024
        stream = FileInputStream(unicodeFilename(filename), real_filename=filename)
        subfile = SearchSubfile(stream, offset, size)
        try:
            subfile.loadParsers()
            subfile.stats = {}
            subfile.verbose = False
            subfile.next_offset = None
            subfiles = []
            # read the file once; the magic offsets below index into these bytes
            with open(filename, "rb") as fo:
                filedata = fo.read()
            while subfile.current_offset < subfile.size:
                _ = subfile.datarate.update(subfile.current_offset)
                for offset, parser in subfile.findMagic(subfile.current_offset):
                    try:
                        size = parser.content_size // 8 if parser.content_size else None
                    except Exception as ex:
                        size = None
                    try:
                        description = (parser.description
                                       if not parser.content_size or
                                       parser.content_size // 8 < filemaxsize
                                       else parser.__class__.__name__)
                    except Exception as ex:
                        description = None
                    offset = offset // 8
                    # skip the first subfile, as it is the original file itself
                    if offset == 0:
                        continue
                    # identify/hash the carved range only when its size is known
                    chunk = filedata[offset:offset + size] if size else None
                    mimetype = data_mimetype(chunk) if chunk else None
                    md5 = data_hashes(chunk, "md5") if chunk else None
                    sha256 = data_hashes(chunk, "sha256") if chunk else None
                    ssdeep = data_hashes(chunk, "ssdeep") if chunk else None
                    subfiles.append({
                        "offset": offset,
                        "size": size,
                        "mimetype": mimetype,
                        "description": description,
                        "hashes": {
                            "md5": md5,
                            "sha256": sha256,
                            "ssdeep": ssdeep,
                        },
                    })
                subfile.current_offset += subfile.slice_size
                if subfile.next_offset:
                    subfile.current_offset = max(subfile.current_offset,
                                                 subfile.next_offset)
                subfile.current_offset = min(subfile.current_offset, subfile.size)
        except MemoryError:
            error("[!] Memory error!")
        return subfiles
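# Usage sketch for file_subfiles; data_mimetype and data_hashes are helpers
# from the surrounding project, so the values shown are illustrative and
# "evidence.bin" is a placeholder path.
#
#   for sf in file_subfiles("evidence.bin"):
#       print "0x%x %s %s" % (sf["offset"], sf["size"], sf["mimetype"])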