def external_process(self, fd):
    dbh = DB.DBO(self.case)
    dbh._warnings = False
    dbh.mass_insert_start('ie_history')

    inode_id = self.fd.lookup_id()

    ## Find our path
    dbh.execute("select path from file where inode_id = %r", inode_id)
    row = dbh.fetch()
    path = row['path']

    history = IECache.IEHistoryFile(fd)
    for event in history:
        if not event:
            continue

        url = event['url'].get_value()
        url.inclusive = False
        url = url.get_value()

        ## How big is the entry
        size = event['size'].get_value() * IECache.blocksize

        args = dict(inode_id = inode_id,
                    type = event['type'],
                    offset = event['offset'],
                    length = size,
                    url = url,
                    filename = event['filename'],
                    headers = event['data'].get_value())

        modified = event['modified_time'].get_value()
        if modified > 1000:
            args['_modified'] = 'from_unixtime(%d)' % modified
        else:
            modified = None

        accessed = event['accessed_time'].get_value()
        if accessed > 1000:
            args['_accessed'] = 'from_unixtime(%d)' % accessed
        else:
            accessed = None

        dbh.mass_insert(**args)

        ## Try to locate the actual inode of the cached file; skip
        ## entries whose directory cannot be resolved.
        try:
            index = event['directory_index'].get_value()
            tmp_path = FlagFramework.normpath(FlagFramework.joinpath([
                path, history.directories[index]]))
        except:
            continue

        dbh.execute("select inode, inode_id from file where path='%s/' and name=%r",
                    tmp_path, args['filename'])
        row = dbh.fetch()
        if not row:
            continue

        headers = args['headers']

        ## We always create a new inode for cache entries to
        ## guarantee they get scanned by other scanners _after_ http
        ## info is populated. This essentially means we get
        ## duplicated inodes for the same actual files, which is a
        ## bit of extra overhead (cache files are processed twice).
        encoding_driver = "|o0"

        m = content_encoding_re.search(headers)
        if m:
            ## Is it gzip encoding?
            if m.group(1) == 'gzip':
                encoding_driver = "|G1"
            elif m.group(1) == 'deflate':
                encoding_driver = '|d1'
            else:
                print "I have no idea what %s encoding is" % m.group(1)

        ## Use a separate variable for the new cache inode so we do
        ## not clobber inode_id (the history file's inode), which the
        ## mass_insert above reuses for every event in this loop.
        cache_inode_id = self.ddfs.VFSCreate(
            None,
            "%s%s" % (row['inode'], encoding_driver),
            "%s/%s" % (tmp_path, args['filename']),
            size = size,
            _mtime = modified,
            _atime = accessed)

        http_args = dict(inode_id = cache_inode_id,
                         url = url_unquote(url))

        ## Put in a dodgy pcap entry for the timestamp:
        if '_accessed' in args:
            dbh.insert('pcap', _fast=True,
                       _ts_sec = args['_accessed'],
                       ts_usec = 0,
                       offset = 0,
                       length = 0)
            packet_id = dbh.autoincrement()
            http_args['response_packet'] = packet_id
            http_args['request_packet'] = packet_id

        ## Populate the http table if possible
        m = content_type_re.search(headers)
        if m:
            http_args['content_type'] = m.group(1)

        host = FlagFramework.find_hostname(url)
        if host:
            http_args['host'] = host
            http_args['tld'] = FlagFramework.make_tld(host)

        dbh.insert('http', _fast=True, **http_args)

        ## Now populate the http parameters from the URL GET
        ## parameters:
        try:
            base, query = url.split("?", 1)
            qs = cgi.parse_qs(query)
            for k, values in qs.items():
                for v in values:
                    dbh.insert('http_parameters', _fast=True,
                               inode_id = cache_inode_id,
                               key = k,
                               value = v)
        except ValueError:
            pass

        ## Scan the new file using the scanner train:
        fd = self.ddfs.open(inode_id=cache_inode_id)
        Scanner.scanfile(self.ddfs, fd, self.factories)
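## The IE history scanner above assumes two module level regular
## expressions, content_encoding_re and content_type_re, which pull the
## relevant fields out of the cached HTTP headers. Their definitions
## are not part of this excerpt; a minimal sketch of what they might
## look like (an assumption, not the original definitions):
import re

content_encoding_re = re.compile(r'content-encoding:\s+(\S+)', re.IGNORECASE)
content_type_re = re.compile(r'content-type:\s+([^\s;]+)', re.IGNORECASE)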
def external_process(self, fd):
    ## Find the other files we need in order to process the cache
    s = self.fd.stat()
    filename = "%s%s" % (s['path'], s['name'])
    data_fds = [
        self.ddfs.open("%s_CACHE_001_" % s['path']),
        self.ddfs.open("%s_CACHE_002_" % s['path']),
        self.ddfs.open("%s_CACHE_003_" % s['path']),
        ]

    mozcache = MozCache.MozCache(fd, data_fds)
    dbh = DB.DBO(self.case)

    ## Process each cache record
    for record in mozcache.records():
        meta = record.get_entry()
        (method, status, header) = parse_response(meta['MetaData'])

        ## Deal with content-encoding (gzip/deflate)
        encoding_driver = ""
        encoding = header.getheader("content-encoding")
        if encoding:
            if "gzip" in encoding.lower():
                encoding_driver = "|G1"
            elif "deflate" in encoding.lower():
                encoding_driver = "|d1"

        ## Locate embedded entries
        length = 0
        if record.record['DataLocation']['DataFile'] != 0:
            fileidx, offset, length = record.get_data_location()
            inode = '%s|o%s:%s' % (data_fds[fileidx].inode, offset, length)
        else:
            inode = self.ddfs.lookup(path="%s%08Xd01" % (
                s['path'], record.record['HashNumber'].get_value()))[1]
            ## Differentiate the inode from the existing one
            inode += "|o0"

        ## Add a new entry to the VFS
        if encoding:
            length = 0
        inode_id = self.ddfs.VFSCreate(
            None,
            "%s%s" % (inode, encoding_driver),
            "%s/%08Xd01" % (filename, record.record['HashNumber'].get_value()),
            _mtime = meta['LastModified'],
            _atime = meta['LastFetched'],
            size = length)

        ## Insert a dodgy pcap entry to represent the timestamp of
        ## this request
        dbh.insert('pcap', _fast=True,
                   _ts_sec = 'from_unixtime(%d)' % meta['LastModified'],
                   ts_usec = 0,
                   offset = 0,
                   length = 0)
        packet_id = dbh.autoincrement()

        ## Add to the http table. We parse the date and it is
        ## automatically returned in the case timezone. We do not need
        ## to supply an evidence timezone as http date strings contain
        ## a timezone specification.
        try:
            date = Time.parse(header.getheader("date"),
                              case=self.case, evidence_tz=None)
        except TypeError:
            date = 0

        ## Chomp the trailing NULL from the key
        url = str(meta['KeyData'])[:-1]
        if url.startswith("HTTP:"):
            url = url[len("HTTP:"):]

        args = dict(inode_id = inode_id,
                    ## URLs are always stored normalised in the db
                    url = url_unquote(url),
                    request_packet = packet_id,
                    response_packet = packet_id,
                    method = method,
                    status = status,
                    content_type = header.getheader("content-type"),
                    date = date)

        host = FlagFramework.find_hostname(url)
        if host:
            args['host'] = host
            args['tld'] = FlagFramework.make_tld(host)

        dbh.insert("http", _fast=True, **args)

        ## Now populate the http parameters from the URL GET
        ## parameters:
        try:
            base, query = url.split("?", 1)
            qs = cgi.parse_qs(query)
            for k, values in qs.items():
                for v in values:
                    dbh.insert('http_parameters', _fast=True,
                               inode_id = inode_id,
                               key = k,
                               value = v)
        except ValueError:
            pass

        ## Scan the new file using the scanner train:
        fd = self.ddfs.open(inode_id=inode_id)
        Scanner.scanfile(self.ddfs, fd, self.factories)
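## The Mozilla cache scanner above relies on a parse_response() helper
## to split a cache record's stored metadata into the request method,
## the response status and a header object supporting getheader(). Its
## definition is outside this excerpt; the sketch below is an assumed
## reconstruction based on the Mozilla cache metadata layout (NUL
## separated key/value strings), not the original implementation.
import mimetools
from StringIO import StringIO

def parse_response(meta_data):
    ## Assumed layout: alternating NUL terminated key/value strings,
    ## with the method stored under "request-method" and the raw
    ## response head under "response-head".
    elements = str(meta_data).split('\x00')
    kv = dict(zip(elements[::2], elements[1::2]))

    method = kv.get('request-method', 'GET')
    response_head = kv.get('response-head', '')

    ## The first line is the status line, e.g. "HTTP/1.1 200 OK"
    status_line, _, headers = response_head.partition('\r\n')
    try:
        status = int(status_line.split()[1])
    except (IndexError, ValueError):
        status = 0

    ## mimetools.Message provides the getheader() interface used above
    header = mimetools.Message(StringIO(headers))
    return method, status, header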