def get_parser(data, streamdata, sessid):
    """Guess or retrieve the parser based on the stream.

    Streams are retrieved from the "data" persistent storage variable, from
    the "streams" key.

    The parser for the main stream ((None, None, filename) in
    data['streams']) is cached for efficiency reasons in
    data['parser_cache'].

    The kind of stream is selected by streamdata[0]:

    * None: the originally uploaded file, read from tmp_dir + sessid +
      '.file'.
    * a tuple: a reference to a parent stream entry; streamdata[1] is the
      path of the field (inside the parent's parser) whose substream is
      used.
    * anything else: streamdata[1] is passed to StringInputStream and
      streamdata[0] is attached to the stream as its tags.

    Returns a (stream, parser) pair. (None, None) is returned when the main
    stream cannot be parsed, or when the parent of a referenced substream
    has no parser. For the other stream kinds the parser element is
    whatever guessParser() returned.
    """
    # must remake parser EVERY TIME because parsers can't be pickled
    # (they contain generators which are currently not pickleable)
    # best I can do here is cache the parser, so at least we're not
    # taking time to re-guess the parser...
    if streamdata[0] is None:
        # original file
        stream = FileInputStream(
            data['filename'],
            real_filename=unicode(tmp_dir + sessid + '.file'))
        if 'parser_cache' in data:
            parser = data['parser_cache'](stream)
        else:
            parser = guessParser(stream)
            if not parser:
                print_parse_error()
                return (None, None)
            # only the class is stored: it can be pickled, the parser
            # instance cannot
            data['parser_cache'] = parser.__class__
            save_data(data, sessid)
    elif isinstance(streamdata[0], tuple):
        prevstream, prevparser = get_parser(data, streamdata[0], sessid)
        if prevparser is None:
            # The parent stream could not be parsed, so there is nothing
            # to take the substream from; report failure the same way the
            # main-stream branch does instead of crashing on None[...].
            return (None, None)
        stream = prevparser[streamdata[1]].getSubIStream()
        parser = guessParser(stream)
    else:
        stream = StringInputStream(streamdata[1])
        stream.tags = streamdata[0]
        parser = guessParser(stream)
    return stream, parser
def on_field_parse_substream(self, dispatcher, field):
    """Open the substream of ``field`` in a new window, if it can be parsed.

    The field's sub input stream is handed to guessParser(); when no parser
    is found nothing happens. Otherwise the stream is wrapped in a
    FileFromInputStream named after the field's path and passed, together
    with the parser, to new_window(). ``dispatcher`` is not used.
    """
    substream = field.getSubIStream()
    sub_parser = guessParser(substream)
    if sub_parser:
        wrapped = FileFromInputStream(substream)
        wrapped.name = field.path
        new_window(self, wrapped, sub_parser, wrapped.name)
def get_parser(data, streamdata, sessid):
    """Guess or retrieve the parser based on the stream.

    Streams are retrieved from the "data" persistent storage variable, from
    the "streams" key.

    The parser for the main stream ((None, None, filename) in
    data['streams']) is cached for efficiency reasons in
    data['parser_cache'].

    The kind of stream is selected by streamdata[0]:

    * None: the originally uploaded file, read from tmp_dir + sessid +
      '.file'.
    * a tuple: a reference to a parent stream entry; streamdata[1] is the
      path of the field (inside the parent's parser) whose substream is
      used.
    * anything else: streamdata[1] is passed to StringInputStream and
      streamdata[0] is attached to the stream as its tags.

    Returns a (stream, parser) pair. (None, None) is returned when the main
    stream cannot be parsed, or when the parent of a referenced substream
    has no parser. For the other stream kinds the parser element is
    whatever guessParser() returned.
    """
    # must remake parser EVERY TIME because parsers can't be pickled
    # (they contain generators which are currently not pickleable)
    # best I can do here is cache the parser, so at least we're not
    # taking time to re-guess the parser...
    if streamdata[0] is None:
        # original file
        stream = FileInputStream(
            data['filename'],
            real_filename=unicode(tmp_dir + sessid + '.file'))
        if 'parser_cache' in data:
            parser = data['parser_cache'](stream)
        else:
            parser = guessParser(stream)
            if not parser:
                print_parse_error()
                return (None, None)
            # only the class is stored: it can be pickled, the parser
            # instance cannot
            data['parser_cache'] = parser.__class__
            save_data(data, sessid)
    elif isinstance(streamdata[0], tuple):
        prevstream, prevparser = get_parser(data, streamdata[0], sessid)
        if prevparser is None:
            # The parent stream could not be parsed, so there is nothing
            # to take the substream from; report failure the same way the
            # main-stream branch does instead of crashing on None[...].
            return (None, None)
        stream = prevparser[streamdata[1]].getSubIStream()
        parser = guessParser(stream)
    else:
        stream = StringInputStream(streamdata[1])
        stream.tags = streamdata[0]
        parser = guessParser(stream)
    return stream, parser
def parse_raw(raw):
    """Return the metadata extracted from ``raw``.

    ``raw`` is wrapped in an InputIOStream, a parser is guessed for that
    stream, and the result of extract_metadata() on that parser is
    returned. The guessed parser is passed on unchecked.
    """
    # presumably raw is a file-like object accepted by InputIOStream --
    # verify against callers
    return extract_metadata(guessParser(InputIOStream(raw)))
def handle_form(): """Process submitted data. See comments for details. """ prune_old() form = cgi.FieldStorage() if 'file' in form and form['file'].file: # compute session id sessid = get_sessid() if not sessid: rand = str(time.time()) + form['file'].filename + str( random.random()) sessid = hashlib.md5(rand).hexdigest() # write uploaded file f = open(tmp_dir + sessid + '.file', 'wb') if form['file'].done == -1: raise ValueError("File upload canceled?") while f.tell() < 2**22: # 4MB limit chunk = form['file'].file.read(32768) # 32KB chunks if not chunk: break f.write(chunk) if f.tell() == 0: f.close() print_form('Nothing uploaded.') return f.close() # write session variables try: fn = unicode(form['file'].filename, 'utf-8') except UnicodeDecodeError: fn = unicode(form['file'].filename, 'iso-8859-1') # stream "None" represents the original stream save_data({'filename': fn, 'streams': [(None, None, fn)]}, sessid) # send session id and reset variables c = SimpleCookie() c['sess'] = sessid c['hpath'] = '/' # clear path var. c['stream'] = '0' # clear stream var print c # send cookie to client (headers) print_page() # print AJAX frame page elif get_sessid(): # or perhaps you already have a file to parse? if not 'hpath' in form: print_page() return # redirect stderr, so we can catch parser errors sys.stderr = StringIO() # load variables hpath = cgi.escape(form.getfirst('hpath', '/')) stream_id = int(form.getfirst('stream', '0')) path = hpath.split(':')[stream_id] sessid = get_sessid() try: data = cPickle.load(file(tmp_dir + sessid + '.sess', 'rb')) except IOError: print_error('Your file was deleted due to inactivity. ' 'Please upload a new one.') return stream, parser = get_parser(data, data['streams'][stream_id], sessid) if parser is None: return # sorry, couldn't parse file! 
if 'save' in form: # "Download Raw" f = FileFromInputStream(stream) fld = parser[path] f.seek(fld.absolute_address / 8) size = alignValue(fld.size, 8) / 8 sys.stdout.write('Content-Type: application/octet-stream\r\n') sys.stdout.write('Content-Length: %i\r\n' % size) sys.stdout.write('Content-Disposition: attachment; ' 'filename=%s\r\n\r\n' % path.strip('/').split('/')[-1]) sys.stdout.write(f.read(size)) return elif 'savesub' in form: # "Download Substream" stream = parser[path.rstrip('/')].getSubIStream() filename = path.strip('/').split('/')[-1] tags = getattr(stream, 'tags', []) for tag in tags: if tag[0] == 'filename': filename = tag[1] sys.stdout.write('Content-Type: application/octet-stream\r\n') sys.stdout.write('Content-Disposition: attachment; ' 'filename=%s\r\n\r\n' % filename) sys.stdout.write(FileFromInputStream(stream).read()) return elif 'addStream' in form: # "Parse Substream" spath = cgi.escape(form['addStream'].value) new_stream = parser[spath.rstrip('/')].getSubIStream() streamdata = FileFromInputStream(new_stream).read() new_parser = guessParser(new_stream) if new_parser: stream = new_stream parser = new_parser tags = getattr(stream, 'tags', []) streamname = data['streams'][stream_id][2] + ':' data['streams'].append((tags, streamdata, streamname + spath)) try: if force_substream_ref: raise Exception("Use references for all substreams") save_data(data, sessid) except Exception: # many things could go wrong with pickling data['streams'][-1] = (data['streams'][stream_id], spath, streamname + spath) save_data(data, sessid) path = '/' hpath += ':/' stream_id = len(data['streams']) - 1 else: sys.stderr.write("Cannot parse substream %s: " "No suitable parser\n" % spath) elif 'delStream' in form: # "Delete Stream" n = int(form['delStream'].value) paths = hpath.split(':') del paths[n] del data['streams'][n] if n >= len(data['streams']): stream_id = 0 else: stream_id = n path = paths[stream_id] hpath = ':'.join(paths) save_data(data, sessid) stream, parser 
= get_parser(data, data['streams'][stream_id], sessid) # update client's variables c = SimpleCookie() c['hpath'] = hpath c['stream'] = str(stream_id) print c # send cookie to client # send headers print 'Content-Type: text/html' print # breadcrumb trail path up top print_path(path, data, stream_id) # fields print '''<table id="maintable" border="1"> <tr class="header"> <th class="headertext">Offset</th> <th class="headertext">Name</th> <th class="headertext">Type</th> <th class="headertext">Size</th> <th class="headertext">Description</th> <th class="headertext">Data</th> <th class="headertext">Download Field</th> </tr>''' for i in parser[path]: # determine options display = i.raw_display if form.getfirst('raw','0') == '1'\ else i.display disp_off = bits2hex if form.getfirst('hex','0') == '1'\ else bits2dec addr = i.address if form.getfirst('rel','0') == '1'\ else i.absolute_address if display == 'None': display = '' # clickable name for field sets if i.is_field_set: name = '''<span href="#" onClick="goPath('%s%s/')"\ class="fieldlink">%s/</span>''' % (path, i.name, i.name) else: name = i.name print '<tr class="data">' print '<td class="fldaddress">%s</td>' % disp_off(addr) print '<td class="fldname">%s</td>' % name print '<td class="fldtype">%s</td>' % i.__class__.__name__ print '<td class="fldsize">%s</td>' % disp_off(i.size) print '<td class="flddesc">%s</td>' % i.description print '<td class="flddisplay">%s</td>' % display print '<td class="flddownload">' paths = hpath.split(':') paths[stream_id] += i.name url = "%s?hpath=%s&stream=%s"%\ (script_name,':'.join(paths), stream_id) # hack to determine if a substream is present # the default getSubIStream() returns InputFieldStream() # InputFieldStream() then returns an InputSubStream. # in all the overrides, the return is a different stream type, # but this is certainly not the safest way to check for # an overridden method... 
# finally, if the field is a SubFile, then it has a custom # substream, and thus gets the substream features. if not isinstance(i.getSubIStream(), InputSubStream)\ or isinstance(i, SubFile): print '<a href="javascript:addStream(\'%s\')"\ class="dllink">Parse Substream</a><br/>' % (path + i.name) print '<a href="%s&savesub=1"\ class="dllink">Download Substream</a><br/>' % url print '<a href="%s&save=1"\ class="dllink">Download Raw</a>' % url else: print '<a href="%s&save=1"\ class="dllink">Download</a>' % url print '</td>' print '</tr>' print '</table>' print_path(path, data, stream_id) if sys.stderr.getvalue(): print_error('Error(s) encountered:', print_headers=False) print '<pre class="parseerror">%s</pre>' % sys.stderr.getvalue() else: print_form('Note: Cookies MUST be enabled!')
def handle_form(): """Process submitted data. See comments for details. """ prune_old() form = cgi.FieldStorage() if 'file' in form and form['file'].file: # compute session id sessid = get_sessid() if not sessid: rand = str(time.time())+form['file'].filename+str(random.random()) sessid = hashlib.md5(rand).hexdigest() # write uploaded file f = open(tmp_dir+sessid+'.file','wb') if form['file'].done==-1: raise ValueError("File upload canceled?") while f.tell()<2**22: # 4MB limit chunk = form['file'].file.read(32768) # 32KB chunks if not chunk: break f.write(chunk) if f.tell() == 0: f.close() print_form('Nothing uploaded.') return f.close() # write session variables try: fn = unicode(form['file'].filename,'utf-8') except UnicodeDecodeError: fn = unicode(form['file'].filename,'iso-8859-1') # stream "None" represents the original stream save_data({'filename':fn,'streams':[(None, None, fn)]}, sessid) # send session id and reset variables c = SimpleCookie() c['sess'] = sessid c['hpath'] = '/' # clear path var. c['stream'] = '0' # clear stream var print c # send cookie to client (headers) print_page() # print AJAX frame page elif get_sessid(): # or perhaps you already have a file to parse? if not 'hpath' in form: print_page() return # redirect stderr, so we can catch parser errors sys.stderr = StringIO() # load variables hpath = cgi.escape(form.getfirst('hpath','/')) stream_id = int(form.getfirst('stream','0')) path = hpath.split(':')[stream_id] sessid = get_sessid() try: data = cPickle.load(file(tmp_dir+sessid+'.sess','rb')) except IOError: print_error('Your file was deleted due to inactivity. ' 'Please upload a new one.') return stream, parser = get_parser(data, data['streams'][stream_id], sessid) if parser is None: return # sorry, couldn't parse file! 
if 'save' in form: # "Download Raw" f = FileFromInputStream(stream) fld = parser[path] f.seek(fld.absolute_address/8) size = alignValue(fld.size, 8)/8 sys.stdout.write('Content-Type: application/octet-stream\r\n') sys.stdout.write('Content-Length: %i\r\n'%size) sys.stdout.write('Content-Disposition: attachment; ' 'filename=%s\r\n\r\n'%path.strip('/').split('/')[-1]) sys.stdout.write(f.read(size)) return elif 'savesub' in form: # "Download Substream" stream = parser[path.rstrip('/')].getSubIStream() filename = path.strip('/').split('/')[-1] tags = getattr(stream,'tags',[]) for tag in tags: if tag[0] == 'filename': filename = tag[1] sys.stdout.write('Content-Type: application/octet-stream\r\n') sys.stdout.write('Content-Disposition: attachment; ' 'filename=%s\r\n\r\n'%filename) sys.stdout.write(FileFromInputStream(stream).read()) return elif 'addStream' in form: # "Parse Substream" spath = cgi.escape(form['addStream'].value) new_stream = parser[spath.rstrip('/')].getSubIStream() streamdata = FileFromInputStream(new_stream).read() new_parser = guessParser(new_stream) if new_parser: stream = new_stream parser = new_parser tags = getattr(stream,'tags',[]) streamname = data['streams'][stream_id][2]+':' data['streams'].append((tags, streamdata, streamname+spath)) try: if force_substream_ref: raise Exception("Use references for all substreams") save_data(data, sessid) except Exception: # many things could go wrong with pickling data['streams'][-1] = (data['streams'][stream_id], spath, streamname+spath) save_data(data, sessid) path = '/' hpath += ':/' stream_id = len(data['streams'])-1 else: sys.stderr.write("Cannot parse substream %s: " "No suitable parser\n"%spath) elif 'delStream' in form: # "Delete Stream" n = int(form['delStream'].value) paths = hpath.split(':') del paths[n] del data['streams'][n] if n >= len(data['streams']): stream_id = 0 else: stream_id = n path = paths[stream_id] hpath = ':'.join(paths) save_data(data, sessid) stream, parser = get_parser(data, 
data['streams'][stream_id], sessid) # update client's variables c = SimpleCookie() c['hpath'] = hpath c['stream'] = str(stream_id) print c # send cookie to client # send headers print 'Content-Type: text/html' print # breadcrumb trail path up top print_path(path, data, stream_id) # fields print '''<table id="maintable" border="1"> <tr class="header"> <th class="headertext">Offset</th> <th class="headertext">Name</th> <th class="headertext">Type</th> <th class="headertext">Size</th> <th class="headertext">Description</th> <th class="headertext">Data</th> <th class="headertext">Download Field</th> </tr>''' for i in parser[path]: # determine options display = i.raw_display if form.getfirst('raw','0') == '1'\ else i.display disp_off = bits2hex if form.getfirst('hex','0') == '1'\ else bits2dec addr = i.address if form.getfirst('rel','0') == '1'\ else i.absolute_address if display == 'None': display = '' # clickable name for field sets if i.is_field_set: name = '''<span href="#" onClick="goPath('%s%s/')"\ class="fieldlink">%s/</span>'''%(path, i.name, i.name) else: name = i.name print '<tr class="data">' print '<td class="fldaddress">%s</td>'%disp_off(addr) print '<td class="fldname">%s</td>'%name print '<td class="fldtype">%s</td>'%i.__class__.__name__ print '<td class="fldsize">%s</td>'%disp_off(i.size) print '<td class="flddesc">%s</td>'%i.description print '<td class="flddisplay">%s</td>'%display print '<td class="flddownload">' paths = hpath.split(':') paths[stream_id] += i.name url = "%s?hpath=%s&stream=%s"%\ (script_name,':'.join(paths), stream_id) # hack to determine if a substream is present # the default getSubIStream() returns InputFieldStream() # InputFieldStream() then returns an InputSubStream. # in all the overrides, the return is a different stream type, # but this is certainly not the safest way to check for # an overridden method... # finally, if the field is a SubFile, then it has a custom # substream, and thus gets the substream features. 
if not isinstance(i.getSubIStream(), InputSubStream)\ or isinstance(i, SubFile): print '<a href="javascript:addStream(\'%s\')"\ class="dllink">Parse Substream</a><br/>'%(path+i.name) print '<a href="%s&savesub=1"\ class="dllink">Download Substream</a><br/>'%url print '<a href="%s&save=1"\ class="dllink">Download Raw</a>'%url else: print '<a href="%s&save=1"\ class="dllink">Download</a>'%url print '</td>' print '</tr>' print '</table>' print_path(path, data, stream_id) if sys.stderr.getvalue(): print_error('Error(s) encountered:', print_headers=False) print '<pre class="parseerror">%s</pre>'%sys.stderr.getvalue() else: print_form('Note: Cookies MUST be enabled!')