def my_open(buf, page, parent=None): dircache = {} vbaiter = None docdata = "" docdataiter = None tbliter = None dirflag = 0 ftype = "" try: if parent is None: gsffilename = page.fname else: # need to save tmp file to pass to gsf gsffilename = "tmp%s" % time.time() f = open(gsffilename, "wb") f.write(buf) f.close() gsfout = subprocess.check_output(["gsf", "list", gsffilename]) print gsfout print "-----------------" for i in gsfout.split("\n")[1:-1]: if i[0] == "f": # gsf sometimes lists date even for files. Or, rather, it # seems that it misrepresents empty dirs as (empty) files. # I have observed this with 'Objects' in many .pub files. if i[5] != ' ': fullname = " ".join(i.split()[4:]) else: fullname = " ".join(i.split()[2:]) if not len(fullname): fullname = " ".join(i.split()[2:]) if "/" in fullname: fns = fullname.split("/") cdir = "/".join(fns[:-1]) fn = fns[-1] else: fn = fullname cdir = "" if len(fn) and ord(fn[0]) < 32: fn = fn[1:] pn = None if cdir: cdir_to_treeiter(page, parent, cdir, dircache) pn = dircache["/" + cdir] data = subprocess.check_output( ["gsf", "cat", gsffilename, fullname]) iter1 = add_pgiter(page, fn, "ole", fn, data, pn) if fn == "DesignerDoc": ftype = "dsf" page.model.set_value(iter1, 1, ("dsf", dirflag)) dsf.open(page, data, iter1) if (fn == "EscherStm" or fn == "EscherDelayStm"): # and infchild.size()>0: ftype = "escher" page.model.set_value(iter1, 1, ("escher", dirflag)) escher.parse( page.model, data, iter1, "pub" ) # currently I don't parse it automagically for MSDOC if fn == "MagicTab": ftype = "wls" page.model.set_value(iter1, 1, ("wls", dirflag)) wls.parse(page, data, iter1) if fn == "CONTENTS": if data[6:11] == "WT602": ftype = "wt602" page.model.set_value(iter1, 1, ("wt602", dirflag)) wt602.parse(page, data, iter1) elif fullname.split('/')[0] == "OleObjects": # Nested OLE objects (or images) in WT602 wt602.parse_object(page, data, iter1) else: ftype = "quill" page.model.set_value(iter1, 1, ("quill", dirflag)) quill.parse(page, 
data, iter1) if fn == "Contents": if data[:2] == "\xe8\xac": # take signature into account ftype = "pub" page.model.set_value(iter1, 1, ("pub", dirflag)) pub.parse(page, data, iter1) if fn == "VisioDocument": ftype = "vsd" page.model.set_value(iter1, 1, ("vsd", dirflag)) # level = 1? vsd.parse(page, data, iter1) if fn == "PageMaker": ftype = "pm" page.model.set_value(iter1, 1, ("pm", dirflag)) pm6.open(page, data, iter1) if fn == "WordDocument": ftype = "doc" page.model.set_value(iter1, 1, ("doc", dirflag)) #level = 1 doc.parse(page, data, iter1) if fn == "1Table" or fn == "0Table": page.wtable = iter1 if fn == "Data" and page.type == "DOC": page.wdata = iter1 if fn == "Book" or fn == "Workbook": page.model.set_value(iter1, 1, ("xls", dirflag)) ftype = xls.parse(page, data, iter1) if fn == "PowerPoint Document" or fn == "Pictures": ftype = "ppt" page.model.set_value(iter1, 1, ("ppt", dirflag)) ppt.parse(page, data, iter1) if fn == "NativeContent_MAIN": ftype = "qpw" page.model.set_value(iter1, 1, ("qpw", dirflag)) qpw.parse(page, data, iter1) if fn == "Signature" and data[:4] == '\x60\x67\x01\x00': ftype = "ppp" #PagePlus OLE version (9.x?) 
if (fn == "contents" or fn == "SCFFPreview") and ftype == "ppp": ppp.parse(page, data, iter1, fn) # I've no idea if this is really the signature, but it is # present in all files I've seen so far if fn == "Header" and data[0xc:0xf] == 'xV4': ftype = 'zmf' zmf.zmf2_open(page, data, iter1, fn) if fn[-4:] == '.zmf': ftype = 'zmf' zmf.zmf2_open(page, data, iter1, fn) if fn == "VBA": page.type = "vba" ftype = "vba" #if (ftype == "vba" and fn == "dir") or if "vba/dir" in fullname.lower(): page.model.set_value(iter1, 1, ("vba", dirflag)) vbaiter = iter1 vbadata = data if "SummaryInformation" in fn: page.model.set_value(iter1, 1, ("ole", "propset")) if parent is None: try: os.remove(gsffilename) except: pass else: if i.split()[2] == "VBA": page.type = "vba" ftype = "vba" if vbaiter != None: vba.parse(page, vbadata, vbaiter) except subprocess.CalledProcessError: print "Failed to run gsf. Please install libgsf." return ftype = "TEST" return ftype
def parse(page, data, parent):
    """Split an XLS stream into records and add one tree row per record.

    Each record starts with a little-endian 16-bit type followed by a
    little-endian 16-bit payload length.  Rows are appended to
    ``page.model``; records of type 0x809 and 0x1033 open a new nesting
    level and records of type 10 and 0x1034 close the current one.

    Arguments:
      page   -- page object; ``page.model`` receives the rows and
                ``page.version`` is set to 5 or 8 when a 0x809 record
                carries version 0x500 or 0x600.
      data   -- raw bytes of the stream.
      parent -- tree iter the top-level records are appended under.

    Returns "XLS", or "XLS5"/"XLS8" once a version has been recognised.
    Parsing stops at the first record of type 0, when fewer than five bytes
    remain, or on any error (which is reported on stdout, not raised).
    """
    offset = 0
    ftype = "XLS"
    idx = 0
    lblidx = 1
    iters = [parent]
    print("Length of iters ", len(iters))
    curiter = iters[-1]
    try:
        while offset < len(data) - 4:
            rtype = struct.unpack("<H", data[offset:offset + 2])[0]
            if rtype == 0:
                print("Break.", offset, len(data))
                break
            iter1 = page.model.append(curiter, None)
            rname = ""
            if rtype in rec_ids:
                rname = rec_ids[rtype]
            print(rtype, rname, offset)
            if rtype == 0x809:
                # Start of a substream: following records nest under it.
                iters.append(iter1)
                curiter = iter1
                ver = struct.unpack("<H", data[offset + 4:offset + 6])[0]
                dt = struct.unpack("<H", data[offset + 6:offset + 8])[0]
                if dt in substream:
                    rname = "BOF (%s)" % substream[dt]
                else:
                    rname = "BOF (unknown)"
                if ver == 0x500:
                    ftype = "XLS5"
                    page.version = 5
                    print("Version: 5")
                elif ver == 0x600:
                    ftype = "XLS8"
                    page.version = 8
                    print("Version: 8")
            elif rtype == 10 or rtype == 0x1034:
                # Close the current level, but never pop the root: an
                # unbalanced end record must not abort the whole parse.
                if len(iters) > 1:
                    iters.pop()
                curiter = iters[-1]
            elif rtype == 0x1033:
                iters.append(iter1)
                curiter = iter1
            elif rtype == 0xe0:  # xf
                rname = "XF %02x" % idx
                idx += 1
            elif rtype == 0x18:  # Lbl
                rname = "Lbl %02x" % lblidx
                lblidx += 1
            offset += 2
            rlen = struct.unpack("<H", data[offset:offset + 2])[0]
            offset += 2
            # The stored record data includes the 4-byte type/length header.
            rdata = data[offset - 4:offset + rlen]
            page.model.set_value(iter1, 0, rname)
            page.model.set_value(iter1, 1, ("xls", rtype))
            page.model.set_value(iter1, 2, len(rdata))
            page.model.set_value(iter1, 3, rdata)
            page.model.set_value(iter1, 7, "0x%02x" % rtype)
            page.model.set_value(iter1, 6,
                                 page.model.get_string_from_iter(iter1))
            if rtype == 0xec:  # MsoDrawing
                escher.parse(page.model, rdata[4:], iter1)
            offset += rlen
    except Exception:
        # Best effort: keep what was parsed so far.  ``Exception`` rather
        # than a bare except so Ctrl-C and SystemExit still propagate.
        print("Something was wrong in XLS parse")
    return ftype
def gsf_get_children(page, infile, parent, ftype, dirflag=0):
    """Add the children of a gsf infile to the tree and parse known streams.

    Every child is read, added with ``add_pgiter`` and, depending on its
    name (and for some streams a signature in the data), handed to the
    matching format parser.  The function then calls itself on the child.

    Arguments:
      page    -- page object; rows go to ``page.model`` and ``page.wtable``,
                 ``page.wdata`` and ``page.type`` may be set.
      infile  -- gsf infile object whose children are enumerated.
      parent  -- tree iter of ``infile``.
                 NOTE(review): not used in this function -- ``add_pgiter``
                 is called without a parent; confirm this is intended.
      ftype   -- file type detected so far.
      dirflag -- second element of the type tuple stored in column 1.

    Returns the file type after this level was processed (the incoming
    ``ftype`` if no stream changed it).  The value returned by the
    recursive call is ignored.
    """
    vbaiter = None
    vbadata = None
    for i in range(infile.num_children()):
        infchild = infile.child_by_index(i)
        infname = infile.name_by_index(i)
        chsize = infchild.size()
        # Drop a leading control character from the stream name.
        if infname and ord(infname[0]) < 32:
            infname = infname[1:]
        if infname == "dir":
            infuncomp = infchild.uncompress()
            data = infuncomp.read(infuncomp.size())
        else:
            data = infchild.read(chsize)
        iter1 = add_pgiter(page, infname, "ole", dirflag, data)

        if (infname == "EscherStm" or infname == "EscherDelayStm") and chsize > 0:
            ftype = "escher"
            page.model.set_value(iter1, 1, ("escher", dirflag))
            # currently I don't parse it automagically for MSDOC
            escher.parse(page.model, data, iter1, "pub")
        if infname == "CONTENTS":
            if data[6:11] == "WT602":
                ftype = "wt602"
                page.model.set_value(iter1, 1, ("wt602", dirflag))
                wt602.parse(page, data, iter1)
            else:
                ftype = "quill"
                page.model.set_value(iter1, 1, ("quill", dirflag))
                quill.parse(page, data, iter1)
        if infname == "Contents":
            if data and data[:2] == "\xe8\xac":  # take signature into account
                ftype = "pub"
                page.model.set_value(iter1, 1, ("pub", dirflag))
                pub.parse(page, data, iter1)
        if infname == "VisioDocument":
            ftype = "vsd"
            page.model.set_value(iter1, 1, ("vsd", dirflag))
            # choose vsd or vsd2
            vsd.parse(page, data, iter1)
        if infname == "PageMaker":
            ftype = "pm"
            page.model.set_value(iter1, 1, ("pm", dirflag))
            pm6.open(page, data, iter1)
        if infname == "WordDocument":
            ftype = "doc"
            page.model.set_value(iter1, 1, ("doc", dirflag))
            doc.parse(page, data, iter1)
        if infname == "1Table" or infname == "0Table":
            page.wtable = iter1
        if infname == "Data":
            page.wdata = iter1
        if infname == "Book" or infname == "Workbook":
            page.model.set_value(iter1, 1, ("xls", dirflag))
            ftype = xls.parse(page, data, iter1)
        # Parenthesised: the data check must apply to both stream names
        # ("and" binds tighter than "or").
        if (infname == "PowerPoint Document" or infname == "Pictures") \
                and data is not None:
            ftype = "ppt"
            page.model.set_value(iter1, 1, ("ppt", dirflag))
            ppt.parse(page, data, iter1)
        if infname == "NativeContent_MAIN":
            ftype = "qpw"
            page.model.set_value(iter1, 1, ("qpw", dirflag))
            qpw.parse(page, data, iter1)
        if infname == "Signature" and data[:4] == '\x60\x67\x01\x00':
            ftype = "ppp"  # PagePlus OLE version (9.x?)
        # Only parsed once the "Signature" stream above was seen.
        if (infname == "contents" or infname == "SCFFPreview") and ftype == "ppp":
            ppp.parse(page, data, iter1, infname)
        # I've no idea if this is really the signature, but it is
        # present in all files I've seen so far
        if infname == "Header" and data[0xc:0xf] == 'xV4':
            ftype = 'zmf'
            zmf.zmf2_open(page, data, iter1, infname)
        if infname[-4:] == '.zmf':
            ftype = 'zmf'
            zmf.zmf2_open(page, data, iter1, infname)
        if infname == "VBA":
            # Remember the type detected so far on the page before
            # switching to VBA handling.
            page.type = ftype
            ftype = "vba"
        if ftype == "vba" and infname == "dir":
            # Parsed after the loop, once all siblings have been added.
            page.model.set_value(iter1, 1, ("vba", dirflag))
            vbaiter = iter1
            vbadata = data
        # NOTE(review): this tests the container being iterated, which is
        # always non-empty inside this loop, so every child is re-tagged
        # (ftype, 1) and recursed into.  ``infchild.num_children()`` looks
        # like the intent -- confirm before changing, the tag is user-visible.
        if infile.num_children() > 0:
            page.model.set_value(iter1, 1, (ftype, 1))
            gsf_get_children(page, infchild, iter1, ftype, 0)
        if "SummaryInformation" in infname:
            page.model.set_value(iter1, 1, ("ole", "propset"))
    if vbaiter is not None:
        vba.parse(page, vbadata, vbaiter)
    return ftype
def parse (page, data, parent): offset = 0 ftype = "XLS" idx = 0 lblidx = 1 iters = [] iters.append(parent) print "Length of iters ",len(iters) curiter = iters[len(iters)-1] try: while offset < len(data) - 4: rtype = struct.unpack("<H",data[offset:offset+2])[0] if rtype == 0: print "Break.",offset,len(data) break iter1 = page.model.append(curiter,None) rname = "" if rec_ids.has_key(rtype): rname = rec_ids[rtype] print rtype, rname, offset if rtype == 0x809: iters.append(iter1) curiter = iter1 ver = struct.unpack("<H",data[offset+4:offset+6])[0] dt = struct.unpack("<H",data[offset+6:offset+8])[0] if substream.has_key(dt): rname = "BOF (%s)"%substream[dt] else: rname = "BOF (unknown)" if ver == 0x500: ftype = "XLS5" page.version = 5 print "Version: 5" elif ver == 0x600: ftype = "XLS8" page.version = 8 print "Version: 8" elif rtype == 10 or rtype == 0x1034: iters.pop() curiter = iters[len(iters)-1] elif rtype == 0x1033: iters.append(iter1) curiter = iter1 # elif rtype == 0x208: #row # rname = "Row %04x"%struct.unpack("<H",data[offset+0x10:offset+0x12]) elif rtype == 0xe0: #xf rname = "XF %02x"%idx idx += 1 elif rtype == 0x18: #Lbl rname = "Lbl %02x"%lblidx lblidx += 1 offset += 2 rlen = struct.unpack("<H",data[offset:offset+2])[0] offset += 2 rdata = data[offset-4:offset+rlen] page.model.set_value(iter1,0,rname) page.model.set_value(iter1,1,("xls",rtype)) page.model.set_value(iter1,2,len(rdata)) page.model.set_value(iter1,3,rdata) page.model.set_value(iter1,7,"0x%02x"%rtype) page.model.set_value(iter1,6,page.model.get_string_from_iter(iter1)) if rtype == 0xec: #MsoDrawing escher.parse (page.model,rdata[4:],iter1) offset += rlen except: print "Something was wrong in XLS parse" return ftype
def parse (cmd, entry, page):
    """Run a command line typed by the user against the selected tree row.

    Two command families are handled; anything else is ignored:

      ``$type[@addr]`` -- treat the data of the selected row (column 3 of
          the model) as ``type``.  For most types ``addr`` is a hex offset
          into that data (default "0"); "bmp" expects
          ``addr:bpp:width:height``, "dump" accepts ``start[:end]`` and
          "xls" expects a cell address such as ``b12``.
      ``?Xarg`` -- search the model; ``X`` selects the conversion done by
          ``arg_conv`` and 'r'/'R' selects ``recfind`` instead of
          ``cmdfind``.

    Arguments:
      cmd   -- the command string.
      entry -- not used by this function.
      page  -- page object providing ``view``, ``type``, ``search``,
               ``show_search`` and the hex viewer reached via ``page.hd``.

    Returns None.
    """
    if not cmd:
        return
    if cmd[0] == "$":
        pos = cmd.find("@")
        if pos != -1:
            chtype = cmd[1:pos]
            chaddr = cmd[pos+1:]
        else:
            chtype = cmd[1:]
            chaddr = "0"
        print("Command: ",chtype,chaddr)
        kind = chtype.lower()

        treeSelection = page.view.get_selection()
        model, iter1 = treeSelection.get_selected()
        if iter1 is None:
            # Nothing selected: fall back to the first row.
            page.view.set_cursor_on_cell(0)
            treeSelection = page.view.get_selection()
            model, iter1 = treeSelection.get_selected()
        if iter1 is None:
            print("No record selected")
            return
        buf = model.get_value(iter1,3)

        if kind == "ole":
            off = int(chaddr,16)
            if buf[off:off+8] == "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1":
                ole.ole_open (buf[off:],page,iter1)
            else:
                print("OLE stream not found at ",chaddr)
        elif kind == "bmp":
            # Build a 40-byte DIB header in front of the pixel data.
            addr,bpp,w,h = chaddr.split(":")
            dib_data = "\x28\x00\x00\x00"+struct.pack("<I",int(w))+struct.pack("<I",int(h))+"\x01\x00"
            dib_data += struct.pack("<H",int(bpp))+"\x00"*8+struct.pack("<I",160)*2+"\x00"*8
            dib_data += buf[int(addr,16):]
            iter2 = add_pgiter (page,"[BMP]","escher","Blip",dib2bmp(dib_data),iter1)
            model.set_value(iter2,1,("escher","odraw","Blip"))
        elif kind == "b64":
            b64decode (page,buf[int(chaddr,16):],iter1)
        elif kind == "cvx":
            cvx.parse (page,buf[int(chaddr,16):],iter1)
        elif kind == "esc":
            escher.parse (model,buf[int(chaddr,16):],iter1)
        elif kind == "cmx":
            cdr.cdr_open (buf[int(chaddr,16):],page,iter1)
        elif kind == "icc":
            icc.parse (page,buf[int(chaddr,16):],iter1)
        elif kind == "cdx":
            chdraw.open (page,buf[int(chaddr,16):],iter1)
        elif kind == "yep":
            yep.parse (page,buf[int(chaddr,16):],iter1)
        elif kind == "yep0":
            yep.parse (page,buf[int(chaddr,16):],iter1,0)
        elif kind == "emf":
            # mf_open reads page.type; switch it only for the call.
            pt = page.type
            page.type = "EMF"
            mf.mf_open (buf[int(chaddr,16):],page,iter1)
            page.type = pt
        elif kind == "pix":
            off = int(chaddr,16)
            if off:
                iter2 = add_pgiter(page,"Picture","escher","Blip",buf[off:],iter1)
                model.set_value(iter2,1,("escher","odraw","Blip"))
            else:
                # Offset 0: retag the selected row itself and re-activate it.
                model.set_value(iter1,1,("escher","odraw","Blip"))
                page.hd.hv.parent.on_row_activated(page.hd.hv,model.get_path(iter1),None)
        elif kind == "dump":
            dlg = gtk.FileChooserDialog('Save...', action=gtk.FILE_CHOOSER_ACTION_SAVE, buttons=(gtk.STOCK_OK,gtk.RESPONSE_OK,gtk.STOCK_CANCEL,gtk.RESPONSE_CANCEL))
            dlg.set_local_only(True)
            resp = dlg.run()
            fname = dlg.get_filename()
            # Destroy rather than hide so the dialog is not leaked.
            dlg.destroy()
            # The dialog can be closed without a file name being chosen.
            if resp != gtk.RESPONSE_CANCEL and fname:
                nlen = model.get_value(iter1,2)
                start, sep, endaddr = chaddr.partition(":")
                if sep:
                    value = buf[int(start,16):int(endaddr,16)]
                else:
                    value = buf[int(start,16):]
                if nlen is not None:
                    with open(fname,'wb') as f:
                        f.write(value)
                else:
                    print("Nothing to save")
        elif kind == "wmf" or kind == "apwmf":
            pt = page.type
            page.type = chtype.upper()
            mf.mf_open (buf[int(chaddr,16):],page,iter1)
            page.type = pt
        elif kind == "xls":
            # A cell address needs at least one column letter and one digit.
            if len(chaddr) < 2:
                print("Invalid cell address: %s" % chaddr)
                return
            ch2 = chaddr[1]
            if ch2.isdigit():
                # Single-letter column: 'a' -> 0.
                coladdr = ord(chaddr[0].lower()) - 97
                rowaddr = int(chaddr[1:]) - 1
            else:
                # Two-letter column: 'aa' -> 26.
                coladdr = 26*(ord(chaddr[0].lower()) - 96)+ ord(chaddr[1].lower()) - 97
                rowaddr = int(chaddr[2:]) - 1
            page.search = gtk.TreeStore(gobject.TYPE_STRING, gobject.TYPE_INT, gobject.TYPE_STRING, gobject.TYPE_INT)
            model.foreach(xlsfind,(page,rowaddr,coladdr))
            page.show_search("XLS: cell %s"%chaddr)
        elif kind == "rx2":
            off = int(chaddr,16)
            # Big-endian length stored 4 bytes after the given offset.
            newL = struct.unpack('>I', buf[off+4:off+8])[0]
            rx2.parse (model,buf[off:off+newL],0,iter1)
        elif kind == "dib":
            iter2 = add_pgiter (page,"[BMP]","",0,dib2bmp(buf[int(chaddr,16):]),iter1)
            model.set_value(iter2,1,("escher","odraw","Blip"))
        elif kind == "pct":
            pict.parse (page,buf,iter1)
        elif kind == "pm6":
            off = int(chaddr,16)
            pm6.open (page,buf,iter1,off)
        elif kind == "vba":
            vba.parse (page,buf,iter1)
        elif kind == "zip":
            try:
                print(int(chaddr,16))
                decobj = zlib.decompressobj()
                output = decobj.decompress(buf[int(chaddr,16):])
                add_pgiter (page,"[Decompressed data]","",0,output,iter1)
                tail = decobj.unused_data
                if len(tail) > 0:
                    add_pgiter (page,"[Tail]","",0,tail,iter1)
            except Exception:
                print("Failed to decompress as zlib")
                # Not a raw zlib stream: try it as a pkzip archive.
                try:
                    f = StringIO.StringIO(buf[int(chaddr,16):])
                    pkzip.open(f, page, iter1)
                    f.close()
                except Exception:
                    print("Failed to decompress as pkzip")
    elif cmd[0] == "?":
        # "?" alone carries no search type.
        if len(cmd) < 2:
            return
        ctype = cmd[1]
        carg = cmd[2:]
        # convert line to hex or unicode if required
        data = arg_conv(ctype,carg)
        model = page.view.get_model()
        page.search = gtk.TreeStore(gobject.TYPE_STRING, gobject.TYPE_INT, gobject.TYPE_STRING, gobject.TYPE_INT)
        if ctype == 'r' or ctype == 'R':
            model.foreach(recfind,(page,data))
        else:
            model.foreach(cmdfind,(page,data))
        page.show_search(carg)
def my_open (buf,page,parent=None): dircache = {} vbaiter = None docdata = "" docdataiter = None tbliter = None dirflag=0 ftype = "" tmpfile = None try: if parent is None: gsffilename = page.fname else: # need to save tmp file to pass to gsf (tmpfd, tmpfile) = mkstemp() gsffilename = tmpfile os.write(tmpfd, buf) os.close(tmpfd) gsfout = subprocess.check_output(["gsf", "list", gsffilename]) print gsfout print "-----------------" for i in gsfout.split("\n")[1:-1]: if i[0] == "f": # gsf sometimes lists date even for files. Or, rather, it # seems that it misrepresents empty dirs as (empty) files. # I have observed this with 'Objects' in many .pub files. if i[5] != ' ': fullname = " ".join(i.split()[4:]) else: fullname = " ".join(i.split()[2:]) if not len(fullname): fullname = " ".join(i.split()[2:]) if "/" in fullname: fns = fullname.split("/") cdir = "/".join(fns[:-1]) fn = fns[-1] else: fn = fullname cdir = "" if len(fn) and ord(fn[0]) < 32: fn = fn[1:] if cdir: cdir_to_treeiter(page,parent,cdir,dircache) pn = dircache["/"+cdir] else: pn = parent data = subprocess.check_output(["gsf", "cat", gsffilename, fullname]) iter1 = add_pgiter(page,fn,"ole",fn,data,pn) if fn == "DesignerDoc": ftype = "dsf" page.model.set_value(iter1,1,("dsf",dirflag)) dsf.open (page, data, iter1) if (fn == "EscherStm" or fn == "EscherDelayStm"): # and infchild.size()>0: ftype = "escher" page.model.set_value(iter1,1,("escher",dirflag)) escher.parse (page.model,data,iter1,"pub") # currently I don't parse it automagically for MSDOC if fn == "MagicTab": ftype = "wls" page.model.set_value(iter1,1,("wls",dirflag)) wls.parse (page,data,iter1) if fn == "CONTENTS": if data[6:11] == "WT602": ftype = "wt602" page.model.set_value(iter1,1,("wt602",dirflag)) wt602.parse (page,data,iter1) elif fullname.split('/')[0] == "OleObjects": # Nested OLE objects (or images) in WT602 wt602.parse_object(page, data, iter1) else: ftype = "quill" page.model.set_value(iter1,1,("quill",dirflag)) quill.parse 
(page,data,iter1) if fn == "Contents": if data[:2] == "\xe8\xac": # take signature into account ftype = "pub" page.model.set_value(iter1,1,("pub",dirflag)) page.appcontentdoc=pub.PublisherContentDoc(page,iter1) page.appcontentdoc.parse(data) if fn == "VisioDocument": ftype = "vsd" page.model.set_value(iter1,1,("vsd",dirflag)) # level = 1? vsd.parse (page, data, iter1) if fn == "PageMaker": ftype = "pm" page.model.set_value(iter1,1,("pm",dirflag)) pm6.open (page, data, iter1) if fn == "WordDocument": ftype = "doc" page.model.set_value(iter1,1,("doc",dirflag)) #level = 1 doc.parse (page, data, iter1) if fn == "1Table" or fn == "0Table": page.wtable = iter1 if fn == "Data" and page.type == "DOC": page.wdata = iter1 if fn == "Book" or fn == "Workbook": page.model.set_value(iter1,1,("xls",dirflag)) ftype = xls.parse (page, data, iter1) if fn == "PowerPoint Document" or fn == "Pictures": ftype = "ppt" page.model.set_value(iter1,1,("ppt",dirflag)) ppt.parse (page, data, iter1) if fn == "NativeContent_MAIN": ftype = "qpw" page.model.set_value(iter1,1,("qpw",dirflag)) qpw.parse (page, data, iter1) if fn == "Signature" and data[:4] == '\x60\x67\x01\x00': ftype = "ppp" #PagePlus OLE version (9.x?) 
if (fn == "contents" or fn == "SCFFPreview") and ftype == "ppp": ppp.parse(page,data,iter1,fn) # I've no idea if this is really the signature, but it is # present in all files I've seen so far if fn == "Header" and data[0xc:0xf] == 'xV4': ftype = 'zmf' zmf.zmf2_open(page, data, iter1, fn) if fn[-4:] == '.zmf': ftype = 'zmf' zmf.zmf2_open(page, data, iter1, fn) if fn[-4:] == '.BMI' and fullname.split('/')[0] == 'Bitmaps': ftype = 'bmi' bmi.open(data, page, iter1) if fn == "VBA": page.type = "vba" ftype = "vba" #if (ftype == "vba" and fn == "dir") or if "vba/dir" in fullname.lower(): page.model.set_value(iter1,1,("vba",dirflag)) vbaiter = iter1 vbadata = data if "SummaryInformation" in fn: page.model.set_value(iter1,1,("ole","propset")) else: if i.split()[2] == "VBA": page.type = "vba" ftype = "vba" if vbaiter != None: vba.parse (page, vbadata, vbaiter) except subprocess.CalledProcessError: print "Failed to run gsf. Please install libgsf." if tmpfile: try: os.remove(tmpfile) except: pass return ftype
def gsf_get_children(page,infile,parent,ftype,dirflag=0):
    """Add the children of a gsf infile to the tree and parse known streams.

    Every child is read, added with ``add_pgiter`` and, depending on its
    name (and for some streams a signature in the data), handed to the
    matching format parser.  The function then calls itself on the child.

    Arguments:
      page    -- page object; rows go to ``page.model`` and ``page.wtable``,
                 ``page.wdata`` and ``page.type`` may be set.
      infile  -- gsf infile object whose children are enumerated.
      parent  -- tree iter of ``infile``.
                 NOTE(review): not used in this function -- ``add_pgiter``
                 is called without a parent; confirm this is intended.
      ftype   -- file type detected so far.
      dirflag -- second element of the type tuple stored in column 1.

    Returns the file type after this level was processed (the incoming
    ``ftype`` if no stream changed it).  The value returned by the
    recursive call is ignored.
    """
    vbaiter = None
    vbadata = None
    for i in range(infile.num_children()):
        infchild = infile.child_by_index(i)
        infname = infile.name_by_index(i)
        chsize = infchild.size()
        # Drop a leading control character from the stream name.
        if infname and ord(infname[0]) < 32:
            infname = infname[1:]
        if infname == "dir":
            infuncomp = infchild.uncompress()
            data = infuncomp.read(infuncomp.size())
        else:
            data = infchild.read(chsize)
        iter1 = add_pgiter (page, infname, "ole", dirflag, data)

        if (infname == "EscherStm" or infname == "EscherDelayStm") and chsize>0:
            ftype = "escher"
            page.model.set_value(iter1,1,("escher",dirflag))
            # currently I don't parse it automagically for MSDOC
            escher.parse (page.model,data,iter1,"pub")
        if infname == "CONTENTS":
            if data[6:11] == "WT602":
                ftype = "wt602"
                page.model.set_value(iter1,1,("wt602",dirflag))
                wt602.parse (page,data,iter1)
            else:
                ftype = "quill"
                page.model.set_value(iter1,1,("quill",dirflag))
                quill.parse (page,data,iter1)
        if infname == "Contents":
            if data and data[:2] == "\xe8\xac": # take signature into account
                ftype = "pub"
                page.model.set_value(iter1,1,("pub",dirflag))
                pub.parse (page,data,iter1)
        if infname == "VisioDocument":
            ftype = "vsd"
            page.model.set_value(iter1,1,("vsd",dirflag))
            # choose vsd or vsd2
            vsd.parse (page, data, iter1)
        if infname == "PageMaker":
            ftype = "pm"
            page.model.set_value(iter1,1,("pm",dirflag))
            pm6.open (page, data, iter1)
        if infname == "WordDocument":
            ftype = "doc"
            page.model.set_value(iter1,1,("doc",dirflag))
            doc.parse (page, data, iter1)
        if infname == "1Table" or infname == "0Table":
            page.wtable = iter1
        if infname == "Data":
            page.wdata = iter1
        if infname == "Book" or infname == "Workbook":
            page.model.set_value(iter1,1,("xls",dirflag))
            ftype = xls.parse (page, data, iter1)
        # Parenthesised: the data check must apply to both stream names
        # ("and" binds tighter than "or").
        if (infname == "PowerPoint Document" or infname == "Pictures") \
                and data is not None:
            ftype = "ppt"
            page.model.set_value(iter1,1,("ppt",dirflag))
            ppt.parse (page, data, iter1)
        if infname == "NativeContent_MAIN":
            ftype = "qpw"
            page.model.set_value(iter1,1,("qpw",dirflag))
            qpw.parse (page, data, iter1)
        if infname == "Signature" and data[:4] == '\x60\x67\x01\x00':
            ftype = "ppp" #PagePlus OLE version (9.x?)
        # Only parsed once the "Signature" stream above was seen.
        if (infname == "contents" or infname == "SCFFPreview") and ftype == "ppp":
            ppp.parse(page,data,iter1,infname)
        # I've no idea if this is really the signature, but it is
        # present in all files I've seen so far
        if infname == "Header" and data[0xc:0xf] == 'xV4':
            ftype = 'zmf'
            zmf.zmf2_open(page, data, iter1, infname)
        if infname[-4:] == '.zmf':
            ftype = 'zmf'
            zmf.zmf2_open(page, data, iter1, infname)
        if infname == "VBA":
            # Remember the type detected so far on the page before
            # switching to VBA handling.
            page.type = ftype
            ftype = "vba"
        if ftype == "vba" and infname == "dir":
            # Parsed after the loop, once all siblings have been added.
            page.model.set_value(iter1,1,("vba",dirflag))
            vbaiter = iter1
            vbadata = data
        # NOTE(review): this tests the container being iterated, which is
        # always non-empty inside this loop, so every child is re-tagged
        # (ftype, 1) and recursed into.  ``infchild.num_children()`` looks
        # like the intent -- confirm before changing, the tag is user-visible.
        if infile.num_children() > 0:
            page.model.set_value(iter1,1,(ftype,1))
            gsf_get_children(page,infchild,iter1,ftype,0)
        if "SummaryInformation" in infname:
            page.model.set_value(iter1,1,("ole","propset"))
    if vbaiter is not None:
        vba.parse (page, vbadata, vbaiter)
    return ftype
def parse (cmd, entry, page): if cmd[0] == "$": pos = cmd.find("@") if pos != -1: chtype = cmd[1:pos] chaddr = cmd[pos+1:] else: chtype = cmd[1:] chaddr = "0" print "Command: ",chtype,chaddr treeSelection = page.view.get_selection() model, iter1 = treeSelection.get_selected() if iter1 == None: page.view.set_cursor_on_cell(0) treeSelection = page.view.get_selection() model, iter1 = treeSelection.get_selected() buf = model.get_value(iter1,3) if "ole" == chtype.lower(): if buf[int(chaddr,16):int(chaddr,16)+8] == "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1": ole.ole_open (buf[int(chaddr,16):],page,iter1) else: print "OLE stream not found at ",chaddr elif "bmp" == chtype.lower(): #try: if 1: addr,bpp,w,h = chaddr.split(":") dib_data = "\x28\x00\x00\x00"+struct.pack("<I",int(w))+struct.pack("<I",int(h))+"\x01\x00" dib_data += struct.pack("<H",int(bpp))+"\x00"*8+struct.pack("<I",160)*2+"\x00"*8 dib_data += buf[int(addr,16):] iter2 = add_pgiter (page,"[BMP]","escher","Blip",dib2bmp(dib_data),iter1) model.set_value(iter2,1,("escher","odraw","Blip")) #except: # print 'Failed to construct DIB data' elif "b64" == chtype.lower(): b64decode (page,buf[int(chaddr,16):],iter1) elif "cvx" == chtype.lower(): cvx.parse (page,buf[int(chaddr,16):],iter1) elif "esc" == chtype.lower(): escher.parse (model,buf[int(chaddr,16):],iter1) elif "cmx" == chtype.lower(): cdr.cdr_open (buf[int(chaddr,16):],page,iter1) elif "icc" == chtype.lower(): icc.parse (page,buf[int(chaddr,16):],iter1) elif "cdx" == chtype.lower(): chdraw.open (page,buf[int(chaddr,16):],iter1) elif "yep" == chtype.lower(): yep.parse (page,buf[int(chaddr,16):],iter1) elif "yep0" == chtype.lower(): yep.parse (page,buf[int(chaddr,16):],iter1,0) elif "emf" == chtype.lower(): pt = page.type page.type = "EMF" mf.mf_open (buf[int(chaddr,16):],page,iter1) page.type = pt elif "pix" == chtype.lower(): # try: off = int(chaddr,16) ntype = model.get_value(iter1,1) if off: iter2 = add_pgiter(page,"Picture","escher","Blip",buf[off:],iter1) 
model.set_value(iter2,1,("escher","odraw","Blip")) else: model.set_value(iter1,1,("escher","odraw","Blip")) page.hd.hv.parent.on_row_activated(page.hd.hv,model.get_path(iter1),None) # except: # print "Failed to add as a picture" elif "dump" == chtype.lower(): dlg = gtk.FileChooserDialog('Save...', action=gtk.FILE_CHOOSER_ACTION_SAVE, buttons=(gtk.STOCK_OK,gtk.RESPONSE_OK,gtk.STOCK_CANCEL,gtk.RESPONSE_CANCEL)) dlg.set_local_only(True) resp = dlg.run() fname = dlg.get_filename() dlg.hide() if resp != gtk.RESPONSE_CANCEL: nlen = model.get_value(iter1,2) if chaddr != 0: pos = chaddr.find(":") if pos != -1: endaddr = chaddr[pos+1:] chaddr = chaddr[:pos] value = model.get_value(iter1,3)[int(chaddr,16):int(endaddr,16)] else: value = model.get_value(iter1,3)[int(chaddr,16):] else: value = model.get_value(iter1,3)[int(chaddr,16):] if nlen != None: f = open(fname,'wb') f.write(value) f.close() else: print "Nothing to save" elif "wmf" == chtype.lower() or "apwmf" == chtype.lower(): pt = page.type page.type = chtype.upper() mf.mf_open (buf[int(chaddr,16):],page,iter1) page.type = pt elif "xls" == chtype.lower(): ch2 = chaddr[1] if ch2.isdigit(): coladdr = ord(chaddr[0].lower()) - 97 rowaddr = int(chaddr[1:]) - 1 else: coladdr = 26*(ord(chaddr[0].lower()) - 96)+ ord(chaddr[1].lower()) - 97 rowaddr = int(chaddr[2:]) - 1 page.search = gtk.TreeStore(gobject.TYPE_STRING, gobject.TYPE_INT, gobject.TYPE_STRING, gobject.TYPE_INT) model.foreach(xlsfind,(page,rowaddr,coladdr)) page.show_search("XLS: cell %s"%chaddr) elif "rx2" == chtype.lower(): newL = struct.unpack('>I', buf[int(chaddr,16)+4:int(chaddr,16)+8])[0] rx2.parse (model,buf[int(chaddr,16):int(chaddr,16)+newL],0,iter1) elif "dib" == chtype.lower(): iter2 = add_pgiter (page,"[BMP]","",0,dib2bmp(buf[int(chaddr,16):]),iter1) model.set_value(iter2,1,("escher","odraw","Blip")) elif "pct" == chtype.lower(): pict.parse (page,buf,iter1) elif "pm6" == chtype.lower(): off = int(chaddr,16) pm6.open (page,buf,iter1,off) elif "vba" == 
chtype.lower(): # off = int(chaddr,16) vba.parse (page,buf,iter1) elif "zip" == chtype.lower(): try: print int(chaddr,16) decobj = zlib.decompressobj() output = decobj.decompress(buf[int(chaddr,16):]) add_pgiter (page,"[Decompressed data]","",0,output,iter1) tail = decobj.unused_data if len(tail) > 0: add_pgiter (page,"[Tail]","",0,tail,iter1) except: print "Failed to decompress" elif cmd[0] == "?": ctype = cmd[1] carg = cmd[2:] # convert line to hex or unicode if required data = arg_conv(ctype,carg) model = page.view.get_model() page.search = gtk.TreeStore(gobject.TYPE_STRING, gobject.TYPE_INT, gobject.TYPE_STRING, gobject.TYPE_INT) if ctype == 'r' or ctype == 'R': model.foreach(recfind,(page,data)) else: model.foreach(cmdfind,(page,data)) page.show_search(carg)