Пример #1
0
def my_open(buf, page, parent=None):
    """Explode an OLE2 container with the ``gsf`` tool and parse known streams.

    The container is listed with ``gsf list``; every file entry is extracted
    with ``gsf cat``, added to the page tree with ``add_pgiter`` and, when its
    name matches a known stream, handed to the corresponding format parser.

    Args:
        buf: Raw container data.  Only used when *parent* is given, in which
            case it is written to a temporary file for ``gsf`` to read.
        page: Page object.  ``page.fname`` is passed to ``gsf`` when *parent*
            is None; ``page.model``, ``page.type``, ``page.wtable`` and
            ``page.wdata`` may be updated while parsing.
        parent: Tree iter the streams are attached under, or None for the
            top level.

    Returns:
        The detected format tag (``""`` when no known stream was seen), or
        None when ``gsf`` could not be run.
    """
    import tempfile  # local import: only needed for nested containers

    dircache = {}
    vbaiter = None
    vbadata = None
    dirflag = 0
    ftype = ""
    tmpfile = None
    try:
        if parent is None:
            gsffilename = page.fname
        else:
            # gsf only reads from disk, so spill the embedded container into
            # a private temporary file (removed in the ``finally`` below).
            tmpfd, tmpfile = tempfile.mkstemp()
            try:
                os.write(tmpfd, buf)
            finally:
                os.close(tmpfd)
            gsffilename = tmpfile
        gsfout = subprocess.check_output(["gsf", "list", gsffilename])
        print(gsfout)
        print("-----------------")
        # The first and the last line of the listing are not entries.
        for line in gsfout.split("\n")[1:-1]:
            if not line:
                continue
            if line[0] != "f":
                # Directory entry: only the VBA storage is of interest.
                if line.split()[2] == "VBA":
                    page.type = "vba"
                    ftype = "vba"
                continue

            # gsf sometimes lists date even for files. Or, rather, it
            # seems that it misrepresents empty dirs as (empty) files.
            # I have observed this with 'Objects' in many .pub files.
            fields = line.split()
            if line[5] != ' ':
                fullname = " ".join(fields[4:])
            else:
                fullname = " ".join(fields[2:])
            if not fullname:
                fullname = " ".join(fields[2:])
            if "/" in fullname:
                fns = fullname.split("/")
                cdir = "/".join(fns[:-1])
                fn = fns[-1]
            else:
                fn = fullname
                cdir = ""
            # Drop a leading control character from the stream name.
            if fn and ord(fn[0]) < 32:
                fn = fn[1:]
            if cdir:
                cdir_to_treeiter(page, parent, cdir, dircache)
                pn = dircache["/" + cdir]
            else:
                # Root-level streams belong under the requested parent too.
                pn = parent
            data = subprocess.check_output(
                ["gsf", "cat", gsffilename, fullname])
            iter1 = add_pgiter(page, fn, "ole", fn, data, pn)

            if fn == "DesignerDoc":
                ftype = "dsf"
                page.model.set_value(iter1, 1, ("dsf", dirflag))
                dsf.open(page, data, iter1)

            if fn in ("EscherStm", "EscherDelayStm"):
                ftype = "escher"
                page.model.set_value(iter1, 1, ("escher", dirflag))
                # currently I don't parse it automagically for MSDOC
                escher.parse(page.model, data, iter1, "pub")
            if fn == "MagicTab":
                ftype = "wls"
                page.model.set_value(iter1, 1, ("wls", dirflag))
                wls.parse(page, data, iter1)
            if fn == "CONTENTS":
                if data[6:11] == "WT602":
                    ftype = "wt602"
                    page.model.set_value(iter1, 1, ("wt602", dirflag))
                    wt602.parse(page, data, iter1)
                elif fullname.split('/')[0] == "OleObjects":
                    # Nested OLE objects (or images) in WT602
                    wt602.parse_object(page, data, iter1)
                else:
                    ftype = "quill"
                    page.model.set_value(iter1, 1, ("quill", dirflag))
                    quill.parse(page, data, iter1)
            if fn == "Contents":
                if data[:2] == "\xe8\xac":  # take signature into account
                    ftype = "pub"
                    page.model.set_value(iter1, 1, ("pub", dirflag))
                    pub.parse(page, data, iter1)
            if fn == "VisioDocument":
                ftype = "vsd"
                page.model.set_value(iter1, 1, ("vsd", dirflag))
                vsd.parse(page, data, iter1)
            if fn == "PageMaker":
                ftype = "pm"
                page.model.set_value(iter1, 1, ("pm", dirflag))
                pm6.open(page, data, iter1)
            if fn == "WordDocument":
                ftype = "doc"
                page.model.set_value(iter1, 1, ("doc", dirflag))
                doc.parse(page, data, iter1)
            if fn in ("1Table", "0Table"):
                page.wtable = iter1
            if fn == "Data" and page.type == "DOC":
                page.wdata = iter1
            if fn in ("Book", "Workbook"):
                page.model.set_value(iter1, 1, ("xls", dirflag))
                ftype = xls.parse(page, data, iter1)
            if fn in ("PowerPoint Document", "Pictures"):
                ftype = "ppt"
                page.model.set_value(iter1, 1, ("ppt", dirflag))
                ppt.parse(page, data, iter1)
            if fn == "NativeContent_MAIN":
                ftype = "qpw"
                page.model.set_value(iter1, 1, ("qpw", dirflag))
                qpw.parse(page, data, iter1)
            if fn == "Signature" and data[:4] == '\x60\x67\x01\x00':
                ftype = "ppp"  # PagePlus OLE version (9.x?)
            if fn in ("contents", "SCFFPreview") and ftype == "ppp":
                ppp.parse(page, data, iter1, fn)

            # I've no idea if this is really the signature, but it is
            # present in all files I've seen so far
            if fn == "Header" and data[0xc:0xf] == 'xV4':
                ftype = 'zmf'
                zmf.zmf2_open(page, data, iter1, fn)
            if fn[-4:] == '.zmf':
                ftype = 'zmf'
                zmf.zmf2_open(page, data, iter1, fn)

            if fn == "VBA":
                page.type = "vba"
                ftype = "vba"
            if "vba/dir" in fullname.lower():
                # Remember the VBA directory stream; it is parsed once all
                # other streams have been added.
                page.model.set_value(iter1, 1, ("vba", dirflag))
                vbaiter = iter1
                vbadata = data

            if "SummaryInformation" in fn:
                page.model.set_value(iter1, 1, ("ole", "propset"))
        if vbaiter is not None:
            vba.parse(page, vbadata, vbaiter)

    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: gsf ran but failed; OSError: gsf is missing
        # (or the temporary file could not be written).
        print("Failed to run gsf. Please install libgsf.")
        return None
    finally:
        # Only ever remove the file created here, never the user's input.
        if tmpfile is not None:
            try:
                os.remove(tmpfile)
            except OSError:
                pass

    return ftype
Пример #2
0
def parse(page, data, parent):
    """Split an XLS (BIFF) stream into records and add them to the tree.

    Every record consists of a 4-byte header (little-endian 16-bit type and
    16-bit payload length) followed by the payload.  One row is appended to
    ``page.model`` per record; records 0x809 (BOF) and 0x1033 open a new
    nesting level, records 10 (EOF) and 0x1034 close one.

    Args:
        page: Page object; ``page.model`` receives the rows and
            ``page.version`` is set when a BOF record names version 5 or 8.
        data: Raw stream content.
        parent: Tree iter the top-level records are attached to.

    Returns:
        ``"XLS5"`` or ``"XLS8"`` when a BOF record identifies the version,
        otherwise ``"XLS"``.
    """
    offset = 0
    ftype = "XLS"
    idx = 0
    lblidx = 1
    iters = [parent]
    print("Length of iters ", len(iters))
    curiter = iters[-1]

    try:
        # "<=" so that a trailing zero-length record (normally the final
        # EOF) is not dropped.
        while offset + 4 <= len(data):
            rtype, rlen = struct.unpack("<HH", data[offset:offset + 4])
            if rtype == 0:
                print("Break.", offset, len(data))
                break
            iter1 = page.model.append(curiter, None)
            rname = rec_ids[rtype] if rtype in rec_ids else ""
            print(rtype, rname, offset)
            if rtype == 0x809:
                iters.append(iter1)
                curiter = iter1
                ver = struct.unpack("<H", data[offset + 4:offset + 6])[0]
                dt = struct.unpack("<H", data[offset + 6:offset + 8])[0]
                if dt in substream:
                    rname = "BOF (%s)" % substream[dt]
                else:
                    rname = "BOF (unknown)"
                if ver == 0x500:
                    ftype = "XLS5"
                    page.version = 5
                    print("Version: 5")
                elif ver == 0x600:
                    ftype = "XLS8"
                    page.version = 8
                    print("Version: 8")
            elif rtype == 10 or rtype == 0x1034:
                # Never pop the caller's parent: an unbalanced closing
                # record must not abort the rest of the parse.
                if len(iters) > 1:
                    iters.pop()
                curiter = iters[-1]
            elif rtype == 0x1033:
                iters.append(iter1)
                curiter = iter1
            # Row records (0x208) are deliberately left with their generic
            # name.
            elif rtype == 0xe0:  # xf
                rname = "XF %02x" % idx
                idx += 1
            elif rtype == 0x18:  # Lbl
                rname = "Lbl %02x" % lblidx
                lblidx += 1
            offset += 4
            # The stored record data includes the 4-byte header.
            rdata = data[offset - 4:offset + rlen]
            page.model.set_value(iter1, 0, rname)
            page.model.set_value(iter1, 1, ("xls", rtype))
            page.model.set_value(iter1, 2, len(rdata))
            page.model.set_value(iter1, 3, rdata)
            page.model.set_value(iter1, 7, "0x%02x" % rtype)
            page.model.set_value(iter1, 6,
                                 page.model.get_string_from_iter(iter1))
            if rtype == 0xec:  # MsoDrawing
                escher.parse(page.model, rdata[4:], iter1)
            offset += rlen
    except Exception:
        # Best effort: keep whatever was parsed so far.
        print("Something was wrong in XLS parse")

    return ftype
Пример #3
0
def gsf_get_children(page, infile, parent, ftype, dirflag=0):
    """Add the children of a gsf infile to the tree and parse known streams.

    Args:
        page: Page object; ``page.model`` receives type tags, and
            ``page.type``, ``page.wtable`` and ``page.wdata`` may be updated.
        infile: gsf infile object providing ``num_children()``,
            ``child_by_index()`` and ``name_by_index()``.
        parent: Not used by this function; kept for interface compatibility.
        ftype: Format tag detected so far.
        dirflag: Second element of the type tuples stored in column 1.

    Returns:
        The format tag after inspecting the children of *infile*.  The
        result of the recursive calls is not propagated.
    """
    vbaiter = None
    vbadata = None
    for i in range(infile.num_children()):
        infchild = infile.child_by_index(i)
        infname = infile.name_by_index(i)
        chsize = infchild.size()

        # Drop a leading control character from the stream name.
        if infname and ord(infname[0]) < 32:
            infname = infname[1:]

        if infname == "dir":
            # The "dir" stream is read through uncompress().
            infuncomp = infchild.uncompress()
            data = infuncomp.read(infuncomp.size())
        else:
            data = infchild.read(chsize)

        iter1 = add_pgiter(page, infname, "ole", dirflag, data)

        if infname in ("EscherStm", "EscherDelayStm") and chsize > 0:
            ftype = "escher"
            page.model.set_value(iter1, 1, ("escher", dirflag))
            # currently I don't parse it automagically for MSDOC
            escher.parse(page.model, data, iter1, "pub")
        if infname == "CONTENTS":
            if data[6:11] == "WT602":
                ftype = "wt602"
                page.model.set_value(iter1, 1, ("wt602", dirflag))
                wt602.parse(page, data, iter1)
            else:
                ftype = "quill"
                page.model.set_value(iter1, 1, ("quill", dirflag))
                quill.parse(page, data, iter1)
        if infname == "Contents":
            if data and data[:2] == "\xe8\xac":  # take signature into account
                ftype = "pub"
                page.model.set_value(iter1, 1, ("pub", dirflag))
                pub.parse(page, data, iter1)
        if infname == "VisioDocument":
            ftype = "vsd"
            page.model.set_value(iter1, 1, ("vsd", dirflag))
            vsd.parse(page, data, iter1)
        if infname == "PageMaker":
            ftype = "pm"
            page.model.set_value(iter1, 1, ("pm", dirflag))
            pm6.open(page, data, iter1)
        if infname == "WordDocument":
            ftype = "doc"
            page.model.set_value(iter1, 1, ("doc", dirflag))
            doc.parse(page, data, iter1)
        if infname in ("1Table", "0Table"):
            page.wtable = iter1
        if infname == "Data":
            page.wdata = iter1

        if infname in ("Book", "Workbook"):
            page.model.set_value(iter1, 1, ("xls", dirflag))
            ftype = xls.parse(page, data, iter1)
        # The data check applies to both stream names; without the
        # parentheses "and" bound only to the "Pictures" comparison.
        if (infname in ("PowerPoint Document", "Pictures")
                and data is not None):
            ftype = "ppt"
            page.model.set_value(iter1, 1, ("ppt", dirflag))
            ppt.parse(page, data, iter1)
        if infname == "NativeContent_MAIN":
            ftype = "qpw"
            page.model.set_value(iter1, 1, ("qpw", dirflag))
            qpw.parse(page, data, iter1)
        if infname == "Signature" and data[:4] == '\x60\x67\x01\x00':
            ftype = "ppp"  # PagePlus OLE version (9.x?)
        if infname in ("contents", "SCFFPreview") and ftype == "ppp":
            ppp.parse(page, data, iter1, infname)

        # I've no idea if this is really the signature, but it is
        # present in all files I've seen so far
        if infname == "Header" and data[0xc:0xf] == 'xV4':
            ftype = 'zmf'
            zmf.zmf2_open(page, data, iter1, infname)
        if infname[-4:] == '.zmf':
            ftype = 'zmf'
            zmf.zmf2_open(page, data, iter1, infname)

        if infname == "VBA":
            page.type = ftype
            ftype = "vba"
        if ftype == "vba" and infname == "dir":
            # Remember the VBA directory stream; it is parsed after the loop.
            page.model.set_value(iter1, 1, ("vba", dirflag))
            vbaiter = iter1
            vbadata = data

        # NOTE(review): this tests the container being iterated (infile),
        # which always has children inside this loop, so every child is
        # re-tagged and recursed into.  Possibly infchild was intended --
        # confirm before changing, since it alters the stored type tags.
        if infile.num_children() > 0:
            page.model.set_value(iter1, 1, (ftype, 1))
            gsf_get_children(page, infchild, iter1, ftype, 0)
        if "SummaryInformation" in infname:
            page.model.set_value(iter1, 1, ("ole", "propset"))

    if vbaiter is not None:
        vba.parse(page, vbadata, vbaiter)

    return ftype
Пример #4
0
def parse (page, data, parent):
	"""Split an XLS (BIFF) stream into records and add them to the tree.

	Every record consists of a 4-byte header (little-endian 16-bit type and
	16-bit payload length) followed by the payload.  One row is appended to
	page.model per record; records 0x809 (BOF) and 0x1033 open a new
	nesting level, records 10 (EOF) and 0x1034 close one.

	page -- page object; page.model receives the rows and page.version is
	        set when a BOF record names version 5 or 8
	data -- raw stream content
	parent -- tree iter the top-level records are attached to

	Returns "XLS5" or "XLS8" when a BOF record identifies the version,
	otherwise "XLS".
	"""
	offset = 0
	ftype = "XLS"
	idx = 0
	lblidx = 1
	iters = [parent]
	# Single pre-formatted strings print identically on Python 2 and 3.
	print("Length of iters  %s" % len(iters))
	curiter = iters[-1]

	try:
		# "<=" so that a trailing zero-length record (normally the final
		# EOF) is not dropped.
		while offset + 4 <= len(data):
			rtype, rlen = struct.unpack("<HH", data[offset:offset+4])
			if rtype == 0:
				print("Break. %s %s" % (offset, len(data)))
				break
			iter1 = page.model.append(curiter, None)
			rname = ""
			if rtype in rec_ids:
				rname = rec_ids[rtype]
			print("%s %s %s" % (rtype, rname, offset))
			if rtype == 0x809:
				iters.append(iter1)
				curiter = iter1
				ver = struct.unpack("<H", data[offset+4:offset+6])[0]
				dt = struct.unpack("<H", data[offset+6:offset+8])[0]
				if dt in substream:
					rname = "BOF (%s)" % substream[dt]
				else:
					rname = "BOF (unknown)"
				if ver == 0x500:
					ftype = "XLS5"
					page.version = 5
					print("Version: 5")
				elif ver == 0x600:
					ftype = "XLS8"
					page.version = 8
					print("Version: 8")
			elif rtype == 10 or rtype == 0x1034:
				# Never pop the caller's parent: an unbalanced closing
				# record must not abort the rest of the parse.
				if len(iters) > 1:
					iters.pop()
				curiter = iters[-1]
			elif rtype == 0x1033:
				iters.append(iter1)
				curiter = iter1
			# Row records (0x208) are deliberately left with their generic
			# name.
			elif rtype == 0xe0: #xf
				rname = "XF %02x" % idx
				idx += 1
			elif rtype == 0x18: #Lbl
				rname = "Lbl %02x" % lblidx
				lblidx += 1
			offset += 4
			# The stored record data includes the 4-byte header.
			rdata = data[offset-4:offset+rlen]
			page.model.set_value(iter1, 0, rname)
			page.model.set_value(iter1, 1, ("xls", rtype))
			page.model.set_value(iter1, 2, len(rdata))
			page.model.set_value(iter1, 3, rdata)
			page.model.set_value(iter1, 7, "0x%02x" % rtype)
			page.model.set_value(iter1, 6, page.model.get_string_from_iter(iter1))
			if rtype == 0xec: #MsoDrawing
				escher.parse(page.model, rdata[4:], iter1)
			offset += rlen
	except Exception:
		# Best effort: keep whatever was parsed so far.
		print("Something was wrong in XLS parse")

	return ftype
Пример #5
0
def parse (cmd, entry, page):
	"""Run a one-line command against the page.

	Two command families are recognised:

	* ``$type[@addr]`` -- reinterpret the data (column 3) of the selected
	  tree row as ``type``; for most types ``addr`` is a hex offset into
	  that data (default ``0``).
	* ``?<c><arg>`` -- search the whole model; ``arg`` is converted by
	  ``arg_conv(c, arg)`` and ``c`` of ``r``/``R`` selects ``recfind``
	  instead of ``cmdfind``.

	Empty or incomplete commands are ignored.

	cmd -- the command string
	entry -- not used by this function
	page -- page object providing view, type, search, show_search() etc.
	"""
	if not cmd:
		return
	if cmd[0] == "$":
		pos = cmd.find("@")
		if pos != -1:
			chtype = cmd[1:pos]
			chaddr = cmd[pos+1:]
		else:
			chtype = cmd[1:]
			chaddr = "0"
		print("Command: ",chtype,chaddr)

		treeSelection = page.view.get_selection()
		model, iter1 = treeSelection.get_selected()
		if iter1 is None:
			# Nothing selected: fall back to the first row.
			page.view.set_cursor_on_cell(0)
			treeSelection = page.view.get_selection()
			model, iter1 = treeSelection.get_selected()
		buf = model.get_value(iter1,3)
		kind = chtype.lower()

		def tail():
			# Data from the hex offset given in the command to the end.
			return buf[int(chaddr,16):]

		if kind == "ole":
			off = int(chaddr,16)
			if buf[off:off+8] == "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1":
				ole.ole_open (buf[off:],page,iter1)
			else:
				print("OLE stream not found at ",chaddr)
		elif kind == "bmp":
			# Address has the form offset:bpp:width:height; build a
			# BITMAPINFOHEADER in front of the raw pixel data.
			addr,bpp,w,h = chaddr.split(":")
			dib_data = "\x28\x00\x00\x00"+struct.pack("<I",int(w))+struct.pack("<I",int(h))+"\x01\x00"
			dib_data += struct.pack("<H",int(bpp))+"\x00"*8+struct.pack("<I",160)*2+"\x00"*8
			dib_data += buf[int(addr,16):]
			iter2 = add_pgiter (page,"[BMP]","escher","Blip",dib2bmp(dib_data),iter1)
			model.set_value(iter2,1,("escher","odraw","Blip"))
		elif kind == "b64":
			b64decode (page,tail(),iter1)
		elif kind == "cvx":
			cvx.parse (page,tail(),iter1)
		elif kind == "esc":
			escher.parse (model,tail(),iter1)
		elif kind == "cmx":
			cdr.cdr_open (tail(),page,iter1)
		elif kind == "icc":
			icc.parse (page,tail(),iter1)
		elif kind == "cdx":
			chdraw.open (page,tail(),iter1)
		elif kind == "yep":
			yep.parse (page,tail(),iter1)
		elif kind == "yep0":
			yep.parse (page,tail(),iter1,0)
		elif kind == "emf":
			# mf_open looks at page.type; restore it even if parsing fails.
			pt = page.type
			page.type = "EMF"
			try:
				mf.mf_open (tail(),page,iter1)
			finally:
				page.type = pt
		elif kind == "pix":
			off = int(chaddr,16)
			if off:
				iter2 = add_pgiter(page,"Picture","escher","Blip",buf[off:],iter1)
				model.set_value(iter2,1,("escher","odraw","Blip"))
			else:
				model.set_value(iter1,1,("escher","odraw","Blip"))
				page.hd.hv.parent.on_row_activated(page.hd.hv,model.get_path(iter1),None)
		elif kind == "dump":
			dlg = gtk.FileChooserDialog('Save...', action=gtk.FILE_CHOOSER_ACTION_SAVE, buttons=(gtk.STOCK_OK,gtk.RESPONSE_OK,gtk.STOCK_CANCEL,gtk.RESPONSE_CANCEL))
			dlg.set_local_only(True)
			resp = dlg.run()
			fname = dlg.get_filename()
			dlg.hide()
			if resp != gtk.RESPONSE_CANCEL:
				nlen = model.get_value(iter1,2)
				if nlen is None:
					print("Nothing to save")
				elif not fname:
					print("No file selected")
				else:
					# Address is either "start" or "start:end" (both hex).
					pos = chaddr.find(":")
					if pos != -1:
						endaddr = chaddr[pos+1:]
						chaddr = chaddr[:pos]
						value = buf[int(chaddr,16):int(endaddr,16)]
					else:
						value = buf[int(chaddr,16):]
					with open(fname,'wb') as f:
						f.write(value)
		elif kind == "wmf" or kind == "apwmf":
			pt = page.type
			page.type = chtype.upper()
			try:
				mf.mf_open (tail(),page,iter1)
			finally:
				page.type = pt
		elif kind == "xls":
			# Address is a cell name: one or two column letters + row number.
			if len(chaddr) < 2 or chaddr[1].isdigit():
				ncol = 1
			else:
				ncol = 2
			col = chaddr[:ncol].lower()
			row = chaddr[ncol:]
			if not (col.isalpha() and row.isdigit()):
				print("Invalid XLS cell address ",chaddr)
			else:
				if ncol == 1:
					coladdr = ord(col) - 97
				else:
					coladdr = 26*(ord(col[0]) - 96) + ord(col[1]) - 97
				rowaddr = int(row) - 1
				page.search = gtk.TreeStore(gobject.TYPE_STRING, gobject.TYPE_INT, gobject.TYPE_STRING, gobject.TYPE_INT)
				model.foreach(xlsfind,(page,rowaddr,coladdr))
				page.show_search("XLS: cell %s"%chaddr)
		elif kind == "rx2":
			off = int(chaddr,16)
			newL = struct.unpack('>I', buf[off+4:off+8])[0]
			rx2.parse (model,buf[off:off+newL],0,iter1)
		elif kind == "dib":
			iter2 = add_pgiter (page,"[BMP]","",0,dib2bmp(tail()),iter1)
			model.set_value(iter2,1,("escher","odraw","Blip"))
		elif kind == "pct":
			pict.parse (page,buf,iter1)
		elif kind == "pm6":
			off = int(chaddr,16)
			pm6.open (page,buf,iter1,off)
		elif kind == "vba":
			vba.parse (page,buf,iter1)
		elif kind == "zip":
			try:
				print(int(chaddr,16))
				decobj = zlib.decompressobj()
				output = decobj.decompress(tail())
				add_pgiter (page,"[Decompressed data]","",0,output,iter1)
				leftover = decobj.unused_data
				if len(leftover) > 0:
					add_pgiter (page,"[Tail]","",0,leftover,iter1)
			except Exception:
				print("Failed to decompress as zlib")
				# Not a bare zlib stream: retry as a pkzip archive.
				try:
					f = StringIO.StringIO(tail())
					pkzip.open(f, page, iter1)
					f.close()
				except Exception:
					print("Failed to decompress as pkzip")

	elif cmd[0] == "?":
		if len(cmd) < 2:
			# No search type given.
			return
		ctype = cmd[1]
		carg = cmd[2:]
		# convert line to hex or unicode if required
		data = arg_conv(ctype,carg)
		model = page.view.get_model()
		page.search = gtk.TreeStore(gobject.TYPE_STRING, gobject.TYPE_INT, gobject.TYPE_STRING, gobject.TYPE_INT)
		if ctype == 'r' or ctype == 'R':
			model.foreach(recfind,(page,data))
		else:
			model.foreach(cmdfind,(page,data))
		page.show_search(carg)
Пример #6
0
def my_open (buf,page,parent=None):
	"""Explode an OLE2 container with the ``gsf`` tool and parse known streams.

	The container is listed with ``gsf list``; every file entry is extracted
	with ``gsf cat``, added to the page tree with add_pgiter and, when its
	name matches a known stream, handed to the corresponding format parser.

	buf -- raw container data; only used when parent is given, in which case
	       it is written to a temporary file for gsf to read
	page -- page object; page.fname is passed to gsf when parent is None
	parent -- tree iter the streams are attached under, or None

	Returns the detected format tag ("" when no known stream was seen or
	gsf could not be run before one was detected).
	"""
	dircache = {}
	vbaiter = None
	vbadata = None
	dirflag = 0
	ftype = ""
	tmpfile = None
	try:
		if parent is None:
			gsffilename = page.fname
		else:
			# need to save tmp file to pass to gsf
			(tmpfd, tmpfile) = mkstemp()
			gsffilename = tmpfile
			try:
				os.write(tmpfd, buf)
			finally:
				os.close(tmpfd)
		gsfout = subprocess.check_output(["gsf", "list", gsffilename])
		# Single-argument print() behaves the same on Python 2 and 3.
		print(gsfout)
		print("-----------------")
		# The first and the last line of the listing are not entries.
		for line in gsfout.split("\n")[1:-1]:
			if not line:
				continue
			if line[0] != "f":
				# Directory entry: only the VBA storage is of interest.
				if line.split()[2] == "VBA":
					page.type = "vba"
					ftype = "vba"
				continue

			# gsf sometimes lists date even for files. Or, rather, it
			# seems that it misrepresents empty dirs as (empty) files.
			# I have observed this with 'Objects' in many .pub files.
			fields = line.split()
			if line[5] != ' ':
				fullname = " ".join(fields[4:])
			else:
				fullname = " ".join(fields[2:])
			if not fullname:
				fullname = " ".join(fields[2:])
			if "/" in fullname:
				fns = fullname.split("/")
				cdir = "/".join(fns[:-1])
				fn = fns[-1]
			else:
				fn = fullname
				cdir = ""
			# Drop a leading control character from the stream name.
			if fn and ord(fn[0]) < 32:
				fn = fn[1:]
			if cdir:
				cdir_to_treeiter(page,parent,cdir,dircache)
				pn = dircache["/"+cdir]
			else:
				pn = parent
			data = subprocess.check_output(["gsf", "cat", gsffilename, fullname])
			iter1 = add_pgiter(page,fn,"ole",fn,data,pn)

			if fn == "DesignerDoc":
				ftype = "dsf"
				page.model.set_value(iter1,1,("dsf",dirflag))
				dsf.open (page, data, iter1)

			if fn in ("EscherStm", "EscherDelayStm"):
				ftype = "escher"
				page.model.set_value(iter1,1,("escher",dirflag))
				# currently I don't parse it automagically for MSDOC
				escher.parse (page.model,data,iter1,"pub")
			if fn == "MagicTab":
				ftype = "wls"
				page.model.set_value(iter1,1,("wls",dirflag))
				wls.parse (page,data,iter1)
			if fn == "CONTENTS":
				if data[6:11] == "WT602":
					ftype = "wt602"
					page.model.set_value(iter1,1,("wt602",dirflag))
					wt602.parse (page,data,iter1)
				elif fullname.split('/')[0] == "OleObjects":
					# Nested OLE objects (or images) in WT602
					wt602.parse_object(page, data, iter1)
				else:
					ftype = "quill"
					page.model.set_value(iter1,1,("quill",dirflag))
					quill.parse (page,data,iter1)
			if fn == "Contents":
				if data[:2] == "\xe8\xac": # take signature into account
					ftype = "pub"
					page.model.set_value(iter1,1,("pub",dirflag))
					page.appcontentdoc=pub.PublisherContentDoc(page,iter1)
					page.appcontentdoc.parse(data)
			if fn == "VisioDocument":
				ftype = "vsd"
				page.model.set_value(iter1,1,("vsd",dirflag))
				vsd.parse (page, data, iter1)
			if fn == "PageMaker":
				ftype = "pm"
				page.model.set_value(iter1,1,("pm",dirflag))
				pm6.open (page, data, iter1)
			if fn == "WordDocument":
				ftype = "doc"
				page.model.set_value(iter1,1,("doc",dirflag))
				doc.parse (page, data, iter1)
			if fn in ("1Table", "0Table"):
				page.wtable = iter1
			if fn == "Data" and page.type == "DOC":
				page.wdata = iter1
			if fn in ("Book", "Workbook"):
				page.model.set_value(iter1,1,("xls",dirflag))
				ftype = xls.parse (page, data, iter1)
			if fn in ("PowerPoint Document", "Pictures"):
				ftype = "ppt"
				page.model.set_value(iter1,1,("ppt",dirflag))
				ppt.parse (page, data, iter1)
			if fn == "NativeContent_MAIN":
				ftype = "qpw"
				page.model.set_value(iter1,1,("qpw",dirflag))
				qpw.parse (page, data, iter1)
			if fn == "Signature" and data[:4] == '\x60\x67\x01\x00':
				ftype = "ppp"  #PagePlus OLE version (9.x?)
			if fn in ("contents", "SCFFPreview") and ftype == "ppp":
				ppp.parse(page,data,iter1,fn)

			# I've no idea if this is really the signature, but it is
			# present in all files I've seen so far
			if fn == "Header" and data[0xc:0xf] == 'xV4':
				ftype = 'zmf'
				zmf.zmf2_open(page, data, iter1, fn)
			if fn[-4:] == '.zmf':
				ftype = 'zmf'
				zmf.zmf2_open(page, data, iter1, fn)
			if fn[-4:] == '.BMI' and fullname.split('/')[0] == 'Bitmaps':
				ftype = 'bmi'
				bmi.open(data, page, iter1)

			if fn == "VBA":
				page.type = "vba"
				ftype = "vba"
			if "vba/dir" in fullname.lower():
				# Remember the VBA directory stream; it is parsed once all
				# other streams have been added.
				page.model.set_value(iter1,1,("vba",dirflag))
				vbaiter = iter1
				vbadata = data

			if "SummaryInformation" in fn:
				page.model.set_value(iter1,1,("ole","propset"))
		if vbaiter is not None:
			vba.parse (page, vbadata, vbaiter)

	except (subprocess.CalledProcessError, OSError):
		# CalledProcessError: gsf ran but failed; OSError: gsf is missing
		# (or the temporary file could not be written).
		print("Failed to run gsf. Please install libgsf.")
	finally:
		# Remove the temporary file even when a parser raised.
		if tmpfile:
			try:
				os.remove(tmpfile)
			except OSError:
				pass

	return ftype
Пример #7
0
def gsf_get_children(page,infile,parent,ftype,dirflag=0):
	"""Add the children of a gsf infile to the tree and parse known streams.

	page -- page object; page.model receives type tags, and page.type,
	        page.wtable and page.wdata may be updated
	infile -- gsf infile object providing num_children(), child_by_index()
	          and name_by_index()
	parent -- not used by this function; kept for interface compatibility
	ftype -- format tag detected so far
	dirflag -- second element of the type tuples stored in column 1

	Returns the format tag after inspecting the children of infile.  The
	result of the recursive calls is not propagated.
	"""
	vbaiter = None
	vbadata = None
	for i in range(infile.num_children()):
		infchild = infile.child_by_index(i)
		infname = infile.name_by_index(i)
		chsize = infchild.size()

		# Drop a leading control character from the stream name.
		if infname and ord(infname[0]) < 32:
			infname = infname[1:]

		if infname == "dir":
			# The "dir" stream is read through uncompress().
			infuncomp = infchild.uncompress()
			data = infuncomp.read(infuncomp.size())
		else:
			data = infchild.read(chsize)

		iter1 = add_pgiter (page, infname, "ole", dirflag, data)

		if infname in ("EscherStm", "EscherDelayStm") and chsize > 0:
			ftype = "escher"
			page.model.set_value(iter1,1,("escher",dirflag))
			# currently I don't parse it automagically for MSDOC
			escher.parse (page.model,data,iter1,"pub")
		if infname == "CONTENTS":
			if data[6:11] == "WT602":
				ftype = "wt602"
				page.model.set_value(iter1,1,("wt602",dirflag))
				wt602.parse (page,data,iter1)
			else:
				ftype = "quill"
				page.model.set_value(iter1,1,("quill",dirflag))
				quill.parse (page,data,iter1)
		if infname == "Contents":
			if data and data[:2] == "\xe8\xac": # take signature into account
				ftype = "pub"
				page.model.set_value(iter1,1,("pub",dirflag))
				pub.parse (page,data,iter1)
		if infname == "VisioDocument":
			ftype = "vsd"
			page.model.set_value(iter1,1,("vsd",dirflag))
			vsd.parse (page, data, iter1)
		if infname == "PageMaker":
			ftype = "pm"
			page.model.set_value(iter1,1,("pm",dirflag))
			pm6.open (page, data, iter1)
		if infname == "WordDocument":
			ftype = "doc"
			page.model.set_value(iter1,1,("doc",dirflag))
			doc.parse (page, data, iter1)
		if infname in ("1Table", "0Table"):
			page.wtable = iter1
		if infname == "Data":
			page.wdata = iter1

		if infname in ("Book", "Workbook"):
			page.model.set_value(iter1,1,("xls",dirflag))
			ftype = xls.parse (page, data, iter1)
		# The data check applies to both stream names; without the
		# parentheses "and" bound only to the "Pictures" comparison.
		if infname in ("PowerPoint Document", "Pictures") and data is not None:
			ftype = "ppt"
			page.model.set_value(iter1,1,("ppt",dirflag))
			ppt.parse (page, data, iter1)
		if infname == "NativeContent_MAIN":
			ftype = "qpw"
			page.model.set_value(iter1,1,("qpw",dirflag))
			qpw.parse (page, data, iter1)
		if infname == "Signature" and data[:4] == '\x60\x67\x01\x00':
			ftype = "ppp"  #PagePlus OLE version (9.x?)
		if infname in ("contents", "SCFFPreview") and ftype == "ppp":
			ppp.parse(page,data,iter1,infname)

		# I've no idea if this is really the signature, but it is
		# present in all files I've seen so far
		if infname == "Header" and data[0xc:0xf] == 'xV4':
			ftype = 'zmf'
			zmf.zmf2_open(page, data, iter1, infname)
		if infname[-4:] == '.zmf':
			ftype = 'zmf'
			zmf.zmf2_open(page, data, iter1, infname)

		if infname == "VBA":
			page.type = ftype
			ftype = "vba"
		if ftype == "vba" and infname == "dir":
			# Remember the VBA directory stream; it is parsed after the loop.
			page.model.set_value(iter1,1,("vba",dirflag))
			vbaiter = iter1
			vbadata = data

		# NOTE(review): this tests the container being iterated (infile),
		# which always has children inside this loop, so every child is
		# re-tagged and recursed into.  Possibly infchild was intended --
		# confirm before changing, since it alters the stored type tags.
		if infile.num_children() > 0:
			page.model.set_value(iter1,1,(ftype,1))
			gsf_get_children(page,infchild,iter1,ftype,0)
		if "SummaryInformation" in infname:
			page.model.set_value(iter1,1,("ole","propset"))

	if vbaiter is not None:
		vba.parse (page, vbadata, vbaiter)

	return ftype
Пример #8
0
def parse (cmd, entry, page):
	"""Run a one-line command against the page.

	Two command families are recognised:

	* ``$type[@addr]`` -- reinterpret the data (column 3) of the selected
	  tree row as ``type``; for most types ``addr`` is a hex offset into
	  that data (default ``0``).
	* ``?<c><arg>`` -- search the whole model; ``arg`` is converted by
	  ``arg_conv(c, arg)`` and ``c`` of ``r``/``R`` selects ``recfind``
	  instead of ``cmdfind``.

	Empty or incomplete commands are ignored.

	cmd -- the command string
	entry -- not used by this function
	page -- page object providing view, type, search, show_search() etc.
	"""
	if not cmd:
		return
	if cmd[0] == "$":
		pos = cmd.find("@")
		if pos != -1:
			chtype = cmd[1:pos]
			chaddr = cmd[pos+1:]
		else:
			chtype = cmd[1:]
			chaddr = "0"
		# Single pre-formatted strings print identically on Python 2 and 3.
		print("Command:  %s %s" % (chtype, chaddr))

		treeSelection = page.view.get_selection()
		model, iter1 = treeSelection.get_selected()
		if iter1 is None:
			# Nothing selected: fall back to the first row.
			page.view.set_cursor_on_cell(0)
			treeSelection = page.view.get_selection()
			model, iter1 = treeSelection.get_selected()
		buf = model.get_value(iter1,3)
		kind = chtype.lower()

		def tail():
			# Data from the hex offset given in the command to the end.
			return buf[int(chaddr,16):]

		if kind == "ole":
			off = int(chaddr,16)
			if buf[off:off+8] == "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1":
				ole.ole_open (buf[off:],page,iter1)
			else:
				print("OLE stream not found at  %s" % chaddr)
		elif kind == "bmp":
			# Address has the form offset:bpp:width:height; build a
			# BITMAPINFOHEADER in front of the raw pixel data.
			addr,bpp,w,h = chaddr.split(":")
			dib_data = "\x28\x00\x00\x00"+struct.pack("<I",int(w))+struct.pack("<I",int(h))+"\x01\x00"
			dib_data += struct.pack("<H",int(bpp))+"\x00"*8+struct.pack("<I",160)*2+"\x00"*8
			dib_data += buf[int(addr,16):]
			iter2 = add_pgiter (page,"[BMP]","escher","Blip",dib2bmp(dib_data),iter1)
			model.set_value(iter2,1,("escher","odraw","Blip"))
		elif kind == "b64":
			b64decode (page,tail(),iter1)
		elif kind == "cvx":
			cvx.parse (page,tail(),iter1)
		elif kind == "esc":
			escher.parse (model,tail(),iter1)
		elif kind == "cmx":
			cdr.cdr_open (tail(),page,iter1)
		elif kind == "icc":
			icc.parse (page,tail(),iter1)
		elif kind == "cdx":
			chdraw.open (page,tail(),iter1)
		elif kind == "yep":
			yep.parse (page,tail(),iter1)
		elif kind == "yep0":
			yep.parse (page,tail(),iter1,0)
		elif kind == "emf":
			# mf_open looks at page.type; restore it even if parsing fails.
			pt = page.type
			page.type = "EMF"
			try:
				mf.mf_open (tail(),page,iter1)
			finally:
				page.type = pt
		elif kind == "pix":
			off = int(chaddr,16)
			if off:
				iter2 = add_pgiter(page,"Picture","escher","Blip",buf[off:],iter1)
				model.set_value(iter2,1,("escher","odraw","Blip"))
			else:
				model.set_value(iter1,1,("escher","odraw","Blip"))
				page.hd.hv.parent.on_row_activated(page.hd.hv,model.get_path(iter1),None)
		elif kind == "dump":
			dlg = gtk.FileChooserDialog('Save...', action=gtk.FILE_CHOOSER_ACTION_SAVE, buttons=(gtk.STOCK_OK,gtk.RESPONSE_OK,gtk.STOCK_CANCEL,gtk.RESPONSE_CANCEL))
			dlg.set_local_only(True)
			resp = dlg.run()
			fname = dlg.get_filename()
			dlg.hide()
			if resp != gtk.RESPONSE_CANCEL:
				nlen = model.get_value(iter1,2)
				if nlen is None:
					print("Nothing to save")
				elif not fname:
					print("No file selected")
				else:
					# Address is either "start" or "start:end" (both hex).
					pos = chaddr.find(":")
					if pos != -1:
						endaddr = chaddr[pos+1:]
						chaddr = chaddr[:pos]
						value = buf[int(chaddr,16):int(endaddr,16)]
					else:
						value = buf[int(chaddr,16):]
					with open(fname,'wb') as f:
						f.write(value)
		elif kind == "wmf" or kind == "apwmf":
			pt = page.type
			page.type = chtype.upper()
			try:
				mf.mf_open (tail(),page,iter1)
			finally:
				page.type = pt
		elif kind == "xls":
			# Address is a cell name: one or two column letters + row number.
			if len(chaddr) < 2 or chaddr[1].isdigit():
				ncol = 1
			else:
				ncol = 2
			col = chaddr[:ncol].lower()
			row = chaddr[ncol:]
			if not (col.isalpha() and row.isdigit()):
				print("Invalid XLS cell address  %s" % chaddr)
			else:
				if ncol == 1:
					coladdr = ord(col) - 97
				else:
					coladdr = 26*(ord(col[0]) - 96) + ord(col[1]) - 97
				rowaddr = int(row) - 1
				page.search = gtk.TreeStore(gobject.TYPE_STRING, gobject.TYPE_INT, gobject.TYPE_STRING, gobject.TYPE_INT)
				model.foreach(xlsfind,(page,rowaddr,coladdr))
				page.show_search("XLS: cell %s"%chaddr)
		elif kind == "rx2":
			off = int(chaddr,16)
			newL = struct.unpack('>I', buf[off+4:off+8])[0]
			rx2.parse (model,buf[off:off+newL],0,iter1)
		elif kind == "dib":
			iter2 = add_pgiter (page,"[BMP]","",0,dib2bmp(tail()),iter1)
			model.set_value(iter2,1,("escher","odraw","Blip"))
		elif kind == "pct":
			pict.parse (page,buf,iter1)
		elif kind == "pm6":
			off = int(chaddr,16)
			pm6.open (page,buf,iter1,off)
		elif kind == "vba":
			vba.parse (page,buf,iter1)
		elif kind == "zip":
			try:
				print(int(chaddr,16))
				decobj = zlib.decompressobj()
				output = decobj.decompress(tail())
				add_pgiter (page,"[Decompressed data]","",0,output,iter1)
				leftover = decobj.unused_data
				if len(leftover) > 0:
					add_pgiter (page,"[Tail]","",0,leftover,iter1)
			except Exception:
				print("Failed to decompress")

	elif cmd[0] == "?":
		if len(cmd) < 2:
			# No search type given.
			return
		ctype = cmd[1]
		carg = cmd[2:]
		# convert line to hex or unicode if required
		data = arg_conv(ctype,carg)
		model = page.view.get_model()
		page.search = gtk.TreeStore(gobject.TYPE_STRING, gobject.TYPE_INT, gobject.TYPE_STRING, gobject.TYPE_INT)
		if ctype == 'r' or ctype == 'R':
			model.foreach(recfind,(page,data))
		else:
			model.foreach(cmdfind,(page,data))
		page.show_search(carg)