Exemplos de parse em Python, exemplos de vsd.parse em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: App.py Projeto: renyxa/re-lab

	def fload(self,buf="",parent=None,package=None):
		"""Identify the format of a buffer by its signature and dispatch to a parser.

		buf     -- raw data to inspect; when it is "" the file named by
		           self.fname is read instead.
		parent  -- tree iterator handed to the parsers; when it is None and no
		           signature matched, a generic "File" node is created.
		package -- only forwarded to iwa.detect() for the iWork subtype.

		Always returns 0. self.type is set by most branches, either to a
		literal tag or to whatever the parser returns.
		"""
		self.pname = os.path.split(self.fname)[1]
		if buf == "":
			# Read and close right away so the handle is not leaked on the
			# many early returns below.
			with open(self.fname,"rb") as f:
				buf = f.read()

		if buf[0:7] == "\0\0IIXPR" or buf[0:7] == "\0\0MMXPR":
			self.type = qxp.open(self, buf, parent)
			return 0

		if buf[:8] == "\x89PNG\x0d\x0a\x1a\x0a":
			self.type = pngot.open(self, buf, parent)
			return 0

		if buf[0:6] == "\x1aWLF10":
			self.type = vfb.open(self, buf, parent)
			return 0

		if buf[0:6] == "<?xml " or buf[0:14] == "\xff\xfe<\0?\0x\0m\0l\0 \0" or buf[0:14] == "\xfe\xff\0<\0?\0x\0m\0l\0 ":
			self.type = otxml.open(buf, self, parent)
			return 0

		if buf[0:8] == "CPT9FILE":
			self.type = cpt.open(buf, self, parent)
			return 0

		if buf[0:8] == "VjCD0100":
			self.type = chdraw.open(self, buf, parent)
			return 0

		if buf[0:4] == "EVHD":
			self.type = yep.parse(self, buf, parent)
			return 0

		if buf[0:4] == "XPIH":
			self.type = yep.parse_ppi(self, buf, parent)
			return 0

		if buf[0:5].lower() == "{\\rtf":
			self.type = rtf.open(buf, self, parent)
			return 0

		if buf[0:8] == "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1":
			self.type = ole.ole_open(buf, self, parent)
			return 0

		if buf[0:18] == "Visio (TM) Drawing":
			self.type = vsd.parse(self, buf, parent)
			return 0

		if buf[0:2] == "\x50\xc3":
			self.type = "CLP"
			clp.open (buf,self, parent)
			return 0

		if buf[0:6] == "VCLMTF":
			self.type = "SVM"
			svm.open (buf,self, parent)
			return 0

		if buf[:4] == "\x12\x90\xa8\x7f":
			nki.open(self,buf,parent)
			return 0

		# This one should be before CDR to properly handle v17
		if parent is not None:
			parname = self.model.get_value(parent,0)
			if parname == "[content]/dataFileList.dat":
				print ("Found XMLish CDR version")
				self.wtable = self.model.get_value(parent,3).split("\n")
			#elif ("[content/" in parname or "[font]/" in parname) and ".dat" in parname:
			elif ".dat" in parname:
				if self.wdata is None:
					self.wdata = {}
				p = parname.rfind("/")
				self.wdata[parname[p+1:]] = parent

		# The length check keeps ord(buf[11]) from failing on a truncated header.
		if buf[0:4] == "RIFF" and buf[8:11].lower() == "cdr" and len(buf) > 11:
			self.type = "CDR%x"%(ord(buf[11])-0x30)
			print ('Probably CDR')
			cdr.cdr_open(buf,self, parent)
			print (self.version)
			return 0

		if buf[0:4] == "RIFF" and buf[8:11] == "CMX":
			self.type = "CMX"
			cdr.cdr_open(buf,self, parent,"cmx")
			return 0

		if buf[0:2] == "WL":
			self.type = "CDR2"
			wld.open (buf,self, parent)
			return 0

		if buf[0:2] == "\xcc\xdc":
			self.type = "CPL"
			cpl.open (buf,self, parent)
			return 0

		if buf[0:4] == "8BGR":
			self.type = "BGR"
			abr.abr_open(buf,self, parent,"bgr")
			return 0

		if buf[4:8] == "8BIM":
			self.type = "ABR"
			abr.abr_open(buf,self, parent,"abr")
			return 0

		if buf[0:4] == "\xd7\xcd\xc6\x9a":
			self.type = "APWMF"
			mf.mf_open(buf,self, parent)
			print ("Aldus Placeable WMF")
			return 0

		if buf[0:8] == "\x4c\x00\x52\x00\x46\x00\x00\x00":
			self.type = "LRF"
			lrf.open(buf, self, parent)
			print ("LRF")
			return 0

		if buf[0:6] == "\x01\x00\x09\x00\x00\x03":
			self.type = "WMF"
			print ("Probably WMF")
			mf.mf_open(buf,self, parent)
			return 0

		if buf[40:44] == "\x20\x45\x4d\x46":
			self.type = "EMF"
			print ("Probably EMF")
			mf.mf_open(buf,self, parent)
			return 0

		# len() guard: a two-byte "KF" buffer has no third byte to inspect.
		if buf[0:2] =="KF" and len(buf) > 2 and buf[2] != "\x00":
			self.type = "CDW"
			print ("Probably CDW")
			cdw.open(buf,self, parent)
			return 0

		if buf[0:4] == "CAT " and buf[0x8:0xc] == "REX2":
			self.type = "REX2"
			print ("Probably REX2")
			rx2.open(buf,self, parent)
			return 0

		if buf[0:20] == "Kaydara FBX Binary  ":
			self.type = "FBX"
			print ("Probably FBX")
			fbx.open(buf,self, parent)
			return 0

		if buf[4:19] == "Standard Jet DB" or buf[4:19] == "Standard ACE DB":
			self.type = "MDB"
			print ("Probably MDB")
			mdb.parse (buf,self, parent)
			return 0

		if buf[0:4] == "\x50\x4b\x03\x04":
			self.type = "PKZIP"
			print ("Probably PK-ZIP")
			# pkzip.open is given the path, not the buffer; our own handle
			# was already closed right after reading.
			pkzip.open (self.fname,self, parent)
			return 0

		palmtype = buf[0x3c:0x44]
		if palmtype in palm.palm_types.keys():
			self.type = "PALM"
			print ("Probably Palm e-book")
			palm.open(buf, self, parent, palmtype)
			return 0

		if buf[2:10] == 'BOOKDOUG':
			self.type = 'IMP'
			print ('Probably SoftBook e-book')
			sbimp.open(buf, self, parent)
			return 0

		if buf[0:8] == 'ITOLITLS':
			self.type = 'LIT'
			print ('Probably LIT')
			lit.open(buf,self,parent)
			return 0

		if buf[0:6] == 'bplist':
			self.type = 'PLIST'
			print ('Probably PLIST')
			plist.open(buf,self,parent)
			return 0

		# FreeHand detection is best-effort: a failed parse falls through to
		# the remaining checks instead of aborting the load.
		fh_off = buf.find('FreeHand')
		if buf[0:3] == 'AGD':
			try:
				self.type = "FH"
				print ("Probably Freehand")
				fh.fh_open(buf,self)
				return 0
			except Exception:
				print ("Check for Freehand failed...")
		elif fh_off != -1:
			agd_off = buf.find('AGD')
			if agd_off > fh_off:
				try:
					self.type = "FH"
					print ("Probably Freehand 9+")
					fh.fh_open(buf,self, parent)
					return 0
				except Exception:
					print ("Check for Freehand 9+ failed...")
		if buf[0:4] == "FHD2" or buf[0:4] == "acf3":
			self.type = "FH12"
			fh12.fh_open(buf, self, parent, 0)
			return 0
		if buf[0:2] == "FH":
			fh_off = buf.find('FHDocHeader')
			if fh_off != -1:
				try:
					self.type = "FH"
					print ("Probably Freehand <5")
					fh.fh_open(buf, self, parent, 0)
					return 0
				except Exception:
					print ("Check for Freehand <5 failed...")
		if buf[8:11] == 'xV4':
			self.type = 'ZMF'
			print ('Probably Zoner Draw 4+')
			zmf.zmf4_open(buf, self, parent)
			return 0

		# NOTE: 0x29a is 666 in decimal .-)
		if buf[0:2] == '\x9a\x02' and buf[2:4] in ['\01\0', '\02\0', '\03\0', '\04\0']:
			self.type = 'ZBR'
			print ('Probably Zebra Metafile')
			zbr.open(buf, self, parent)
			return 0

		# The size is a little-endian 16-bit value at bytes 1..2 plus 4; it
		# is only computed when those bytes exist, so empty or tiny buffers
		# no longer raise IndexError.
		if len(buf) > 2 and buf[0] == '\0':
			size = (ord(buf[1]) | (ord(buf[2]) << 8)) + 4
			if size == len(buf) or (size < len(buf) and buf[4:7] == "\x80\x80\x04"):
				self.type = 'IWA'
				if not self.subtype:
					self.subtype = iwa.detect(package)
				print('Probably Apple iWork file')
				iwa.open(buf, self, parent, self.subtype)
				return 0

		if buf[0:0x17] == 'Software602\r\nCalc602 v.' and (buf[0x1c:0x24] == 'Tabulka\x1a' or buf[0x1c:0x21] == 'Graf\x1a'):
			self.type = 'C602'
			print('Probably C602 file')
			if buf[0x1c] == 'T':
				c602.parse_spreadsheet(buf, self, parent)
			else:
				c602.parse_chart(buf, self, parent)
			return 0

		if re.match('@[A-Z]{2} .*?\\r\\n', buf) and buf[-1] == '\x1a':
			self.type = 'T602'
			print('Probably T602 file')
			t602.parse(buf, self, parent)
			return 0
		if buf[0:4] == "\0\0\x02\0":
			# Slices instead of indexing: a 4- or 5-byte buffer must not raise.
			if buf[4:5] in ('\x20', '\x21') and buf[5:6] == '\x51':
				self.type = 'QUWQ'
				print('Probably Quattro Wq file')
				quattro_wq.wq_open(self, buf, parent)
				return 0

		# QuarkXPress 1.x data fork
		if buf[0:4] == '\x00\x1c\x00\x1c' or buf[0:4] == '\x00\x20\x00\x20':
			try:
				qxp.open_v1(self,buf,parent)
				self.type = 'QXP5'
				return 0
			except Exception:
				print ("Failed after attempt to parse as QXP1...")
		if buf[0:4] == '\x00\x26\x00\x26':
			try:
				qxp.open_v2(self,buf,parent)
				self.type = 'QXP5'
				return 0
			except Exception:
				print ("Failed after attempt to parse as QXP2...")

		# Nothing matched with confidence: make sure there is a node to hang
		# the remaining low-confidence guesses on.
		if parent is None:
			parent = add_pgiter(self, "File", "file","unknown",buf)

		# Likely false detection for DRW
		if buf[0:3] == "\x01\xff\x02":
			try:
				drw.open(self,buf,parent)
				self.model.set_value(parent, 0, "DRW")
			except Exception:
				print ("Failed after attempt to parse as DRW...")
		if buf[0:4] == "\xe7\xac\x2c\x00":
			self.type = 'pub1'
			print('Probably publisher 1 file')
			publisher1.publisher_open(self, buf, parent)

		return 0

Exemplo n.º 2

0

Exibir arquivo

def my_open(buf, page, parent=None):
    """List an OLE container with the external `gsf` tool and parse its streams.

    buf    -- raw container data; only used when `parent` is given, in which
              case it is written to a temporary file for `gsf` to read.
    page   -- page object; `page.fname` is listed directly when `parent` is
              None, and `page.model`, `page.type`, `page.wtable` and
              `page.wdata` are updated as streams are recognised.
    parent -- tree iterator the streams are attached under, or None.

    Returns the detected type tag (possibly ""), or None when `gsf` could not
    be run.
    """
    # Local import so this block does not depend on the file's import list.
    import tempfile

    dircache = {}
    vbaiter = None
    vbadata = None
    dirflag = 0
    ftype = ""
    tmpfile = None
    try:
        if parent is None:
            gsffilename = page.fname
        else:
            # need to save tmp file to pass to gsf
            tmpfd, tmpfile = tempfile.mkstemp()
            tmpf = os.fdopen(tmpfd, "wb")
            try:
                tmpf.write(buf)
            finally:
                tmpf.close()
            gsffilename = tmpfile
        gsfout = subprocess.check_output(["gsf", "list", gsffilename])
        print(gsfout)
        print("-----------------")
        for i in gsfout.split("\n")[1:-1]:
            if not i:
                continue
            if i[0] == "f":
                # gsf sometimes lists date even for files. Or, rather, it
                # seems that it misrepresents empty dirs as (empty) files.
                # I have observed this with 'Objects' in many .pub files.
                if i[5:6] != ' ':
                    fullname = " ".join(i.split()[4:])
                else:
                    fullname = " ".join(i.split()[2:])
                if not len(fullname):
                    fullname = " ".join(i.split()[2:])
                if "/" in fullname:
                    fns = fullname.split("/")
                    cdir = "/".join(fns[:-1])
                    fn = fns[-1]
                else:
                    fn = fullname
                    cdir = ""
                if len(fn) and ord(fn[0]) < 32:
                    fn = fn[1:]
                if cdir:
                    cdir_to_treeiter(page, parent, cdir, dircache)
                    pn = dircache["/" + cdir]
                else:
                    # Root-level streams hang under the caller's node, the
                    # same node the directory entries are created under.
                    pn = parent
                data = subprocess.check_output(
                    ["gsf", "cat", gsffilename, fullname])
                iter1 = add_pgiter(page, fn, "ole", fn, data, pn)

                if fn == "DesignerDoc":
                    ftype = "dsf"
                    page.model.set_value(iter1, 1, ("dsf", dirflag))
                    dsf.open(page, data, iter1)

                if (fn == "EscherStm"
                        or fn == "EscherDelayStm"):  # and infchild.size()>0:
                    ftype = "escher"
                    page.model.set_value(iter1, 1, ("escher", dirflag))
                    escher.parse(
                        page.model, data, iter1, "pub"
                    )  # currently I don't parse it automagically for MSDOC
                if fn == "MagicTab":
                    ftype = "wls"
                    page.model.set_value(iter1, 1, ("wls", dirflag))
                    wls.parse(page, data, iter1)
                if fn == "CONTENTS":
                    if data[6:11] == "WT602":
                        ftype = "wt602"
                        page.model.set_value(iter1, 1, ("wt602", dirflag))
                        wt602.parse(page, data, iter1)
                    elif fullname.split('/')[0] == "OleObjects":
                        # Nested OLE objects (or images) in WT602
                        wt602.parse_object(page, data, iter1)
                    else:
                        ftype = "quill"
                        page.model.set_value(iter1, 1, ("quill", dirflag))
                        quill.parse(page, data, iter1)
                if fn == "Contents":
                    if data[:2] == "\xe8\xac":  # take signature into account
                        ftype = "pub"
                        page.model.set_value(iter1, 1, ("pub", dirflag))
                        pub.parse(page, data, iter1)
                if fn == "VisioDocument":
                    ftype = "vsd"
                    page.model.set_value(iter1, 1,
                                         ("vsd", dirflag))  # level = 1?
                    vsd.parse(page, data, iter1)
                if fn == "PageMaker":
                    ftype = "pm"
                    page.model.set_value(iter1, 1, ("pm", dirflag))
                    pm6.open(page, data, iter1)
                if fn == "WordDocument":
                    ftype = "doc"
                    page.model.set_value(iter1, 1,
                                         ("doc", dirflag))  #level = 1
                    doc.parse(page, data, iter1)
                if fn == "1Table" or fn == "0Table":
                    page.wtable = iter1
                if fn == "Data" and page.type == "DOC":
                    page.wdata = iter1
                if fn == "Book" or fn == "Workbook":
                    page.model.set_value(iter1, 1, ("xls", dirflag))
                    ftype = xls.parse(page, data, iter1)
                if fn == "PowerPoint Document" or fn == "Pictures":
                    ftype = "ppt"
                    page.model.set_value(iter1, 1, ("ppt", dirflag))
                    ppt.parse(page, data, iter1)
                if fn == "NativeContent_MAIN":
                    ftype = "qpw"
                    page.model.set_value(iter1, 1, ("qpw", dirflag))
                    qpw.parse(page, data, iter1)
                if fn == "Signature" and data[:4] == '\x60\x67\x01\x00':
                    ftype = "ppp"  #PagePlus OLE version (9.x?)
                if (fn == "contents"
                        or fn == "SCFFPreview") and ftype == "ppp":
                    ppp.parse(page, data, iter1, fn)

                # I've no idea if this is really the signature, but it is
                # present in all files I've seen so far
                if fn == "Header" and data[0xc:0xf] == 'xV4':
                    ftype = 'zmf'
                    zmf.zmf2_open(page, data, iter1, fn)
                if fn[-4:] == '.zmf':
                    ftype = 'zmf'
                    zmf.zmf2_open(page, data, iter1, fn)

                if fn == "VBA":
                    page.type = "vba"
                    ftype = "vba"
                #if (ftype == "vba" and fn == "dir") or
                if "vba/dir" in fullname.lower():
                    page.model.set_value(iter1, 1, ("vba", dirflag))
                    vbaiter = iter1
                    vbadata = data

                if "SummaryInformation" in fn:
                    page.model.set_value(iter1, 1, ("ole", "propset"))
            else:
                fields = i.split()
                if len(fields) > 2 and fields[2] == "VBA":
                    page.type = "vba"
                    ftype = "vba"
        if vbaiter is not None:
            vba.parse(page, vbadata, vbaiter)

    except (subprocess.CalledProcessError, OSError):
        # OSError is what subprocess raises when the gsf executable itself
        # is missing; CalledProcessError only covers a non-zero exit status.
        print("Failed to run gsf. Please install libgsf.")
        return
    finally:
        # Only ever delete the temporary copy, never the caller's own file.
        if tmpfile is not None:
            try:
                os.remove(tmpfile)
            except OSError:
                pass

    return ftype

Exemplo n.º 3

0

Exibir arquivo

def gsf_get_children(page, infile, parent, ftype, dirflag=0):
    """Add every child of `infile` to the page tree and parse known streams.

    page    -- page object whose `model`, `type`, `wtable` and `wdata` are
               updated as streams are recognised.
    infile  -- container exposing num_children(), child_by_index() and
               name_by_index() (presumably a libgsf infile -- confirm).
    parent  -- accepted for the recursive call; not referenced in this body.
    ftype   -- type tag detected so far; updated and returned.
    dirflag -- value stored next to the type tag on each tree node.

    Returns the (possibly updated) type tag.
    """
    vbaiter = None
    vbadata = None
    for i in range(infile.num_children()):
        infchild = infile.child_by_index(i)

        infname = infile.name_by_index(i)
        chsize = infchild.size()
        #		print "Name ", infname, dirflag

        # Strip a leading control character; the emptiness check avoids an
        # IndexError on an unnamed entry.
        if infname and ord(infname[0]) < 32:
            infname = infname[1:]

        if infname == "dir":
            infuncomp = infchild.uncompress()
            data = infuncomp.read(infuncomp.size())
        else:
            data = infchild.read(chsize)

        # NOTE(review): `parent` is not passed here, so nodes are not
        # attached under it -- confirm against add_pgiter's signature.
        iter1 = add_pgiter(page, infname, "ole", dirflag, data)

        if (infname == "EscherStm"
                or infname == "EscherDelayStm") and chsize > 0:
            ftype = "escher"
            page.model.set_value(iter1, 1, ("escher", dirflag))
            escher.parse(
                page.model, data, iter1,
                "pub")  # currently I don't parse it automagically for MSDOC
        if infname == "CONTENTS":
            if data[6:11] == "WT602":
                ftype = "wt602"
                page.model.set_value(iter1, 1, ("wt602", dirflag))
                wt602.parse(page, data, iter1)
            else:
                ftype = "quill"
                page.model.set_value(iter1, 1, ("quill", dirflag))
                quill.parse(page, data, iter1)
        if infname == "Contents":
            if data and data[:2] == "\xe8\xac":  # take signature into account
                ftype = "pub"
                page.model.set_value(iter1, 1, ("pub", dirflag))
                pub.parse(page, data, iter1)
        if infname == "VisioDocument":
            ftype = "vsd"
            page.model.set_value(iter1, 1, ("vsd", dirflag))  # level = 1?
            # choose vsd or vsd2
            vsd.parse(page, data, iter1)
        if infname == "PageMaker":
            ftype = "pm"
            page.model.set_value(iter1, 1, ("pm", dirflag))
            pm6.open(page, data, iter1)
        if infname == "WordDocument":
            ftype = "doc"
            page.model.set_value(iter1, 1, ("doc", dirflag))  #level = 1
            doc.parse(page, data, iter1)
        if infname == "1Table" or infname == "0Table":
            page.wtable = iter1
        if infname == "Data":
            page.wdata = iter1

        if infname == "Book" or infname == "Workbook":
            page.model.set_value(iter1, 1, ("xls", dirflag))
            ftype = xls.parse(page, data, iter1)
        # Parentheses spell out the existing precedence: the None check
        # applies to "Pictures" only.
        if infname == "PowerPoint Document" or (infname == "Pictures"
                                                and data is not None):
            ftype = "ppt"
            page.model.set_value(iter1, 1, ("ppt", dirflag))
            ppt.parse(page, data, iter1)
        if infname == "NativeContent_MAIN":
            ftype = "qpw"
            page.model.set_value(iter1, 1, ("qpw", dirflag))
            qpw.parse(page, data, iter1)
        if infname == "Signature" and data[:4] == '\x60\x67\x01\x00':
            ftype = "ppp"  #PagePlus OLE version (9.x?)
        if (infname == "contents"
                or infname == "SCFFPreview") and ftype == "ppp":
            ppp.parse(page, data, iter1, infname)

        # I've no idea if this is really the signature, but it is
        # present in all files I've seen so far
        if infname == "Header" and data[0xc:0xf] == 'xV4':
            ftype = 'zmf'
            zmf.zmf2_open(page, data, iter1, infname)
        if infname[-4:] == '.zmf':
            ftype = 'zmf'
            zmf.zmf2_open(page, data, iter1, infname)

        if infname == "VBA":
            page.type = ftype
            ftype = "vba"
        if ftype == "vba" and infname == "dir":
            page.model.set_value(iter1, 1, ("vba", dirflag))
            vbaiter = iter1
            vbadata = data

        # NOTE(review): this tests the container, not the child, so it is
        # true for every iteration -- possibly meant infchild; confirm.
        if (infile.num_children() > 0):
            page.model.set_value(iter1, 1, (ftype, 1))
            gsf_get_children(page, infchild, iter1, ftype, 0)
        if "SummaryInformation" in infname:
            page.model.set_value(iter1, 1, ("ole", "propset"))

    if vbaiter is not None:
        vba.parse(page, vbadata, vbaiter)

    return ftype

Exemplo n.º 4

0

Exibir arquivo

Arquivo: ole.py Projeto: renyxa/re-lab

def my_open (buf,page,parent=None):
	"""List an OLE container with the external `gsf` tool and parse its streams.

	buf    -- raw container data; only used when `parent` is given, in which
	          case it is written to a temporary file for `gsf` to read.
	page   -- page object; `page.fname` is listed directly when `parent` is
	          None, and `page.model`, `page.type`, `page.wtable`,
	          `page.wdata` and `page.appcontentdoc` are updated as streams
	          are recognised.
	parent -- tree iterator the streams are attached under, or None.

	Returns the detected type tag, "" when nothing was recognised or `gsf`
	could not be run.
	"""
	dircache = {}
	vbaiter = None
	vbadata = None
	dirflag=0
	ftype = ""
	tmpfile = None
	try:
		if parent is None:
			gsffilename = page.fname
		else:
			# need to save tmp file to pass to gsf
			(tmpfd, tmpfile) = mkstemp()
			gsffilename = tmpfile
			tmpf = os.fdopen(tmpfd, "wb")
			try:
				tmpf.write(buf)
			finally:
				# Close even if the write fails so the descriptor is not leaked.
				tmpf.close()
		gsfout = subprocess.check_output(["gsf", "list", gsffilename])
		print(gsfout)
		print("-----------------")
		for i in gsfout.split("\n")[1:-1]:
			if not i:
				continue
			if i[0] == "f":
				# gsf sometimes lists date even for files. Or, rather, it
				# seems that it misrepresents empty dirs as (empty) files.
				# I have observed this with 'Objects' in many .pub files.
				if i[5:6] != ' ':
					fullname = " ".join(i.split()[4:])
				else:
					fullname = " ".join(i.split()[2:])
				if not len(fullname):
					fullname = " ".join(i.split()[2:])
				if "/" in fullname:
					fns = fullname.split("/")
					cdir = "/".join(fns[:-1])
					fn = fns[-1]
				else:
					fn = fullname
					cdir = ""
				if len(fn) and ord(fn[0]) < 32:
					fn = fn[1:]
				if cdir:
					cdir_to_treeiter(page,parent,cdir,dircache)
					pn = dircache["/"+cdir]
				else:
					pn = parent
				data = subprocess.check_output(["gsf", "cat", gsffilename, fullname])
				iter1 = add_pgiter(page,fn,"ole",fn,data,pn)

				if fn == "DesignerDoc":
					ftype = "dsf"
					page.model.set_value(iter1,1,("dsf",dirflag))
					dsf.open (page, data, iter1)

				if (fn == "EscherStm" or fn == "EscherDelayStm"): # and infchild.size()>0:
					ftype = "escher"
					page.model.set_value(iter1,1,("escher",dirflag))
					escher.parse (page.model,data,iter1,"pub") # currently I don't parse it automagically for MSDOC
				if fn == "MagicTab":
					ftype = "wls"
					page.model.set_value(iter1,1,("wls",dirflag))
					wls.parse (page,data,iter1)
				if fn == "CONTENTS":
					if data[6:11] == "WT602":
						ftype = "wt602"
						page.model.set_value(iter1,1,("wt602",dirflag))
						wt602.parse (page,data,iter1)
					elif fullname.split('/')[0] == "OleObjects":
						# Nested OLE objects (or images) in WT602
						wt602.parse_object(page, data, iter1)
					else:
						ftype = "quill"
						page.model.set_value(iter1,1,("quill",dirflag))
						quill.parse (page,data,iter1)
				if fn == "Contents":
					if data[:2] == "\xe8\xac": # take signature into account
						ftype = "pub"
						page.model.set_value(iter1,1,("pub",dirflag))
						page.appcontentdoc=pub.PublisherContentDoc(page,iter1)
						page.appcontentdoc.parse(data)
				if fn == "VisioDocument":
					ftype = "vsd"
					page.model.set_value(iter1,1,("vsd",dirflag)) # level = 1?
					vsd.parse (page, data, iter1)
				if fn == "PageMaker":
					ftype = "pm"
					page.model.set_value(iter1,1,("pm",dirflag))
					pm6.open (page, data, iter1)
				if fn == "WordDocument":
					ftype = "doc"
					page.model.set_value(iter1,1,("doc",dirflag)) #level = 1
					doc.parse (page, data, iter1)
				if fn == "1Table" or fn == "0Table":
					page.wtable = iter1
				if fn == "Data" and page.type == "DOC":
					page.wdata = iter1
				if fn == "Book" or fn == "Workbook":
					page.model.set_value(iter1,1,("xls",dirflag))
					ftype = xls.parse (page, data, iter1)
				if fn == "PowerPoint Document" or fn == "Pictures":
					ftype = "ppt"
					page.model.set_value(iter1,1,("ppt",dirflag))
					ppt.parse (page, data, iter1)
				if fn == "NativeContent_MAIN":
					ftype = "qpw"
					page.model.set_value(iter1,1,("qpw",dirflag))
					qpw.parse (page, data, iter1)
				if fn == "Signature" and data[:4] == '\x60\x67\x01\x00':
					ftype = "ppp"  #PagePlus OLE version (9.x?)
				if (fn == "contents" or fn == "SCFFPreview") and ftype == "ppp":
					ppp.parse(page,data,iter1,fn)

				# I've no idea if this is really the signature, but it is
				# present in all files I've seen so far
				if fn == "Header" and data[0xc:0xf] == 'xV4':
					ftype = 'zmf'
					zmf.zmf2_open(page, data, iter1, fn)
				if fn[-4:] == '.zmf':
					ftype = 'zmf'
					zmf.zmf2_open(page, data, iter1, fn)
				if fn[-4:] == '.BMI' and fullname.split('/')[0] == 'Bitmaps':
					ftype = 'bmi'
					bmi.open(data, page, iter1)

				if fn == "VBA":
					page.type = "vba"
					ftype = "vba"
				#if (ftype == "vba" and fn == "dir") or
				if "vba/dir" in fullname.lower():
					page.model.set_value(iter1,1,("vba",dirflag))
					vbaiter = iter1
					vbadata = data

				if "SummaryInformation" in fn:
					page.model.set_value(iter1,1,("ole","propset"))
			else:
				fields = i.split()
				if len(fields) > 2 and fields[2] == "VBA":
					page.type = "vba"
					ftype = "vba"
		if vbaiter is not None:
			vba.parse (page, vbadata, vbaiter)

	except (subprocess.CalledProcessError, OSError):
		# OSError is what subprocess raises when the gsf executable itself
		# is missing; CalledProcessError only covers a non-zero exit status.
		print("Failed to run gsf. Please install libgsf.")
	finally:
		# Runs on every exit path, so the temporary copy is removed even
		# when a parser raises.
		if tmpfile:
			try:
				os.remove(tmpfile)
			except OSError:
				pass

	return ftype

Exemplo n.º 5

0

Exibir arquivo

Arquivo: ole.py Projeto: renyxa/re-lab

def gsf_get_children(page,infile,parent,ftype,dirflag=0):
	"""Add every child of `infile` to the page tree and parse known streams.

	page    -- page object whose `model`, `type`, `wtable` and `wdata` are
	           updated as streams are recognised.
	infile  -- container exposing num_children(), child_by_index() and
	           name_by_index() (presumably a libgsf infile -- confirm).
	parent  -- accepted for the recursive call; not referenced in this body.
	ftype   -- type tag detected so far; updated and returned.
	dirflag -- value stored next to the type tag on each tree node.

	Returns the (possibly updated) type tag.
	"""
	vbaiter = None
	vbadata = None
	for i in range(infile.num_children()):
		infchild = infile.child_by_index(i)

		infname = infile.name_by_index(i)
		chsize = infchild.size()
#		print "Name ", infname, dirflag

		# Strip a leading control character; the emptiness check avoids an
		# IndexError on an unnamed entry.
		if infname and ord(infname[0]) < 32:
			infname = infname[1:]

		if infname == "dir":
			infuncomp = infchild.uncompress()
			data = infuncomp.read(infuncomp.size())
		else:
			data = infchild.read(chsize)

		# NOTE(review): `parent` is not passed here, so nodes are not
		# attached under it -- confirm against add_pgiter's signature.
		iter1 = add_pgiter (page, infname, "ole", dirflag, data)

		if (infname == "EscherStm" or infname == "EscherDelayStm") and chsize>0:
			ftype = "escher"
			page.model.set_value(iter1,1,("escher",dirflag))
			escher.parse (page.model,data,iter1,"pub") # currently I don't parse it automagically for MSDOC
		if infname == "CONTENTS":
			if data[6:11] == "WT602":
				ftype = "wt602"
				page.model.set_value(iter1,1,("wt602",dirflag))
				wt602.parse (page,data,iter1)
			else:
				ftype = "quill"
				page.model.set_value(iter1,1,("quill",dirflag))
				quill.parse (page,data,iter1)
		if infname == "Contents":
			if data and data[:2] == "\xe8\xac": # take signature into account
				ftype = "pub"
				page.model.set_value(iter1,1,("pub",dirflag))
				pub.parse (page,data,iter1)
		if infname == "VisioDocument":
			ftype = "vsd"
			page.model.set_value(iter1,1,("vsd",dirflag)) # level = 1?
			# choose vsd or vsd2
			vsd.parse (page, data, iter1)
		if infname == "PageMaker":
			ftype = "pm"
			page.model.set_value(iter1,1,("pm",dirflag))
			pm6.open (page, data, iter1)
		if infname == "WordDocument":
			ftype = "doc"
			page.model.set_value(iter1,1,("doc",dirflag)) #level = 1
			doc.parse (page, data, iter1)
		if infname == "1Table" or infname == "0Table":
			page.wtable = iter1
		if infname == "Data":
			page.wdata = iter1

		if infname == "Book" or infname == "Workbook":
			page.model.set_value(iter1,1,("xls",dirflag))
			ftype = xls.parse (page, data, iter1)
		# Parentheses spell out the existing precedence: the None check
		# applies to "Pictures" only.
		if infname == "PowerPoint Document" or (infname == "Pictures" and data is not None):
			ftype = "ppt"
			page.model.set_value(iter1,1,("ppt",dirflag))
			ppt.parse (page, data, iter1)
		if infname == "NativeContent_MAIN":
			ftype = "qpw"
			page.model.set_value(iter1,1,("qpw",dirflag))
			qpw.parse (page, data, iter1)
		if infname == "Signature" and data[:4] == '\x60\x67\x01\x00':
			ftype = "ppp"  #PagePlus OLE version (9.x?)
		if (infname == "contents" or infname == "SCFFPreview") and ftype == "ppp":
			ppp.parse(page,data,iter1,infname)

		# I've no idea if this is really the signature, but it is
		# present in all files I've seen so far
		if infname == "Header" and data[0xc:0xf] == 'xV4':
			ftype = 'zmf'
			zmf.zmf2_open(page, data, iter1, infname)
		if infname[-4:] == '.zmf':
			ftype = 'zmf'
			zmf.zmf2_open(page, data, iter1, infname)

		if infname == "VBA":
			page.type = ftype
			ftype = "vba"
		if ftype == "vba" and infname == "dir":
			page.model.set_value(iter1,1,("vba",dirflag))
			vbaiter = iter1
			vbadata = data

		# NOTE(review): this tests the container, not the child, so it is
		# true for every iteration -- possibly meant infchild; confirm.
		if (infile.num_children()>0):
			page.model.set_value(iter1,1,(ftype,1))
			gsf_get_children(page,infchild,iter1,ftype,0)
		if "SummaryInformation" in infname:
			page.model.set_value(iter1,1,("ole","propset"))

	if vbaiter is not None:
		vba.parse (page, vbadata, vbaiter)

	return ftype

Exemplo n.º 6

0

Exibir arquivo

Arquivo: App.py Projeto: rgwan/re-lab

	def fload(self,buf="",parent=None,package=None):
		self.pname = os.path.split(self.fname)[1]
		if buf == "":
			offset = 0
			f = open(self.fname,"rb")
			buf = f.read()

		if buf[2:8] == "IIXPR3":
			self.type = qxp.open(self, buf, parent)
			return 0

		if buf[:8] == "\x89PNG\x0d\x0a\x1a\x0a":
			self.type = pngot.open(self, buf, parent)
			return 0

		if buf[0:6] == "\x1aWLF10":
			self.type = vfb.open(self, buf, parent)
			return 0

		if buf[0:6] == "<?xml " or buf[0:14] == "\xff\xfe<\0?\0x\0m\0l\0 \0" or buf[0:14] == "\xfe\xff\0<\0?\0x\0m\0l\0 ":
			self.type = otxml.open(buf, self, parent)
			return 0

		if buf[0:8] == "CPT9FILE":
			self.type = cpt.open(buf, self, parent)
			return 0

		if buf[0:8] == "VjCD0100":
			self.type = chdraw.open(self, buf, parent)
			return 0

		if buf[0:4] == "EVHD":
			self.type = yep.parse(self, buf, parent)
			return 0

		if buf[0:4] == "XPIH":
			self.type = yep.parse_ppi(self, buf, parent)
			return 0

		if buf[0:5].lower() == "{\\rtf":
			self.type = rtf.open(buf, self, parent)
			return 0

		if buf[0:8] == "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1":
			self.type = ole.ole_open(buf, self, parent)
			return 0

		if buf[0:18] == "Visio (TM) Drawing":
			self.type = vsd.parse(self, buf, parent)
			return 0

		if buf[0:2] == "\x50\xc3":
			self.type = "CLP"
			clp.open (buf,self, parent)
			return 0

		if buf[0:6] == "VCLMTF":
			self.type = "SVM"
			svm.open (buf,self, parent)
			return 0

		if buf[:4] == "\x12\x90\xa8\x7f":
			nki.open(self,buf,parent)
			return 0

#		This one should be before CDR to properly handle v17
		if parent != None:
			parname = self.model.get_value(parent,0)
			if parname == "[content]/dataFileList.dat":
				print "Found XMLish CDR version"
				self.wtable = self.model.get_value(parent,3).split("\n")
			elif "[content]/" in parname and ".dat" in parname:
				if self.wdata == None:
					self.wdata = {}
				p = parname.rfind("/")
				self.wdata[parname[p+1:]] = parent
			
		if buf[0:4] == "RIFF" and buf[8:11].lower() == "cdr":
			self.type = "CDR%x"%(ord(buf[11])-0x30)
			print 'Probably CDR',
			cdr.cdr_open(buf,self, parent)
			print self.version
			return 0

		if buf[0:4] == "RIFF" and buf[8:11] == "CMX":
			self.type = "CMX"
			cdr.cdr_open(buf,self, parent,"cmx")
			return 0

		if buf[0:2] == "WL":
			self.type = "CDR2"
			wld.open (buf,self, parent)
			return 0

		if buf[0:2] == "\xcc\xdc":
			self.type = "CPL"
			cpl.open (buf,self, parent)
			return 0

		if buf[0:4] == "8BGR":
			self.type = "BGR"
			abr.abr_open(buf,self, parent,"bgr")
			return 0

		if buf[4:8] == "8BIM":
			self.type = "ABR"
			abr.abr_open(buf,self, parent,"abr")
			return 0

		if buf[0:4] == "\xd7\xcd\xc6\x9a":
			self.type = "APWMF"
			mf.mf_open(buf,self, parent)
			print "Aldus Placeable WMF"
			return 0

		if buf[0:8] == "\x4c\x00\x52\x00\x46\x00\x00\x00":
			self.type = "LRF"
			lrf.open(buf, self, parent)
			print "LRF"
			return 0

		if buf[0:6] == "\x01\x00\x09\x00\x00\x03":
			self.type = "WMF"
			print "Probably WMF"
			mf.mf_open(buf,self, parent)
			return 0

		if buf[40:44] == "\x20\x45\x4d\x46":
			self.type = "EMF"
			print "Probably EMF"
			mf.mf_open(buf,self, parent)
			return 0

		if buf[0:2] =="KF" and buf[2] != "\x00":
			self.type = "CDW"
			print "Probably CDW"
			cdw.open(buf,self, parent)
			return 0

		if buf[0:4] == "CAT " and buf[0x8:0xc] == "REX2":
			self.type = "REX2"
			print "Probably REX2"
			rx2.open(buf,self, parent)
			return 0
		
		if buf[0:20] == "Kaydara FBX Binary  ":
			self.type = "FBX"
			print "Probably FBX"
			fbx.open(buf,self, parent)
			return 0
		
		if buf[4:19] == "Standard Jet DB" or buf[4:19] == "Standard ACE DB":
			self.type = "MDB"
			print "Probably MDB"
			mdb.parse (buf,self, parent)
			return 0
		
		if buf[0:4] == "\x50\x4b\x03\x04":
			self.type = "PKZIP"
			print "Probably PK-ZIP"
			f.close()
			pkzip.open (self.fname,self, parent)
			return 0

		pdbtype = buf[0x3c:0x44]
                if pdbtype in pdb.pdb_types.keys():
			self.type = "PDB"
			print "Probably Palm e-book"
			pdb.open(buf, self, parent, pdbtype)
			return 0

		if buf[2:10] == 'BOOKDOUG':
			self.type = 'IMP'
			print 'Probably SoftBook e-book'
			sbimp.open(buf, self, parent)
			return 0

		if buf[0:8] == 'ITOLITLS':
			self.type = 'LIT'
			print 'Probably LIT'
			lit.open(buf,self,parent)
			return 0

		if buf[0:6] == 'bplist':
			self.type = 'PLIST'
			print 'Probably PLIST'
			plist.open(buf,self,parent)
			return 0

		fh_off = buf.find('FreeHand')
		if buf[0:3] == 'AGD':
			agd_off = 0
			agd_ver = ord(buf[agd_off+3])
			try:
				self.type = "FH"
				print "Probably Freehand"
				fh.fh_open(buf,self)
				return 0
			except:
				print "Check for Freehand failed..."
		elif fh_off != -1:
			agd_off = buf.find('AGD')
			if agd_off > fh_off:
				agd_ver = ord(buf[agd_off+3])
				try:
					self.type = "FH"
					print "Probably Freehand 9+"
					fh.fh_open(buf,self, parent)
					return 0
				except:
					print "Check for Freehand 9+ failed..."
		else:
			fh_off = buf.find('FHDocHeader')
			if fh_off != -1:
				if buf[0:2] == "FH":
					self.type = "FH"
					print "Probably Freehand <5"
					fh.fh_open(buf, self, parent, 0)
					return 0
		if buf[0:4] == "FHD2" or buf[0:4] == "acf3":
			self.type = "FH12"
			fh12.fh_open(buf, self, parent, 0)
			return 0
		if buf[8:11] == 'xV4':
			self.type = 'ZMF'
			print 'Probably Zoner Draw 4+'
			zmf.zmf4_open(buf, self, parent)
			return 0

		# NOTE: 0x29a is 666 in decimal .-)
		if buf[0:2] == '\x9a\x02' and buf[2:4] in ['\01\0', '\02\0', '\03\0', '\04\0']:
			self.type = 'ZBR'
			print 'Probably Zebra Metafile'
			zbr.open(buf, self, parent)
			return 0

		size = (ord(buf[1]) | (ord(buf[2]) << 8)) + 4
		if buf[0] == '\0' and (size == len(buf) or (size < len(buf) and buf[4:7] == "\x80\x80\x04")):
			self.type = 'IWA'
			if not self.subtype:
				self.subtype = iwa.detect(package)
			print('Probably Apple iWork file')
			iwa.open(buf, self, parent, self.subtype)
			return 0
			
		if buf[0:0x17] == 'Software602\r\nCalc602 v.' and (buf[0x1c:0x24] == 'Tabulka\x1a' or buf[0x1c:0x21] == 'Graf\x1a'):
			self.type = 'C602'
			print('Probably C602 file')
			if buf[0x1c] == 'T':
				c602.parse_spreadsheet(buf, self, parent)
			else:
				c602.parse_chart(buf, self, parent)
			return 0

		if parent == None:
			parent = add_pgiter(self, "File", "file","unknown",buf) 

		# Likely false detection for DRW
		if buf[0:3] == "\x01\xff\x02":
			try:
				drw.open(self,buf,parent)
				self.model.set_value(parent, 0, "DRW")
			except:
				print "Failed after attempt to parse as DRW..."

		return 0