def processdocx(self, inputfilepath): """ Convert a docx to html format, and calling """ zipFile = ZipFile(inputfilepath) entry = zipFile.getEntry("word/document.xml") stream = zipFile.getInputStream(entry) text = StreamSource(stream) factory = TransformerFactoryImpl() xslt = StreamSource(File(join(FascinatorHome.getPath(), "lib", "xslt", "docx2html.xsl"))) transformer = factory.newTransformer(xslt) tf = "/tmp/%s.html" % uuid.uuid4() transformer.transform(text, StreamResult(File(tf))) parser = DocxHtmlParser() parser.init() f = open(tf, "r") parser.feed(unescape(f.read())) f.close() try: remove(tf) except Exception, e: self.log.error("Failed to remove uploaded word file: %s." % tf) self.log.error(str(e))
from javax.xml.transform.stream import StreamSource, StreamResult
from javax.xml.transform.sax import SAXResult
from org.apache.fop.fo import ValidationException
from net.sf.saxon.trans import XPathException
from org.apache.fop.apps import *;

# Directory containing this module; the FOP configuration file and the
# XSLT stylesheets below are located relative to it.
RUNTIME_PATH = os.path.abspath(os.path.dirname(__file__))
CONFIG_PATH = os.path.join(RUNTIME_PATH, 'fop-config.xml')

# Module-level FOP factory, configured from fop-config.xml.
# NOTE(review): FopFactory presumably comes from the star import of
# org.apache.fop.apps above -- confirm.
fop_factory = FopFactory.newInstance()
fop_factory.setUserConfig(File(CONFIG_PATH));

# Both stylesheets are loaded through newTemplates() when this module is
# imported, so the resulting objects are shared module-level state.
xfrm_factory = TransformerFactory()
wl2fo_tmplt = xfrm_factory.newTemplates(StreamSource(\
    File(os.path.join(RUNTIME_PATH, "xslt", "wl2fo.xslt"))))
normalize_tmplt = xfrm_factory.newTemplates(StreamSource(\
    File(os.path.join(RUNTIME_PATH, "xslt", "normalize.xslt"))))

def process_file(filename):
    base, ext = os.path.splitext(filename)
    # Trailing comma: Python 2 print statement without a final newline.
    print "Transforming %s (%s)... " % (base, os.path.abspath(filename)) ,
    source = File(filename)