def rewrite_zpt(self, input_):
    """Parse ``input_`` and return the rewritten document.

    The input is run through a ``PythonExpressionFilter`` that receives
    ``self.rewrite_action`` as its callback and ``self.filename`` for
    context. The filter's events are serialised into ``self.output`` by a
    ``CustomXMLGenerator`` using UTF-8.

    Returns the full contents of ``self.output``, read from position 0.

    Raises ``PTParseError`` when the parser collected any entry in its
    ``parse_errors`` list; every entry is logged before raising.
    """
    serializer = CustomXMLGenerator(self.output, encoding='utf-8')
    html_parser = HTMLGenerator(convert_charrefs=False)
    # The parser is handed an empty list to collect its problems in.
    html_parser.parse_errors = []

    expression_filter = PythonExpressionFilter(
        html_parser, self.rewrite_action, filename=self.filename)
    expression_filter.setContentHandler(serializer)
    expression_filter.setErrorHandler(handler.ErrorHandler())
    expression_filter.parse(input_)

    problems = html_parser.parse_errors
    for problem in problems:
        log.error(
            'Parsing error in %s:%d \n\t%s',
            self.filename, problem['lineno'], problem['tag'],
            exc_info=False,
        )
    if problems:
        raise PTParseError

    # Rewind so the caller gets everything that was written.
    self.output.seek(0)
    return self.output.read()
def filter_svg(input, output, mode):
    """filter_svg(input:path, output:file, mode)

    Read the SVG file at path ``input``, push it through an ``SVGFilter``
    and write the filtered XML to the ``output`` stream.

    ``mode`` selects what the filter does. According to the original
    documentation of this function, 'hotspots' filters out all layers except
    hotspots and slices (and makes hotspots visible), while 'shadows' filters
    out the shadows layer; that logic lives in ``SVGFilter``.

    Raises:
        ValueError: if ``mode`` yields no entries at all.
    """
    # NOTE(review): every item produced by iterating ``mode`` is wrapped in
    # its own one-element list -- confirm this is the shape SVGFilter expects
    # (a plain string is iterated character by character).
    mode_objs = [[m] for m in mode]
    if not mode_objs:
        raise ValueError("mode must contain at least one entry")

    output_gen = saxutils.XMLGenerator(output)
    parser = make_parser()
    svg_filter = SVGFilter(parser, output_gen, mode_objs)
    svg_filter.setFeature(handler.feature_namespaces, False)
    svg_filter.setErrorHandler(handler.ErrorHandler())

    # This little I/O dance ensures that the SAX parser does not stash away
    # an open file descriptor for the input file, which would prevent us from
    # unlinking it later: the file is read fully and closed before parsing.
    with open(input, "rb") as inp:
        contents = inp.read()
    source_object = saxutils.prepare_input_source(io.BytesIO(contents))
    svg_filter.parse(source_object)
def feed(self, handle, consumer):
    """Parse ``handle`` and report the resulting events to ``consumer``.

    An ``EventGenerator`` built from ``consumer``, ``self.interest_tags`` and
    ``_strip_and_combine`` is installed as the content handler of
    ``self._parser`` before the handle is parsed. Once parsing has finished,
    ``consumer.end_record()`` is called.
    """
    event_handler = EventGenerator(
        consumer, self.interest_tags, _strip_and_combine)
    sax_parser = self._parser
    sax_parser.setContentHandler(event_handler)
    sax_parser.setErrorHandler(handler.ErrorHandler())
    sax_parser.parseFile(handle)
    consumer.end_record()
def load(odffile):
    """Parse the XML parts of the OpenDocument archive ``odffile``.

    An ``OpenDocument`` is created from the archive's ``mimetype`` entry.
    Each of ``settings.xml``, ``meta.xml``, ``content.xml`` and ``styles.xml``
    that is listed in ``META-INF/manifest.xml`` is then fed through a
    namespace-aware SAX parser whose content handler is ``LoadParser(doc)``.

    The archive is always closed before returning, including when reading or
    parsing raises.
    """
    # NOTE(review): the visible code never returns ``doc`` -- confirm how
    # callers are expected to obtain the populated document.
    from load import LoadParser
    from xml.sax import make_parser, handler

    z = zipfile.ZipFile(odffile)
    try:
        mimetype = z.read('mimetype')
        doc = OpenDocument(mimetype, add_generator=False)

        # Look in the manifest file to see which of the four files exist.
        manifestpart = z.read('META-INF/manifest.xml')
        manifest = manifestlist(manifestpart)
        for xmlfile in ('settings.xml', 'meta.xml', 'content.xml',
                        'styles.xml'):
            if xmlfile not in manifest:
                continue
            try:
                xmlpart = z.read(xmlfile)
                # Record which part is being parsed while the parser runs.
                doc._parsing = xmlfile

                parser = make_parser()
                parser.setFeature(handler.feature_namespaces, 1)
                parser.setContentHandler(LoadParser(doc))
                parser.setErrorHandler(handler.ErrorHandler())

                inpsrc = InputSource()
                inpsrc.setByteStream(StringIO(xmlpart))
                parser.parse(inpsrc)
                del doc._parsing
            except KeyError:
                # Deliberate best effort: a KeyError raised while handling a
                # part (e.g. by ``z.read`` for a missing member) skips it.
                pass
    finally:
        z.close()
def __init__(self):
    """Set up empty parse state and a parser that reports back to ``self``.

    The parser comes from ``sitedata_format.format.make_parser()``; this
    instance is installed as its content handler.
    """
    handler.ContentHandler.__init__(self)

    # Parse state, reset to "nothing seen yet".
    self.data = None
    self.obj_stack = []
    self.curtext = ""

    sax_parser = sitedata_format.format.make_parser()
    self.parser = sax_parser
    sax_parser.setContentHandler(self)
    sax_parser.setErrorHandler(handler.ErrorHandler())
def test():
    """Check that three routes to a swissprot38 parser give the same XML.

    The module-level ``text`` is parsed with parsers obtained from
    ``swissprot38.format_expression``, ``swissprot38.format`` and
    ``swissprot38.format.expression``; the generated XML must be equal.
    """
    def render(fmt):
        # Parse ``text`` with a parser made by ``fmt`` and return the XML.
        sink = StringIO()
        sax_parser = fmt.make_parser()
        sax_parser.setErrorHandler(handler.ErrorHandler())
        sax_parser.setContentHandler(saxutils.XMLGenerator(sink))
        sax_parser.parseString(text)
        return sink.getvalue()

    from_format_expression = render(swissprot38.format_expression)
    from_format = render(swissprot38.format)
    from_nested_expression = render(swissprot38.format.expression)

    assert from_format_expression == from_format == from_nested_expression
def feed(self, handle, consumer):
    """Feed a set of data into the scanner.

    Arguments:
    o handle - A handle with the information to parse.
    o consumer - The consumer that should be informed of events.
    """
    # Only the consumer and the tags of interest are passed; no text
    # post-processing callback is given to the EventGenerator here.
    event_handler = EventGenerator(consumer, self.interest_tags)
    sax_parser = self._parser
    sax_parser.setContentHandler(event_handler)
    sax_parser.setErrorHandler(handler.ErrorHandler())
    sax_parser.parseFile(handle)
def __loadxmlparts(z, manifest, doc, objectpath):
    """
    Parses a document from its zipfile
    @param z an instance of zipfile.ZipFile
    @param manifest Manifest data structured in a dictionary
    @param doc instance of OpenDocument to feed in
    @param objectpath unicode string: path to an object

    For each of settings.xml, meta.xml, content.xml and styles.xml under
    ``objectpath`` that is present in ``manifest``, the part is read from
    ``z``, passed through ``__fixXmlPart`` and parsed with a namespace-aware
    SAX parser whose content handler is ``LoadParser(doc)``.
    A ``KeyError`` while handling a part is ignored; a ``SAXParseException``
    is reported by printing the offending XML.
    """
    assert isinstance(z, zipfile.ZipFile)
    assert isinstance(manifest, dict)
    assert isinstance(doc, OpenDocument)
    assert isinstance(objectpath, type(u""))

    from odf.load import LoadParser
    from defusedxml.sax import make_parser
    from xml.sax import handler
    # Needed to catch the parse failures reported below (originally added to
    # debug bad behavior with Python2, which raised SAXParseException).
    from xml.sax._exceptions import SAXParseException

    candidates = (objectpath + u'settings.xml',
                  objectpath + u'meta.xml',
                  objectpath + u'content.xml',
                  objectpath + u'styles.xml')
    for xmlfile in candidates:
        if xmlfile not in manifest:
            continue
        try:
            xmlpart = z.read(xmlfile).decode("utf-8")
            # Record which part is being parsed while the parser runs.
            doc._parsing = xmlfile

            parser = make_parser()
            parser.setFeature(handler.feature_namespaces, 1)
            # Do not include external general entities.
            parser.setFeature(handler.feature_external_ges, 0)
            parser.setContentHandler(LoadParser(doc))
            parser.setErrorHandler(handler.ErrorHandler())

            inpsrc = InputSource()
            # There may be a SAXParseException triggered because of
            # a missing xmlns prefix like meta, config, etc.
            # So such declarations are added when needed (GK, 2014/10/21).
            # Is there any option to prevent xmlns checks by SAX?
            xmlpart = __fixXmlPart(xmlpart)
            inpsrc.setByteStream(BytesIO(xmlpart.encode("utf-8")))
            parser.parse(inpsrc)
            del doc._parsing
        except KeyError:
            # Deliberate best effort: skip a part whose handling raised
            # KeyError (e.g. the member is missing from the archive).
            pass
        except SAXParseException:
            print(u"====== SAX FAILED TO PARSE ==========\n", xmlpart)
def odf2xhtml(odtfile):
    """Convert the OpenDocument archive ``odtfile`` and return the XHTML.

    ``meta.xml``, ``content.xml`` and ``styles.xml`` are read from the
    archive, which is closed again even if one of the reads raises. The parts
    are then parsed in the order meta, styles, content, each with a fresh
    namespace-aware SAX parser, all feeding one ``ODFContentHandler``.

    Returns the value of ``getxhtml()`` on that handler.
    """
    z = zipfile.ZipFile(odtfile)
    try:
        meta = z.read('meta.xml')
        content = z.read('content.xml')
        styles = z.read('styles.xml')
    finally:
        z.close()

    odhandler = ODFContentHandler()

    # Parsing order differs from reading order: styles come before content.
    for xmlpart in (meta, styles, content):
        parser = make_parser()
        parser.setFeature(handler.feature_namespaces, 1)
        parser.setContentHandler(odhandler)
        parser.setErrorHandler(handler.ErrorHandler())

        inpsrc = InputSource()
        inpsrc.setByteStream(StringIO(xmlpart))
        parser.parse(inpsrc)

    return odhandler.getxhtml()
def load(odtfile):
    """Build an ``OpenDocument`` from ``odtfile`` and return it.

    The document is created from the file's ``mimetype`` part; ``content.xml``
    is then parsed with a namespace-aware SAX parser whose content handler is
    ``ODFParser`` bound to that document.
    """
    document = OpenDocument(_getxmlpart(odtfile, 'mimetype'))

    parts_to_parse = ('content.xml', )
    for part_name in parts_to_parse:
        part_data = _getxmlpart(odtfile, part_name)

        sax_parser = make_parser()
        sax_parser.setFeature(handler.feature_namespaces, 1)
        sax_parser.setContentHandler(ODFParser(document))
        sax_parser.setErrorHandler(handler.ErrorHandler())

        source = InputSource()
        source.setByteStream(StringIO(part_data))
        sax_parser.parse(source)

    return document
def odf2xhtml(self, odtfile):
    """Parse the archive ``odtfile`` with ``self`` as handler; return the text.

    ``meta.xml``, ``styles.xml`` and ``content.xml`` are read from the
    archive in that order and parsed with one namespace-aware SAX parser
    whose content handler is this instance. ``self.xmlfile`` is set to the
    name of the part before it is parsed.

    The archive is closed even when reading or parsing raises.

    Returns the items of ``self.lines`` joined into a single string.
    """
    # Extract the interesting files
    z = zipfile.ZipFile(odtfile)
    try:
        parser = xml.sax.make_parser()
        parser.setFeature(handler.feature_namespaces, 1)
        parser.setContentHandler(self)
        parser.setErrorHandler(handler.ErrorHandler())
        # One parser and one input source are reused for all three parts.
        inpsrc = InputSource()

        for xmlfile in ('meta.xml', 'styles.xml', 'content.xml'):
            self.xmlfile = xmlfile
            content = z.read(xmlfile)
            inpsrc.setByteStream(StringIO(content))
            parser.parse(inpsrc)
    finally:
        z.close()

    return ''.join(self.lines)
def _filter_svg(self, input):
    """Run ``input`` through an ``SVGFilter`` writing into ``self.document``.

    The filter mode is ``"shadows,"`` when ``options.shadow`` is set and the
    empty string otherwise. When ``options.test`` is set, the contents of
    ``self.document`` are additionally written to ``output.svg`` inside
    ``pngs_directory``.
    """
    xml_writer = saxutils.XMLGenerator(self.document, encoding='utf8')
    sax_parser = make_parser()
    mode = "shadows," if options.shadow else ""

    svg_filter = SVGFilter(sax_parser, xml_writer, mode)
    svg_filter.setFeature(handler.feature_namespaces, False)
    svg_filter.setErrorHandler(handler.ErrorHandler())
    svg_filter.parse(input)

    # Drop the parsing machinery before the optional dump below.
    del svg_filter
    del sax_parser
    del xml_writer

    if not options.test:
        return
    with open('{}/output.svg'.format(pngs_directory), 'wb') as dump:
        dump.write(self.document.getvalue())
def __loadxmlparts(z, manifest, doc, objectpath):
    """Parse the standard XML parts found under ``objectpath`` into ``doc``.

    For each of settings.xml, meta.xml, content.xml and styles.xml (prefixed
    with ``objectpath``) that is present in ``manifest``, the part is read
    from the archive ``z`` and parsed with a namespace-aware SAX parser whose
    content handler is ``LoadParser(doc)``. A ``KeyError`` raised while
    handling a part is ignored and the part is skipped.
    """
    from load import LoadParser
    from xml.sax import make_parser, handler

    candidates = (objectpath + 'settings.xml',
                  objectpath + 'meta.xml',
                  objectpath + 'content.xml',
                  objectpath + 'styles.xml')
    for xmlfile in candidates:
        if xmlfile not in manifest:
            continue
        try:
            xmlpart = z.read(xmlfile)
            # Record which part is being parsed while the parser runs.
            doc._parsing = xmlfile

            parser = make_parser()
            parser.setFeature(handler.feature_namespaces, 1)
            parser.setContentHandler(LoadParser(doc))
            parser.setErrorHandler(handler.ErrorHandler())

            inpsrc = InputSource()
            inpsrc.setByteStream(StringIO(xmlpart))
            parser.parse(inpsrc)
            del doc._parsing
        except KeyError:
            # Deliberate best effort: skip a part whose handling raised
            # KeyError (e.g. the member is missing from the archive).
            pass