def limit_featurecollection(content, limit=200):
    """
    Parse a WFS FeatureCollection XML string and produce a similar
    string with at most ``limit`` features.

    content -- the FeatureCollection document; it is wrapped in a BytesIO.
    limit   -- value handed to the limiting filter (default 200).

    Returns the text collected from the downstream XMLGenerator.
    """
    parser = make_parser()
    _input = BytesIO(content)
    output = StringIO()
    try:
        input_source = InputSource()
        input_source.setByteStream(_input)
        downstream = XMLGenerator(output, 'utf-8')
        _filter = _XMLFilterLimit(parser, downstream, limit=limit)
        _filter.parse(input_source)
        return output.getvalue()
    finally:
        # Release both buffers even when the parse fails half-way.
        _input.close()
        output.close()
def rootElement(self):
    """Parse ``self.filename`` and return the root element of the document.

    The file is parsed with a ``MyConfigHandler`` working on a copy of the
    registrations of the start-up configuration context.  The returned root
    directly provides ``IRootDirective`` and has ``self`` as ``__parent__``.
    """
    # Get the context that was originally generated during startup and
    # create a new context using its registrations
    real_context = zope.app.appsetup.appsetup.getConfigContext()
    context = config.ConfigurationMachine()
    context._registry = copy.copy(real_context._registry)
    context._features = copy.copy(real_context._features)
    context.package = self.package

    # Shut up i18n domain complaints
    context.i18n_domain = 'zope'

    # Since we want to use a custom configuration handler, we need to
    # instantiate the parser object ourselves
    parser = make_parser()
    handler = MyConfigHandler(context)
    parser.setContentHandler(handler)
    parser.setFeature(feature_namespaces, True)

    # Open the file, parse it, and always close it again (the original
    # left the handle open).
    config_file = open(self.filename)
    try:
        src = InputSource(getattr(config_file, 'name', '<string>'))
        src.setByteStream(config_file)
        parser.parse(src)
    finally:
        config_file.close()

    # Finally we retrieve the root element, have it provide a special root
    # directive interface and give it a location, so that we can do local
    # lookups.
    root = handler.rootElement
    directlyProvides(root, IRootDirective)
    root.__parent__ = self
    return root
def Load(self):
    """Parse the SVG file ``self.filename`` and return ``self.object``.

    Any failure is reported through ``warn_tb('INTERNAL')`` and re-raised.
    """
    try:
        self.document()
        self.layer()

        error_handler = ErrorHandler()
        entity_resolver = EntityResolver()
        dtd_handler = DTDHandler()

        svg_file = open(self.filename, "r")
        try:
            input_source = InputSource()
            input_source.setByteStream(svg_file)
            xml_reader = xml.sax.make_parser()
            xml_reader.setContentHandler(SVGHandler(self))
            xml_reader.setErrorHandler(error_handler)
            xml_reader.setEntityResolver(entity_resolver)
            xml_reader.setDTDHandler(dtd_handler)
            xml_reader.setFeature(handler.feature_external_ges, False)
            xml_reader.parse(input_source)
        finally:
            # The original wrote ``input.close`` without calling it, so the
            # file was never closed.
            svg_file.close()

        self.end_all()
        if self.page_layout:
            self.object.load_SetLayout(self.page_layout)
        self.object.load_Completed()
        return self.object
    except:
        # Log and re-raise; nothing is swallowed here.
        warn_tb('INTERNAL')
        raise
def create(self, request, vendor, name, version):
    """Tag a catalogue resource with the tags posted as ``tags_xml``.

    Reads ``format`` (default ``'default'``) and ``tags_xml`` from
    ``request.POST``, parses the XML with a ``TagsXMLHandler`` and calls
    ``tag_resource`` for every collected tag.  Returns the value of
    ``get_tag_response`` for the resource.
    """
    from xml.sax.handler import feature_external_ges

    format = request.POST.get('format', 'default')

    # Get the xml containing the tags from the request
    tags_xml = request.POST.get('tags_xml')
    tags_xml = tags_xml.encode("utf-8")

    # Parse the xml containing the tags
    parser = make_parser()
    handler = TagsXMLHandler()

    # Tell the parser to use our handler
    parser.setContentHandler(handler)
    # The document comes straight from the client: never let the parser
    # resolve external entities on its behalf.
    parser.setFeature(feature_external_ges, False)

    # Parse the input
    inpsrc = InputSource()
    inpsrc.setByteStream(StringIO(tags_xml))
    parser.parse(inpsrc)

    # Get the resource's id for those vendor, name and version
    resource = get_object_or_404(CatalogueResource, short_name=name,
                                 vendor=vendor, version=version)

    # Insert the tags for these resource and user in the database
    for e in handler._tags:
        tag_resource(request.user, e, resource)

    return get_tag_response(resource, request.user, format)
def parse(self, xml, source='string'):
    '''Parses a XML stream.

       * If source is "string", xml must be a string containing
         valid XML content.

       * If source is "file", xml can be:
         - a string containing the path to the XML file on disk;
         - a file instance opened for reading. Note that in this case, this
           method will close it.

       Returns self.res.
    '''
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO
    self.parser.setContentHandler(self)
    self.parser.setErrorHandler(self)
    self.parser.setFeature(feature_external_ges, False)
    inputSource = InputSource()
    if source == 'string':
        inputSource.setByteStream(StringIO(xml))
    else:
        if not isinstance(xml, file):
            xml = file(xml)
        inputSource.setByteStream(xml)
    try:
        self.parser.parse(inputSource)
    finally:
        # Close the file even when the parser raises; the original only
        # closed it after a successful parse.
        if isinstance(xml, file):
            xml.close()
    return self.res
def _build_model(self):
    """Parse ``content.xml`` from the presenter's doc_dir into ``self.model``.

    The file is read twice: once to count its lines (stored on the content
    handler as ``lines``) and once to parse it.  On failure an ERROR message
    is emitted and IOError is raised; on success an OK message is emitted.
    """
    content_handler = XMLDocReader(self.presenter)
    error_handler = ErrorHandler()
    entity_resolver = EntityResolver()
    dtd_handler = DTDHandler()
    try:
        filename = os.path.join(self.presenter.doc_dir, 'content.xml')
        handler = open(filename, 'r')
        try:
            lines = float(sum(1 for l in handler))
        finally:
            handler.close()
        self.file_handler = open(filename, "r")
        try:
            input_source = InputSource()
            input_source.setByteStream(self.file_handler)
            content_handler.lines = lines
            xml_reader = xml.sax.make_parser()
            xml_reader.setContentHandler(content_handler)
            xml_reader.setErrorHandler(error_handler)
            xml_reader.setEntityResolver(entity_resolver)
            xml_reader.setDTDHandler(dtd_handler)
            xml_reader.parse(input_source)
        finally:
            # Do not leak the handle when parsing fails.
            self.file_handler.close()
        content_handler.file = None
    except Exception:
        errtype, value, traceback = sys.exc_info()
        # ``value`` is an exception instance; concatenating it to a string
        # without str() raised TypeError and hid the real error.
        msg = _('It seems content.xml is corrupted') + '\n' + str(value)
        events.emit(events.MESSAGES, msgconst.ERROR, msg)
        raise IOError(errtype, msg, traceback)
    self.model = content_handler.model

    msg = _('Content.xml is parsed successfully')
    events.emit(events.MESSAGES, msgconst.OK, msg)
def __loadxmlparts(z, manifest, doc, objectpath):
    """Feed the XML parts found under ``objectpath`` into ``doc``.

    z          -- object whose ``read(name)`` returns the content of a part
    manifest   -- container of the part names present in the package
    doc        -- document handed to ``LoadParser``
    objectpath -- prefix prepended to the four standard part names

    Parts missing from ``manifest`` are skipped.
    """
    from load import LoadParser
    from xml.sax import make_parser, handler

    for xmlfile in (objectpath + 'settings.xml', objectpath + 'meta.xml',
                    objectpath + 'content.xml', objectpath + 'styles.xml'):
        if xmlfile not in manifest:
            continue
        try:
            xmlpart = z.read(xmlfile)
            doc._parsing = xmlfile

            parser = make_parser()
            parser.setFeature(handler.feature_namespaces, 1)
            # Package content is not trusted: do not resolve external
            # general entities while reading it.
            parser.setFeature(handler.feature_external_ges, False)
            parser.setContentHandler(LoadParser(doc))
            parser.setErrorHandler(handler.ErrorHandler())

            inpsrc = InputSource()
            inpsrc.setByteStream(StringIO(xmlpart))
            parser.parse(inpsrc)
            del doc._parsing
        except KeyError:
            # Deliberate best effort: a KeyError raised for this part is
            # ignored and the next part is tried.
            pass
def countriesSource() -> InputSource:
    '''
    Build the XML input source for the countries list: the file
    'iso_3166-1_list_en.xml' located next to this module, opened with openURI.
    '''
    xml_location = path.join(path.dirname(__file__), 'iso_3166-1_list_en.xml')
    countries = InputSource()
    countries.setByteStream(openURI(xml_location))
    return countries
class AbstractXMLLoader(AbstractLoader, handler.ContentHandler):
    """Loader base class that feeds ``self.fileptr`` through a SAX parser.

    Subclasses override ``start_element``, ``end_element`` and
    ``element_data``; the SAX callbacks simply forward to them.
    """

    # SAX reader and input source, created by init_load().
    xml_reader = None
    input_source = None

    def init_load(self):
        """Create the input source and the reader, then call do_load()."""
        self.input_source = InputSource()
        self.input_source.setByteStream(self.fileptr)
        self.xml_reader = xml.sax.make_parser()
        self.xml_reader.setContentHandler(self)
        self.xml_reader.setErrorHandler(ErrorHandler())
        self.xml_reader.setEntityResolver(EntityResolver())
        self.xml_reader.setDTDHandler(DTDHandler())
        # Loaded documents are not trusted: never resolve external general
        # entities.
        self.xml_reader.setFeature(handler.feature_external_ges, False)
        self.do_load()

    def start_parsing(self):
        """Run the reader over the prepared input source."""
        self.xml_reader.parse(self.input_source)

    def startElement(self, name, attrs):
        self.start_element(name, attrs)

    def endElement(self, name):
        self.end_element(name)

    def characters(self, data):
        self.element_data(data)

    def start_element(self, name, attrs):
        """Hook called for every opening tag; does nothing by default."""
        pass

    def end_element(self, name):
        """Hook called for every closing tag; does nothing by default."""
        pass

    def element_data(self, data):
        """Hook called for character data; does nothing by default."""
        pass
def load(odffile):
    """ Load an ODF file into memory

        Reads the mimetype and the manifest from the archive, then parses
        each of the four standard XML parts listed in the manifest into a
        new OpenDocument.
    """
    # NOTE(review): no return statement is visible in this block although
    # the original summary promised "a reference to the structure".
    from load import LoadParser
    from xml.sax import make_parser, handler

    z = zipfile.ZipFile(odffile)
    mimetype = z.read("mimetype")
    doc = OpenDocument(mimetype, add_generator=False)

    # Look in the manifest file to see which of the four files there are
    manifest = manifestlist(z.read("META-INF/manifest.xml"))
    for part_name in ("settings.xml", "meta.xml", "content.xml", "styles.xml"):
        if manifest.has_key(part_name):
            try:
                part_data = z.read(part_name)
                doc._parsing = part_name

                reader = make_parser()
                reader.setFeature(handler.feature_namespaces, 1)
                reader.setContentHandler(LoadParser(doc))
                reader.setErrorHandler(handler.ErrorHandler())

                part_source = InputSource()
                part_source.setByteStream(StringIO(part_data))
                reader.parse(part_source)
                del doc._parsing
            except KeyError:
                pass
def create(self,request, user_name, vendor, name, version):
    """Store the tags posted as ``tags_xml`` for a gadget and a user.

    Returns an XML response listing the tags of the gadget, or an
    HttpResponseServerError when a tag cannot be stored.
    """
    from xml.sax.handler import feature_external_ges

    user = user_authentication(user_name)

    # Get the xml containing the tags from the request
    tags_xml = request.__getitem__('tags_xml')

    # Parse the xml containing the tags
    parser = make_parser()
    handler = TagsXMLHandler()

    # Tell the parser to use our handler
    parser.setContentHandler(handler)
    # The document is client supplied: do not resolve external entities.
    parser.setFeature(feature_external_ges, False)

    # Parse the input
    try:
        from StringIO import StringIO
    except ImportError:
        from cStringIO import StringIO
    inpsrc = InputSource()
    inpsrc.setByteStream(StringIO(tags_xml))
    parser.parse(inpsrc)

    # Get the gadget's id for those vendor, name and version
    gadget = get_object_or_404(GadgetResource, short_name=name,vendor=vendor,version=version)

    # Insert the tags for these resource and user in the database
    for e in handler._tags:
        try:
            UserTag.objects.get_or_create(tag=e, idUser=user, idResource=gadget)
        except Exception:
            # ``Exception`` rather than a bare except so that SystemExit
            # and KeyboardInterrupt are not turned into a 500 response.
            return HttpResponseServerError(get_xml_error(str(sys.exc_info()[1])),mimetype='text/xml; charset=UTF-8')

    response = '<?xml version="1.0" encoding="UTF-8" ?>\n'
    response += get_tags_by_resource(gadget, user)
    return HttpResponse(response,mimetype='text/xml; charset=UTF-8')
def load(odffile):
    """Read the ODF package ``odffile`` and return it as an OpenDocument.

    The four standard XML parts named in the manifest are parsed, the other
    manifest entries are attached as pictures, thumbnail or opaque objects,
    and the first child of the body is exposed under an attribute chosen
    from the mimetype.
    """
    from load import LoadParser
    from xml.sax import make_parser, handler

    xml_parts = ('settings.xml', 'meta.xml', 'content.xml', 'styles.xml')

    z = zipfile.ZipFile(odffile)
    mimetype = z.read('mimetype')
    doc = OpenDocument(mimetype, add_generator=False)

    # Look in the manifest file to see which of the four files there are
    manifest = manifestlist(z.read('META-INF/manifest.xml'))
    for part_name in xml_parts:
        if manifest.has_key(part_name):
            try:
                part_data = z.read(part_name)
                doc._parsing = part_name

                reader = make_parser()
                reader.setFeature(handler.feature_namespaces, 1)
                reader.setContentHandler(LoadParser(doc))
                reader.setErrorHandler(handler.ErrorHandler())

                part_source = InputSource()
                part_source.setByteStream(StringIO(part_data))
                reader.parse(part_source)
                del doc._parsing
            except KeyError:
                pass

    # FIXME: Add subobjects correctly here
    for mentry, mvalue in manifest.items():
        if mentry[:9] == "Pictures/" and len(mentry) > 9:
            doc.addPicture(mvalue['full-path'], mvalue['media-type'], z.read(mentry))
            continue
        if mentry == "Thumbnails/thumbnail.png":
            doc.addThumbnail(z.read(mentry))
            continue
        if mentry in xml_parts:
            continue
        # Anything else is kept as an opaque object; entries whose path
        # ends with '/' carry no content.
        is_directory = mvalue['full-path'][-1] == '/'
        full_path = mvalue['full-path']
        media_type = mvalue['media-type']
        if is_directory:
            payload = None
        else:
            payload = z.read(mentry)
        doc._extra.append(OpaqueObject(full_path, media_type, payload))

    # Add the SUN junk here to the struct somewhere
    # It is cached data, so it can be out-of-date
    z.close()

    b = doc.getElementsByType(Body)
    # (mimetype prefix, attribute of doc receiving the body content);
    # the first matching prefix wins.
    body_attributes = (
        ('application/vnd.oasis.opendocument.text', 'text'),
        ('application/vnd.oasis.opendocument.graphics', 'graphics'),
        ('application/vnd.oasis.opendocument.presentation', 'presentation'),
        ('application/vnd.oasis.opendocument.spreadsheet', 'spreadsheet'),
        ('application/vnd.oasis.opendocument.chart', 'chart'),
        ('application/vnd.oasis.opendocument.image', 'image'),
        ('application/vnd.oasis.opendocument.formula', 'formula'),
    )
    for prefix, attribute in body_attributes:
        if mimetype[:len(prefix)] == prefix:
            setattr(doc, attribute, b[0].firstChild)
            break
    return doc
def resolveEntity(self, publicId, systemId):
    """Resolve ``systemId`` against the directories listed in ``self.path``.

    Returns an InputSource whose system id is ``systemId``.  When a file
    named ``systemId`` exists in one of the directories, the first match is
    attached as the byte stream; otherwise the source has no stream.
    ``publicId`` is not used.
    """
    for directory in self.path:
        fname = os.path.join(directory, systemId)
        if os.path.exists(fname):
            source = InputSource(systemId)
            # A byte stream must deliver bytes: open in binary mode so the
            # parser, not the locale, decides how to decode the entity.
            source.setByteStream(open(fname, 'rb'))
            return source
    return InputSource(systemId)
def test_parse_InputSource(self):
    # The data carries no encoding declaration; the encoding is supplied
    # explicitly on the InputSource instead.
    make_xml_file(self.data, 'iso-8859-1', None)
    with open(TESTFN, 'rb') as stream:
        source = InputSource()
        source.setByteStream(stream)
        source.setEncoding('iso-8859-1')
        self.check_parse(source)
def parse_request(self,soap_body,sinfo,encoding):
    """Parse ``soap_body`` with a SOAP11ContentHandler.

    Returns the handler's ``req_dict``.  ``sinfo`` and ``encoding`` are
    accepted but not used here.
    """
    reader = make_parser()
    content_handler = SOAP11ContentHandler(reader)
    reader.setContentHandler(content_handler)

    body_source = InputSource()
    body_source.setByteStream(BytesIO(soap_body))
    reader.parse(body_source)
    return content_handler.req_dict
def test_character_stream(self):
    # An InputSource that already carries a character stream must be used
    # as is: no byte stream is attached to it.
    source = InputSource(self.file)
    source.setCharacterStream(self.make_character_stream())
    prepared = prepare_input_source(source)
    self.assertIsNone(prepared.getByteStream())
    self.checkContent(prepared.getCharacterStream(),
                      "This is a character stream.")
def parse_request(self, soap_body, sinfo, encoding):
    """Run a SAX parse over ``soap_body`` and return the collected request.

    The result is the ``req_dict`` attribute of the SOAP11ContentHandler
    used for the parse; ``sinfo`` and ``encoding`` are unused in this body.
    """
    sax_parser = make_parser()
    soap_handler = SOAP11ContentHandler(sax_parser)
    sax_parser.setContentHandler(soap_handler)
    source = InputSource()
    source.setByteStream(BytesIO(soap_body))
    sax_parser.parse(source)
    return soap_handler.req_dict
def test_byte_stream(self):
    # With a byte stream but no character stream on the InputSource, the
    # byte stream is what must be used.
    source = InputSource(self.file)
    source.setByteStream(self.make_byte_stream())
    prepared = prepare_input_source(source)
    self.assertIsNone(prepared.getCharacterStream())
    self.checkContent(prepared.getByteStream(),
                      b"This is a byte stream.")
def init_load(self):
    """Prepare the SAX machinery for ``self.fileptr`` and call do_load().

    Sets ``self.input_source`` (wrapping ``self.fileptr``) and
    ``self.xml_reader`` (with ``self`` as content handler).
    """
    from xml.sax.handler import feature_external_ges

    self.input_source = InputSource()
    self.input_source.setByteStream(self.fileptr)
    self.xml_reader = xml.sax.make_parser()
    self.xml_reader.setContentHandler(self)
    self.xml_reader.setErrorHandler(ErrorHandler())
    self.xml_reader.setEntityResolver(EntityResolver())
    self.xml_reader.setDTDHandler(DTDHandler())
    # Loaded documents are not trusted: never resolve external general
    # entities.
    self.xml_reader.setFeature(feature_external_ges, False)
    self.do_load()
def test_expat_inpsource_character_stream(self):
    # Parse the test file through a character stream and compare the
    # regenerated XML with the expected output.
    output = BytesIO()
    parser = create_parser()
    parser.setContentHandler(XMLGenerator(output))

    source = InputSource()
    with open(TEST_XMLFILE, 'rt', encoding='iso-8859-1') as stream:
        source.setCharacterStream(stream)
        parser.parse(source)

    self.assertEqual(output.getvalue(), xml_test_out)
def test_expat_inpsource_byte_stream(self):
    # Parse the test file through a byte stream and compare the
    # regenerated XML with the expected output.
    output = BytesIO()
    parser = create_parser()
    parser.setContentHandler(XMLGenerator(output))

    source = InputSource()
    with open(TEST_XMLFILE, 'rb') as stream:
        source.setByteStream(stream)
        parser.parse(source)

    self.assertEqual(output.getvalue(), xml_test_out)
def __init__(self, ictx, file):
    """Set up the digester for the bzip2-compressed file ``file``.

    ictx -- mapping; ``ictx['conn'].connection`` supplies the connection
    file -- object whose ``name`` is the path of the file to read
    """
    Digester.__init__(self)
    self._ictx = ictx
    self._file = file

    # The input source is named after the file and reads it through BZ2File.
    source = InputSource(file.name)
    source.setByteStream(BZ2File(file.name, 'r'))
    self._input = source

    connection = ictx['conn'].connection
    self._conn = connection
    self._cursor = connection.cursor()

    self.success = False
    self._closed = False
    self._add_rules()
def init_load(self):
    """Create the input source and the SAX reader, then call do_load().

    The reader uses ``self`` as content handler, the default error, entity
    and DTD handlers, and has external general entities switched off.
    """
    source = InputSource()
    source.setByteStream(self.fileptr)
    self.input_source = source

    reader = xml.sax.make_parser()
    reader.setContentHandler(self)
    reader.setErrorHandler(handler.ErrorHandler())
    reader.setEntityResolver(handler.EntityResolver())
    reader.setDTDHandler(handler.DTDHandler())
    reader.setFeature(handler.feature_external_ges, False)
    self.xml_reader = reader

    self.do_load()
def parse(self):
    """Parse ``self.source`` with ``self`` as the SAX content handler.

    ``self.source`` must be a unicode string or an InputSource; anything
    else raises Exception.
    """
    source = self.source
    if isinstance(source, InputSource):
        sax_input = source
    elif isinstance(source, unicode):
        # Wrap the string in an in-memory stream
        text_stream = io.StringIO(source)
        sax_input = InputSource(text_stream)
        sax_input.setEncoding("utf-8")
        sax_input.setCharacterStream(text_stream)
        # There is a bug in xml.sax.saxutils.prepare_input_source
        sax_input.setByteStream(text_stream)
        sax_input.setSystemId(None)
    else:
        raise Exception("Parse source must be either string or InputSource")

    # Create the parser/xmlreader and tell it to use our handler(s)
    reader = xml.sax.make_parser()
    reader.setContentHandler(self)

    # Shut off dtd validation and external entity loading
    reader.setFeature(xml.sax.handler.feature_validation, False)
    reader.setFeature(xml.sax.handler.feature_external_ges, False)

    # Parse the document
    reader.parse(sax_input)
def test_expat_inpsource_stream():
    """Parse the test file through a byte stream; return True on a match."""
    parser = create_parser()
    result = StringIO()
    xmlgen = XMLGenerator(result)

    parser.setContentHandler(xmlgen)
    inpsrc = InputSource()
    stream = open(findfile("test" + os.extsep + "xml"))
    try:
        inpsrc.setByteStream(stream)
        parser.parse(inpsrc)
    finally:
        # The original never closed the file it opened.
        stream.close()

    return result.getvalue() == xml_test_out
def test_expat_inpsource_location():
    """Check that a parse error reports the system id of its source.

    Returns the comparison result when SAXException is raised; falls off
    the end (None) when parsing unexpectedly succeeds.
    """
    name = "a file name"
    source = InputSource()
    source.setByteStream(StringIO("<foo bar foobar>"))   # ill-formed
    source.setSystemId(name)

    parser = make_parser()
    parser.setContentHandler(ContentHandler()) # do nothing
    try:
        parser.parse(source)
    except SAXException as e:
        return e.getSystemId() == name
def test_expat_inpsource_stream(self):
    # Parse the test file through a byte stream and compare the
    # regenerated XML with the expected output.
    parser = create_parser()
    result = StringIO()
    xmlgen = XMLGenerator(result)

    parser.setContentHandler(xmlgen)
    inpsrc = InputSource()
    stream = open(findfile("test.xml"))
    try:
        inpsrc.setByteStream(stream)
        parser.parse(inpsrc)
    finally:
        # The original never closed the file it opened.
        stream.close()

    self.assertEqual(result.getvalue(), xml_test_out)
def test_expat_inpsource_stream():
    """Parse test.xml through a byte stream; return True on a match."""
    parser = make_parser()
    result = StringIO()
    xmlgen = XMLGenerator(result)

    parser.setContentHandler(xmlgen)
    inpsrc = InputSource()
    stream = open(findfile("test.xml"))
    try:
        inpsrc.setByteStream(stream)
        parser.parse(inpsrc)
    finally:
        # The original never closed the file it opened.
        stream.close()

    return result.getvalue() == xml_test_out
def test_expat_inpsource_stream(self):
    # Parse the test file through a byte stream and compare the
    # regenerated XML with the expected output.
    parser = create_parser()
    result = StringIO()
    xmlgen = XMLGenerator(result)

    parser.setContentHandler(xmlgen)
    inpsrc = InputSource()
    stream = open(findfile("test.xml"))
    try:
        inpsrc.setByteStream(stream)
        parser.parse(inpsrc)
    finally:
        # The original never closed the file it opened.
        stream.close()

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(result.getvalue(), xml_test_out)
def resolveEntity(self, publicId, systemId):
    """Return an InputSource for a DTD listed in ``self.knownDTDs``.

    ``systemId`` is looked up in ``self.knownDTDs``; the value found is
    opened and used as the byte stream.  An unknown system id raises
    process.ProcessingFailure.  ``publicId`` is not used.
    """
    dtd_source = InputSource()
    dtd_source.setSystemId(systemId)
    try:
        dtd_path = self.knownDTDs[systemId]
    except KeyError:
        message = (
            "Invalid DTD system identifier (%r) in %s.  Only "
            "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd "
            "is allowed.")
        raise process.ProcessingFailure(message % (systemId, self.filename))
    dtd_source.setByteStream(dtd_path.open())
    return dtd_source
def test_expat_inpsource_location(self):
    # A parse error must report the system id set on the InputSource.
    name = "a file name"
    source = InputSource()
    source.setByteStream(StringIO("<foo bar foobar>"))   # ill-formed
    source.setSystemId(name)

    parser = create_parser()
    parser.setContentHandler(ContentHandler()) # do nothing
    try:
        parser.parse(source)
        self.fail()
    except SAXException as e:
        self.assertEqual(e.getSystemId(), name)
def manifestlist(manifestxml):
    """Parse the manifest document ``manifestxml``.

    Returns the ``manifest`` attribute of the ODFManifestHandler used for
    the parse.
    """
    odhandler = ODFManifestHandler()
    parser = make_parser()
    parser.setFeature(handler.feature_namespaces, 1)
    # Manifest data comes from the package being opened: do not resolve
    # external general entities while reading it.
    parser.setFeature(handler.feature_external_ges, False)
    parser.setContentHandler(odhandler)
    parser.setErrorHandler(handler.ErrorHandler())

    inpsrc = InputSource()
    inpsrc.setByteStream(StringIO(manifestxml))
    parser.parse(inpsrc)

    return odhandler.manifest
def test_expat_inpsource_stream(self):
    # Parse the test file through an open file object and compare the
    # regenerated XML with the expected output.
    output = StringIO()
    parser = create_parser()
    parser.setContentHandler(XMLGenerator(output))

    source = InputSource()
    with open(TEST_XMLFILE) as stream:
        source.setByteStream(stream)
        parser.parse(source)

    self.assertEqual(output.getvalue(), xml_test_out)
def manifestlist(manifestxml):
    """Parse the manifest bytes ``manifestxml``.

    Returns the ``manifest`` attribute of the ODFManifestHandler used for
    the parse.
    """
    manifest_handler = ODFManifestHandler()

    reader = make_parser()
    reader.setFeature(handler.feature_namespaces, 1)
    reader.setContentHandler(manifest_handler)
    reader.setErrorHandler(handler.ErrorHandler())
    # Changed by Kovid to ignore external DTDs
    reader.setFeature(handler.feature_external_ges, False)

    source = InputSource()
    source.setByteStream(io.BytesIO(manifestxml))
    reader.parse(source)
    return manifest_handler.manifest
def manifestlist(manifestxml):
    """Read the manifest document and return what the handler collected.

    The result is the ``manifest`` attribute of an ODFManifestHandler after
    ``manifestxml`` has been parsed with namespaces enabled.
    """
    source = InputSource()
    source.setByteStream(StringIO(manifestxml))

    collector = ODFManifestHandler()
    sax_parser = make_parser()
    sax_parser.setFeature(handler.feature_namespaces, 1)
    sax_parser.setContentHandler(collector)
    sax_parser.setErrorHandler(handler.ErrorHandler())
    # Changed by Kovid to ignore external DTDs
    sax_parser.setFeature(handler.feature_external_ges, False)
    sax_parser.parse(source)

    return collector.manifest
def __loadxmlparts(z, manifest, doc, objectpath):
    """
    Parses a document from its zipfile
    @param z an instance of zipfile.ZipFile
    @param manifest Manifest data structured in a dictionary
    @param doc instance of OpenDocument to feed in
    @param objectpath unicode string: path to an object
    """
    assert isinstance(z, zipfile.ZipFile)
    assert type(manifest) == type(dict())
    assert isinstance(doc, OpenDocument)
    assert type(objectpath) == type(u"")

    from odf.load import LoadParser
    from defusedxml.sax import make_parser
    from xml.sax import handler

    part_names = (objectpath + u'settings.xml', objectpath + u'meta.xml',
                  objectpath + u'content.xml', objectpath + u'styles.xml')
    for xmlfile in part_names:
        if xmlfile not in manifest:
            continue
        ##########################################################
        # this one is added to debug the bad behavior with Python2
        # which raises exceptions of type SAXParseException
        from xml.sax._exceptions import SAXParseException
        ##########################################################
        try:
            xmlpart = z.read(xmlfile).decode("utf-8")
            doc._parsing = xmlfile

            reader = make_parser()
            reader.setFeature(handler.feature_namespaces, 1)
            reader.setFeature(handler.feature_external_ges, 0)
            reader.setContentHandler(LoadParser(doc))
            reader.setErrorHandler(handler.ErrorHandler())

            part_source = InputSource()
            #################
            # There may be a SAXParseException triggered because of
            # a missing xmlns prefix like meta, config, etc.
            # So i add such declarations when needed (GK, 2014/10/21).
            # Is there any option to prevent xmlns checks by SAX?
            xmlpart = __fixXmlPart(xmlpart)

            part_source.setByteStream(BytesIO(xmlpart.encode("utf-8")))
            reader.parse(part_source)
            del doc._parsing
        except KeyError:
            pass
        except SAXParseException:
            print(u"====== SAX FAILED TO PARSE ==========\n", xmlpart)
def create(self, request, user_name, vendor, name, version):
    """Store the tags posted as ``tags_xml`` for a gadget and a user.

    A failure while storing a tag rolls the transaction back and raises
    TracedServerError.
    """
    from xml.sax.handler import feature_external_ges

    try:
        format = request.POST.__getitem__('format')
    except Exception:
        # ``Exception`` rather than a bare except: SystemExit and
        # KeyboardInterrupt must not be mistaken for a missing field.
        format = 'default'

    user = user_authentication(request, user_name)

    # Get the xml containing the tags from the request
    tags_xml = request.POST.__getitem__('tags_xml')
    tags_xml = tags_xml.encode("utf-8")

    # Parse the xml containing the tags
    parser = make_parser()
    handler = TagsXMLHandler()

    # Tell the parser to use our handler
    parser.setContentHandler(handler)
    # The document is client supplied: do not resolve external entities.
    parser.setFeature(feature_external_ges, False)

    # Parse the input
    try:
        from StringIO import StringIO
    except ImportError:
        from cStringIO import StringIO
    inpsrc = InputSource()
    inpsrc.setByteStream(StringIO(tags_xml))
    parser.parse(inpsrc)

    # Get the gadget's id for those vendor, name and version
    gadget = get_object_or_404(GadgetResource, short_name=name,
                               vendor=vendor, version=version)

    # Insert the tags for these resource and user in the database
    for e in handler._tags:
        try:
            tag, created = Tag.objects.get_or_create(name=e)
            UserTag.objects.get_or_create(tag=tag, idUser=user, idResource=gadget)
        except Exception as ex:
            transaction.rollback()
            msg = _("Error tagging resource!!")

            raise TracedServerError(ex, {
                'resource': vendor + name + version,
                'tags': tags_xml
            }, request, msg)
def processxmlfile(file, context, testing=False): """Process a configuration file See examples in tests/text_xmlconfig.py """ src = InputSource(getattr(file, 'name', '<string>')) src.setByteStream(file) parser = make_parser() parser.setContentHandler(ConfigurationHandler(context, testing=testing)) parser.setFeature(feature_namespaces, True) try: parser.parse(src) except SAXParseException: raise ZopeSAXParseException(sys.exc_info()[1]), None, sys.exc_info()[2]
def listNewDatasets(config, onlyThese=None):
    """ Reads the table_of_contents.xml file from the current directory
        with a ToCParser and returns the parser's ``todownload`` attribute.

        config    -- passed through to ToCParser
        onlyThese -- passed through to ToCParser; defaults to an empty list
    """
    # A fresh list per call: a ``[]`` default would be shared between calls.
    if onlyThese is None:
        onlyThese = []

    # Start XML parsing
    parser = make_parser()
    parser.setFeature(handler.feature_namespaces, 1)
    ch = ToCParser(config, onlyThese)
    parser.setContentHandler(ch)
    parser.setErrorHandler(handler.ErrorHandler())

    toc_file = open('table_of_contents.xml')
    try:
        inpsrc = InputSource()
        inpsrc.setByteStream(toc_file)
        parser.parse(inpsrc)
    finally:
        # The original never closed the file it opened.
        toc_file.close()
    return ch.todownload
def load_from_file(self, filename=None):
    """
    Loads color scheme from well-formated xml file.
    filename - full path to xml file

    After loading, ``name`` is reset to None and every colour attribute
    that is still None is derived from the other attributes.  Parse errors
    are printed and re-raised.
    """
    if filename:
        content_handler = XMLPrefReader(pref=self)
        error_handler = ErrorHandler()
        entity_resolver = EntityResolver()
        dtd_handler = DTDHandler()
        try:
            scheme_file = open(filename, "r")
            try:
                input_source = InputSource()
                input_source.setByteStream(scheme_file)
                xml_reader = xml.sax.make_parser()
                xml_reader.setContentHandler(content_handler)
                xml_reader.setErrorHandler(error_handler)
                xml_reader.setEntityResolver(entity_resolver)
                xml_reader.setDTDHandler(dtd_handler)
                xml_reader.parse(input_source)
            finally:
                # The original wrote ``input.close`` without calling it,
                # so the file was never closed.
                scheme_file.close()
        except:
            # Print the traceback, then let the error propagate.
            import traceback
            traceback.print_exc()
            raise

        self.name = None

        if self.disabledforeground is None:
            self.disabledforeground = lighter_color(self.foreground, .3)

        if self.menubackground is None:
            self.menubackground = self.bg
        if self.menuforeground is None:
            self.menuforeground = self.foreground
        if self.menuselectbackground is None:
            self.menuselectbackground = self.selectbackground
        if self.menuselectforeground is None:
            self.menuselectforeground = self.selectforeground
        if self.menudisabledforeground is None:
            self.menudisabledforeground = self.disabledforeground
        if self.menubordercolor is None:
            self.menubordercolor = self.disabledforeground

        if self.editfieldbackground is None:
            self.editfieldbackground = '#ffffff'
        if self.editfieldforeground is None:
            self.editfieldforeground = self.foreground

        if self.evencolor is None:
            self.evencolor = middle_color(self.bg, self.editfieldbackground, 0.7)
        if self.treelinescolor is None:
            self.treelinescolor = self.editfieldforeground
def __read_data__(self):
    """Reads the XML document.

    Does nothing when the data is already loaded.  Otherwise the resource
    returned by ``__open_resource__`` is read completely, closed, and its
    content wrapped in an InputSource.
    """
    if self.__data:
        return
    fd = self.__open_resource__()
    if fd:
        try:
            data = fd.read()
        finally:
            # Close the resource even when reading fails.
            fd.close()
        self.__data = InputSource()
        self.__data.setByteStream(_StringIO(data))
def __loadxmlparts(z, manifest, doc, objectpath):
    """
    Parses a document from its zipfile
    @param z an instance of zipfile.ZipFile
    @param manifest Manifest data structured in a dictionary
    @param doc instance of OpenDocument to feed in
    @param objectpath unicode string: path to an object
    """
    assert(isinstance(z, zipfile.ZipFile))
    assert(type(manifest)==type(dict()))
    assert(isinstance(doc, OpenDocument))
    assert(type(objectpath)==type(u""))

    from odf.load import LoadParser
    from defusedxml.sax import make_parser
    from xml.sax import handler

    for xmlfile in (objectpath+u'settings.xml', objectpath+u'meta.xml',
                    objectpath+u'content.xml', objectpath+u'styles.xml'):
        if xmlfile in manifest:
            ##########################################################
            # this one is added to debug the bad behavior with Python2
            # which raises exceptions of type SAXParseException
            from xml.sax._exceptions import SAXParseException
            ##########################################################
            try:
                xmlpart = z.read(xmlfile).decode("utf-8")
                doc._parsing = xmlfile

                sax_parser = make_parser()
                sax_parser.setFeature(handler.feature_namespaces, 1)
                sax_parser.setFeature(handler.feature_external_ges, 0)
                sax_parser.setContentHandler(LoadParser(doc))
                sax_parser.setErrorHandler(handler.ErrorHandler())

                inpsrc = InputSource()
                #################
                # There may be a SAXParseException triggered because of
                # a missing xmlns prefix like meta, config, etc.
                # So i add such declarations when needed (GK, 2014/10/21).
                # Is there any option to prevent xmlns checks by SAX?
                xmlpart = __fixXmlPart(xmlpart)
                encoded_part = xmlpart.encode("utf-8")

                inpsrc.setByteStream(BytesIO(encoded_part))
                sax_parser.parse(inpsrc)
                del doc._parsing
            except KeyError:
                pass
            except SAXParseException:
                print (u"====== SAX FAILED TO PARSE ==========\n", xmlpart)
def load(odtfile):
    """Build an OpenDocument from ``odtfile``.

    The mimetype part initialises the document; 'content.xml' is then
    parsed into it with an ODFParser.  Returns the document.
    """
    d = OpenDocument(_getxmlpart(odtfile, 'mimetype'))

    for part_name in ('content.xml', ):
        part_data = _getxmlpart(odtfile, part_name)

        reader = make_parser()
        reader.setFeature(handler.feature_namespaces, 1)
        reader.setContentHandler(ODFParser(d))
        reader.setErrorHandler(handler.ErrorHandler())

        part_source = InputSource()
        part_source.setByteStream(StringIO(part_data))
        reader.parse(part_source)
    return d
def test_expat_inpsource_sysid(self):
    # Parse from an InputSource that only carries a system id.
    output = BytesIO()
    parser = create_parser()
    parser.setContentHandler(XMLGenerator(output))

    source = InputSource(TEST_XMLFILE)
    parser.parse(source)

    self.assertEqual(output.getvalue(), xml_test_out)
def test_system_id(self):
    # With neither a character stream nor a byte stream on the
    # InputSource, the system ID is opened as a byte stream.
    source = InputSource(self.file)
    prepared = prepare_input_source(source)
    self.assertIsNone(prepared.getCharacterStream())
    self.checkContent(prepared.getByteStream(),
                      b"This was read from a file.")
def create(self, request, user_name, vendor, name, version):
    """Store the tags posted as ``tags_xml`` for a gadget and a user.

    A failure while storing a tag is logged and answered with an
    HttpResponseServerError.
    """
    from xml.sax.handler import feature_external_ges

    try:
        format = request.POST.__getitem__('format')
    except Exception:
        # ``Exception`` rather than a bare except: SystemExit and
        # KeyboardInterrupt must not be mistaken for a missing field.
        format = 'default'

    user = user_authentication(request, user_name)

    # Get the xml containing the tags from the request
    tags_xml = request.POST.__getitem__('tags_xml')
    tags_xml = tags_xml.encode("utf-8")

    # Parse the xml containing the tags
    parser = make_parser()
    handler = TagsXMLHandler()

    # Tell the parser to use our handler
    parser.setContentHandler(handler)
    # The document is client supplied: do not resolve external entities.
    parser.setFeature(feature_external_ges, False)

    # Parse the input
    try:
        from StringIO import StringIO
    except ImportError:
        from cStringIO import StringIO
    inpsrc = InputSource()
    inpsrc.setByteStream(StringIO(tags_xml))
    parser.parse(inpsrc)

    # Get the gadget's id for those vendor, name and version
    gadget = get_object_or_404(GadgetResource, short_name=name,
                               vendor=vendor, version=version)

    # Insert the tags for these resource and user in the database
    for e in handler._tags:
        try:
            UserTag.objects.get_or_create(tag=e, idUser=user, idResource=gadget)
        except Exception as ex:
            log(ex, request)
            return HttpResponseServerError(
                get_xml_error(unicode(ex)),
                mimetype='application/xml; charset=UTF-8')
def odf2xhtml(self, odtfile):
    """Parse the XML parts of ``odtfile`` with ``self`` as content handler.

    'meta.xml', 'styles.xml' and 'content.xml' are parsed in that order;
    ``self.xmlfile`` names the part being parsed.  Returns the
    concatenation of ``self.lines``.
    """
    # Extract the interesting files
    z = zipfile.ZipFile(odtfile)
    try:
        parser = xml.sax.make_parser()
        parser.setFeature(handler.feature_namespaces, 1)
        parser.setContentHandler(self)
        parser.setErrorHandler(handler.ErrorHandler())
        inpsrc = InputSource()

        for xmlfile in ('meta.xml', 'styles.xml', 'content.xml'):
            self.xmlfile = xmlfile
            content = z.read(xmlfile)
            inpsrc.setByteStream(StringIO(content))
            parser.parse(inpsrc)
    finally:
        # Close the archive even when a part is missing or malformed.
        z.close()
    return ''.join(self.lines)
class AbstractXMLLoader(AbstractLoader, handler.ContentHandler):
    """Loader base class driving a SAX parser over ``self.fileptr``.

    The SAX callbacks encode unicode names, attribute keys/values and
    character data to UTF-8 before forwarding them to the overridable
    hooks ``start_element``, ``end_element`` and ``element_data``.
    """

    # SAX reader and input source, created by init_load().
    xml_reader = None
    input_source = None

    def init_load(self):
        """Create the input source and the reader, then call do_load()."""
        source = InputSource()
        source.setByteStream(self.fileptr)
        self.input_source = source

        reader = xml.sax.make_parser()
        reader.setContentHandler(self)
        reader.setErrorHandler(handler.ErrorHandler())
        reader.setEntityResolver(handler.EntityResolver())
        reader.setDTDHandler(handler.DTDHandler())
        reader.setFeature(handler.feature_external_ges, False)
        self.xml_reader = reader

        self.do_load()

    def start_parsing(self):
        """Run the reader over the prepared input source."""
        self.xml_reader.parse(self.input_source)

    def startElement(self, name, attrs):
        name = name.encode('utf-8') if isinstance(name, unicode) else name
        # Replace the attribute mapping with a UTF-8 encoded copy.
        attrs._attrs = dict(
            (key.encode('utf-8'), attrs[key].encode('utf-8'))
            for key in attrs._attrs.keys())
        self.start_element(name, attrs)

    def endElement(self, name):
        name = name.encode('utf-8') if isinstance(name, unicode) else name
        self.end_element(name)

    def characters(self, data):
        data = data.encode('utf-8') if isinstance(data, unicode) else data
        self.element_data(data)

    def start_element(self, name, attrs):
        """Hook called for every opening tag; does nothing by default."""
        pass

    def end_element(self, name):
        """Hook called for every closing tag; does nothing by default."""
        pass

    def element_data(self, data):
        """Hook called for character data; does nothing by default."""
        pass
def test_expat_inpsource_sysid(self):
    # Parse from an InputSource that only carries a system id.
    output = StringIO()
    parser = create_parser()
    parser.setContentHandler(XMLGenerator(output))

    source = InputSource(findfile("test.xml"))
    parser.parse(source)

    self.assertEquals(output.getvalue(), xml_test_out)
def test_expat_inpsource_sysid():
    """Parse the test file via its system id; return True on a match."""
    output = StringIO()
    parser = create_parser()
    parser.setContentHandler(XMLGenerator(output))

    test_file = findfile("test" + os.extsep + "xml")
    parser.parse(InputSource(test_file))
    return output.getvalue() == xml_test_out
def recover(self):
    """
    Dirty hack to remove weird characters present in some replication
    files: the current input is copied to a temporary file, run through
    the ``tidy`` tool, and the load is restarted from tidy's output.
    """
    logger.warning('Trying to recover invalid XML...')

    raw_tmp = None
    tidy_tmp = None
    try:
        # bunzipped copy of the input
        raw_tmp = tempfile.NamedTemporaryFile(suffix='.xml', delete=False)
        # destination for tidy; only its name is needed
        tidy_tmp = tempfile.NamedTemporaryFile(suffix='.xml', delete=False)
        tidy_tmp.close()

        # Fetch uncompressed file data to recover
        stream = self._input.getByteStream()
        stream.seek(0)
        shutil.copyfileobj(stream, raw_tmp)
        raw_tmp.close()

        cmd = ['tidy', '-xml', '-o', tidy_tmp.name, raw_tmp.name]
        logger.warning('Running: %s', ' '.join(cmd))
        # The exit status of tidy is deliberately ignored.
        subprocess.call(cmd)

        # ready to load
        self.close()
        self._file = file(tidy_tmp.name, 'r')
        self._input = InputSource(tidy_tmp.name)
        self._input.setByteStream(self._file)
        self._cursor = self._conn.cursor()
        self.success = False
        self._closed = False
        self.reset()
        self._add_rules()
        self.load()
    finally:
        # Close and delete whichever temporary files were created.
        for tmp in (raw_tmp, tidy_tmp):
            if not tmp:
                continue
            if not tmp.closed:
                tmp.close()
            if os.path.exists(tmp.name):
                os.unlink(tmp.name)
def test_expat_inpsource_sysid_nonascii(self):
    # Same as the system-id test, but through a copy of the test file
    # named support.TESTFN_UNICODE.
    fname = support.TESTFN_UNICODE
    shutil.copyfile(TEST_XMLFILE, fname)
    self.addCleanup(support.unlink, fname)

    output = BytesIO()
    parser = create_parser()
    parser.setContentHandler(XMLGenerator(output))
    parser.parse(InputSource(fname))

    self.assertEqual(output.getvalue(), xml_test_out)
def is_get_feature(content):
    """
    Determine if the XML string is a WFS GetFeature request.

    The content is parsed with an _XMLFilterGetFeature content handler
    whose ``result`` attribute is returned.
    """
    if isinstance(content, str):
        content = unicode(content, 'utf-8')
    buffer_ = BytesIO(content.encode('utf-8'))

    detector = _XMLFilterGetFeature()
    reader = make_parser()
    reader.setContentHandler(detector)

    source = InputSource()
    source.setByteStream(buffer_)
    reader.parse(source)
    buffer_.close()
    return detector.result
def create(self, request, user_name, vendor, name, version):
    """Store the tags posted as ``tags_xml`` for a gadget and a user.

    Returns an XML response listing the tags of the gadget, or an
    HttpResponseServerError when a tag cannot be stored.
    """
    from xml.sax.handler import feature_external_ges

    user = user_authentication(user_name)

    # Get the xml containing the tags from the request
    tags_xml = request.__getitem__('tags_xml')

    # Parse the xml containing the tags
    parser = make_parser()
    handler = TagsXMLHandler()

    # Tell the parser to use our handler
    parser.setContentHandler(handler)
    # The document is client supplied: do not resolve external entities.
    parser.setFeature(feature_external_ges, False)

    # Parse the input
    try:
        from StringIO import StringIO
    except ImportError:
        from cStringIO import StringIO
    inpsrc = InputSource()
    inpsrc.setByteStream(StringIO(tags_xml))
    parser.parse(inpsrc)

    # Get the gadget's id for those vendor, name and version
    gadget = get_object_or_404(GadgetResource, short_name=name,
                               vendor=vendor, version=version)

    # Insert the tags for these resource and user in the database
    for e in handler._tags:
        try:
            UserTag.objects.get_or_create(tag=e, idUser=user, idResource=gadget)
        except Exception:
            # ``Exception`` rather than a bare except so that SystemExit
            # and KeyboardInterrupt are not turned into a 500 response.
            return HttpResponseServerError(
                get_xml_error(str(sys.exc_info()[1])),
                mimetype='text/xml; charset=UTF-8')

    response = '<?xml version="1.0" encoding="UTF-8" ?>\n'
    response += get_tags_by_resource(gadget, user)
    return HttpResponse(response, mimetype='text/xml; charset=UTF-8')