def to_xml(text):
    """Parse ``text`` into an XML tree, retrying with a recovering lxml parser.

    A leading ``BOM`` is stripped first. The text is then handed to
    ``fromstring`` (UTF-8 encoded first when ``PY2`` is set). If that raises
    ``ParseError``, the text is parsed again with an ``XMLParser`` created with
    ``recover=True`` and ``resolve_entities=False``, serialized with
    ``tostring`` and fed back through ``fromstring``.

    :param text: the XML document as text
    :return: whatever ``fromstring`` returns for the (possibly repaired) XML
    :raises ParseError: when the recovering parser trips an assertion, when the
        repaired document still does not parse (the message then carries an
        excerpt of the offending line if it can be located), or when the
        recovered tree cannot be serialized ('This is not XML')
    """
    # Strip the byte-order mark once; every parse attempt below works on this text
    no_bom_text = text[BOM_LEN:] if text.startswith(BOM) else text
    try:
        # On python2, fromstring expects an encoded string
        return fromstring(no_bom_text.encode('utf-8') if PY2 else no_bom_text)
    except ParseError:
        # Exchange servers may spit out the weirdest XML. lxml is pretty good at recovering from errors
        log.warning('Fallback to lxml processing of faulty XML')
        lenient_parser = XMLParser(recover=True, resolve_entities=False)
        lenient_parser.set_element_class_lookup(ElementDefaultClassLookup(element=RestrictedElement))
        xml_stream = io.BytesIO(no_bom_text.encode('utf-8'))
        try:
            recovered = parse(xml_stream, parser=lenient_parser)
        except AssertionError as assertion:
            raise ParseError(*assertion.args)
        try:
            return fromstring(tostring(recovered))
        except ParseError as err:
            # Some ParseError flavours expose (line, column) as 'position' only; mirror it
            # onto lineno/offset so the excerpt logic below has a single code path
            if hasattr(err, 'position'):
                err.lineno, err.offset = err.position
            if not err.lineno:
                raise ParseError('%s' % text_type(err))
            source_lines = no_bom_text.splitlines()
            try:
                offending_line = source_lines[err.lineno - 1]
            except IndexError:
                # The reported line does not exist in the input; report without an excerpt
                raise ParseError('%s' % text_type(err))
            # Show up to 20 characters on either side of the reported column
            excerpt_start = max(0, err.offset - 20)
            offending_excerpt = offending_line[excerpt_start:err.offset + 20]
            raise ParseError('%s\nOffending text: [...]%s[...]' % (text_type(err), offending_excerpt))
        except TypeError:
            raise ParseError('This is not XML: %s' % text)
def loadXml(data, **parserOptions):
    """Load SVG from an XML string, fileName, or file-like object.

    A string that names an existing path is opened and parsed as a file;
    that file is closed again before this function returns.  Any other
    string is treated as the XML text itself.  Non-string input is passed
    to lxml's C{parse} unchanged and is never closed here.

    @type data: string or file-like object
    @param data: The serialized SVG, fileName, or file-like object that generates SVG as XML.
    @param **parserOptions: Arguments passed to lxml's U{XMLParser<http://lxml.de/api/lxml.etree.XMLParser-class.html>}.  C{huge_tree} defaults to True unless overridden here.
    @rtype: SvgBinding
    @return: An in-memory representation of the SVG.
    """

    # Track only the handle opened here so that caller-supplied
    # file-like objects are left open for the caller to manage.
    ownedFile = None
    if isinstance(data, basestring):
        if os.path.exists(data):
            data = ownedFile = open(data)
        else:
            data = StringIO(data)

    try:
        # Caller-supplied options override the huge_tree default.
        effectiveOptions = {"huge_tree": True}
        effectiveOptions.update(parserOptions)

        parser = XMLParser(**effectiveOptions)
        lookup = ElementDefaultClassLookup(element=SvgBinding)
        parser.set_element_class_lookup(lookup)

        return parse(data, parser).getroot()
    finally:
        # Runs on success and on parse/option errors alike, so the
        # file handle opened above can no longer leak.
        if ownedFile is not None:
            ownedFile.close()
def makeElementMaker():
    """Build an lxml ElementMaker that produces SVG elements.

    The returned factory uses the SVG namespace as both its element
    namespace and its default (un-prefixed) namespace, maps the
    C{xlink} prefix to the XLink namespace, and creates elements through
    a C{huge_tree} parser whose default element class is C{SvgBinding}.

    The C{SvgBinding} class has an C{elementMaker} attribute that should
    be used instead of calling this function.

    @see: The lxml U{ElementMaker documentation<http://lxml.de/api/lxml.builder.ElementMaker-class.html>}, which explains how to use an ElementMaker factory.
    @return: An ElementMaker bound to the SVG and XLink namespaces.
    """

    svgNamespace = defs.SVG_NAMESPACE
    prefixMap = {None: svgNamespace, "xlink": defs.XLINK_NAMESPACE}

    # Elements must come from a parser that knows the SvgBinding lookup,
    # otherwise they would be plain lxml elements.
    svgParser = XMLParser(huge_tree=True)
    svgParser.set_element_class_lookup(ElementDefaultClassLookup(element=SvgBinding))

    return ElementMaker(namespace=svgNamespace,
                        nsmap=prefixMap,
                        makeelement=svgParser.makeelement)
# Copyright (C) 2012-2018 by Dr. Dieter Maurer <*****@*****.**>; see 'LICENSE.txt' for details """Auxiliary classes to construct signature/encryption templates.""" from lxml.etree import ElementBase, \ parse as et_parse, fromstring as et_fromstring, XML as et_xml, \ XMLParser, ElementNamespaceClassLookup, ElementDefaultClassLookup from dm.xmlsec.binding import DSigNs, dsig, EncNs, enc # set up our own parser and related `etree` infrastructure parser = XMLParser() # apparently, `parser` has a `set_element_class_lookup` but not corresponding `get` #class_lookup = ElementNamespaceClassLookup(parser.get_element_class_lookup()) class_lookup = ElementNamespaceClassLookup(ElementDefaultClassLookup()) parser.set_element_class_lookup(class_lookup) Element = parser.makeelement def SubElement(node, *args, **kw): node.append(Element(*args, **kw)) def parse(file, parser=parser): return et_parse(file, parser=parser) def fromstring(s, parser=parser): return et_fromstring(s, parser=parser) def XML(s, parser=parser):
def create_lxml_context():
    """Return an ``XMLParser`` configured for this module's node classes.

    The parser is created with ``no_network=True`` and a default class
    lookup that instantiates element nodes as ``Element`` and comment
    nodes as ``Comment``.
    """
    context = XMLParser(no_network=True)
    node_classes = ElementDefaultClassLookup(element=Element, comment=Comment)
    context.set_element_class_lookup(node_classes)
    return context
from lxml.etree import ElementBase, ElementDefaultClassLookup, XMLParser
import re

# Characters removed from element text: C0 controls other than tab and line
# feed, DEL and the C1 controls other than U+0085, the surrogate range
# U+D800-U+DFFF, and the ranges U+FDD0-U+FDDF and U+FFFE-U+FFFF.
ILLEGAL_CHARS_RE = re.compile(u'[\x00-\x08\x0b-\x1f\x7f-\x84\x86-\x9f'
                              u'\ud800-\udfff\ufdd0-\ufddf\ufffe-\uffff]')


class ControlCharStrippingElement(ElementBase):
    """Element class that strips ``ILLEGAL_CHARS_RE`` matches from ``text``.

    Only assignments to the ``text`` attribute are filtered; every other
    attribute is passed to ``ElementBase`` untouched.
    """

    def __setattr__(self, name, value):
        """Set ``name`` to ``value``, cleaning ``value`` first when it is text.

        ``None`` is forwarded unchanged so that ``element.text = None`` keeps
        working instead of failing inside ``re.sub``.
        """
        # Compare by equality: ``is`` on a string literal only matches when
        # both strings happen to be interned, and warns on Python 3.8+.
        if name == 'text' and value is not None:
            value = ILLEGAL_CHARS_RE.sub('', value)
        super(ControlCharStrippingElement, self).__setattr__(name, value)


# Parser whose elements are all ControlCharStrippingElement instances;
# ``makeelement`` is exported as the factory for creating such elements.
parser_lookup = ElementDefaultClassLookup(element=ControlCharStrippingElement)
parser = XMLParser()
parser.set_element_class_lookup(parser_lookup)
makeelement = parser.makeelement