示例#1
0
def transform(source, transforms, params=None, output=None):
    """
    Apply one or more XSLT stylesheets to an XML document and return the
    result object produced by the processor.

    source - XML source document: a string (not a Unicode object), a
             file-like object (stream), a file path, a URI or an
             amara.lib.inputsource instance.  A string or stream must be
             self-contained XML (i.e. it must not need any other resource
             such as external entities or includes)
    transforms - a single XSLT document, or a list/tuple of them, each given
                 as a string, stream, URL, file path or
                 amara.lib.inputsource instance
    params - optional dictionary of stylesheet parameters; keys may be
             unicode objects when they have no namespace, or
             (uri, localname) tuples when they do
    output - optional file-like object to which output is written
             (incrementally, as processed); when omitted a string result
             is used instead
    """
    # Imported here rather than at module level to avoid circular imports,
    # at a small per-call cost.
    from amara.lib import inputsource
    from amara.xpath.util import parameterize
    from amara.xslt.result import streamresult, stringresult
    from amara.xslt.processor import processor

    top_level_params = parameterize(params) if params else {}
    engine = processor()

    # A lone stylesheet is handled as a one-element sequence so both cases
    # share the same loop.
    if isinstance(transforms, (list, tuple)):
        stylesheets = transforms
    else:
        stylesheets = (transforms,)
    for stylesheet in stylesheets:
        engine.append_transform(inputsource(stylesheet))

    sink = stringresult() if output is None else streamresult(output)
    return engine.run(inputsource(source), top_level_params, sink)
示例#2
0
def setup_blank_text():
    global source
    global trans
    global xslt_proc

    _source1 = '''<?xml version="1.0"?>
<test>
  <item/>
  <item/>
  <item/>
</test>
'''

    _trans1 = '''<?xml version="1.0"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:strip-space  elements="*"/>
  <xsl:template  match="/">
    <test>
      <xsl:apply-templates/>
    </test>
  </xsl:template>
  <xsl:template  match="item">
    <no>
      <xsl:value-of select="position()"/>
    </no>
  </xsl:template>
</xsl:stylesheet>
'''

    xslt_proc = processor()

    source = inputsource(_source1, None)
    trans = inputsource(_trans1, None)
示例#3
0
def setup_blank_node():
    """
    Populate the module-level fixtures `source`, `trans` and `xslt_proc`.

    `source` wraps a document whose <text> element contains only spaces,
    `trans` wraps a text-output stylesheet that strips whitespace and
    reports string-length(text()) for each <text> element, and `xslt_proc`
    is a fresh processor instance.
    """
    global source, trans, xslt_proc

    document_text = '''<?xml version="1.0"?>
<document>
<text>   </text>
</document>'''

    stylesheet_text = '''<?xml version='1.0'?>
<xsl:stylesheet version="1.0"
      xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output method="text"/>
  <xsl:strip-space elements="*"/>

  <xsl:template match="/">
    <xsl:apply-templates select="//text"/>
  </xsl:template>

<xsl:template match="text">
Chars: <xsl:value-of select="string-length(text())"/>
</xsl:template>

</xsl:stylesheet>'''

    xslt_proc = processor()

    # No explicit URI is supplied for either in-memory document.
    source = inputsource(document_text, None)
    trans = inputsource(stylesheet_text, None)
示例#4
0
def transform(source, transforms, params=None, output=None):
    """
    Run an XSLT transform over an XML document and return the result
    object handed back by the processor.

    source - the XML document to transform, as a string (not a Unicode
             object), file-like object (stream), file path, URI or
             amara.lib.inputsource instance.  Strings and streams must be
             self-contained XML (no external entities, includes or other
             outside resources)
    transforms - one XSLT document or a list/tuple of XSLT documents; each
                 may be a string, stream, URL, file path or
                 amara.lib.inputsource instance
    params - optional dictionary of stylesheet parameters, keyed by unicode
             object (no namespace) or by (uri, localname) tuple
    output - optional file-like object that receives the output
             (incrementally, as processed)
    """
    # Function-level imports: slightly slower, but they sidestep circular
    # import problems.
    from amara.lib import inputsource
    from amara.xpath.util import parameterize
    from amara.xslt.result import streamresult, stringresult
    from amara.xslt.processor import processor

    stylesheet_params = parameterize(params) if params else {}
    xslt = processor()

    many = isinstance(transforms, (list, tuple))
    for sheet in (transforms if many else [transforms]):
        xslt.append_transform(inputsource(sheet))

    if output is None:
        destination = stringresult()
    else:
        destination = streamresult(output)
    return xslt.run(inputsource(source), stylesheet_params, destination)
示例#5
0
 def test_method(self):
     """
     Apply an XUpdate document that is expected to fail and check that the
     XUpdateError raised carries the expected error code.

     Reads self.source, self.xupdate and self.error_code.
     """
     source = inputsource(self.source, 'source')
     xupdate = inputsource(self.xupdate, 'xupdate-error-source')
     expected = format_error(self.error_code)
     try:
         apply_xupdate(source, xupdate)
     except XUpdateError, error:
         compared = format_error(error.code)
         self.assertEqual(expected, compared)
     else:
         # Without this branch an update that wrongly succeeds would let
         # the test pass without checking anything.
         self.fail('expected XUpdateError (%s) was not raised' % (expected,))
示例#6
0
 def test_method(self):
     """
     Run the XUpdate in self.xupdate against self.source and verify that it
     fails with the error identified by self.error_code.
     """
     source = inputsource(self.source, 'source')
     xupdate = inputsource(self.xupdate, 'xupdate-error-source')
     expected = format_error(self.error_code)
     try:
         apply_xupdate(source, xupdate)
     except XUpdateError, error:
         compared = format_error(error.code)
         self.assertEqual(expected, compared)
     else:
         # A successful update means the error under test never happened;
         # report that instead of passing silently.
         self.fail('expected XUpdateError (%s) was not raised' % (expected,))
示例#7
0
 def setUp(self):
     """
     Replace the fixture descriptions on this test case with inputsource
     objects.

     self.source becomes a single inputsource.  self.transform becomes a
     list of inputsources (one per described stylesheet), or an empty tuple
     when no stylesheet was given.
     """
     described = self.source
     self.source = inputsource(described.source, described.uri)

     stylesheets = self.transform
     if isinstance(stylesheets, testsource):
         # A lone testsource is handled like a one-element sequence.
         stylesheets = [stylesheets]
     if stylesheets:
         self.transform = [inputsource(sheet.source, sheet.uri)
                           for sheet in stylesheets]
     else:
         self.transform = ()
     return
示例#8
0
文件: __init__.py 项目: mredar/amara
 def setUp(self):
     """
     Turn the declared source and transform fixtures into inputsource
     instances before each test.

     After this runs, self.source is an inputsource and self.transform is
     either a list of inputsources or an empty tuple.
     """
     doc = self.source
     self.source = inputsource(doc.source, doc.uri)

     spec = self.transform
     if isinstance(spec, testsource):
         prepared = [inputsource(spec.source, spec.uri)]
     elif spec:
         prepared = [inputsource(item.source, item.uri) for item in spec]
     else:
         prepared = ()
     self.transform = prepared
     return
示例#9
0
文件: html.py 项目: mredar/amara
def parse(source,
          prefixes=None,
          model=None,
          encoding=None,
          use_xhtml_ns=False):
    '''
    Parse an input source with HTML text into an Amara Bindery tree

    source - the HTML input; it is wrapped with inputsource and its stream
             is handed to the html5lib parser
    prefixes - optional; when truthy it is passed, with the parsed document,
               to set_namespaces
    model - optional; when truthy its `clone` attribute is used as the
            entity factory for the tree builder, otherwise the module-level
            `entity` is used
    encoding - forwarded to the html5lib parser's parse() call
    use_xhtml_ns - boolean, whether or not to use
                   http://www.w3.org/1999/xhtml; only forwarded to the tree
                   builder when PRE_0_90 is false

    Warning: if you pass a string, you must make sure it's a byte string, not a Unicode object.  You might also want to wrap it with amara.lib.inputsource.text if it's not obviously XML or HTML (for example it could be confused with a file name)
    '''
    from amara.lib.util import set_namespaces
    #Example: from amara.bindery import html; doc = html.parse("http://www.hitimewine.net/istar.asp?a=6&id=161153!1247")

    def _entity_factory():
        # Resolved lazily, each time html5lib asks for a tree builder.
        return model.clone if model else entity

    # The tree-builder factory takes different arguments depending on
    # the PRE_0_90 flag.
    if PRE_0_90:
        def get_tree_instance():
            return treebuilder(_entity_factory())
    else:
        def get_tree_instance(namespaceHTMLElements,
                              use_xhtml_ns=use_xhtml_ns):
            return treebuilder(_entity_factory(), use_xhtml_ns)

    parser = html5lib.HTMLParser(tree=get_tree_instance)
    stream = inputsource(source, None).stream
    doc = parser.parse(stream, encoding=encoding)
    if prefixes:
        set_namespaces(doc, prefixes)
    return doc
示例#10
0
文件: processor.py 项目: mredar/amara
    def append_transform(self, source, uri=None):
        """
        Add an XSL transformation document to the processor.

        source - the stylesheet: a tree node, an inputsource, or anything
                 else, which is wrapped in an inputsource together with
                 `uri`.
        uri - optional override document URI.

        This method establishes the transformation that the processor will use
        to transform a source tree into a result tree.  If a transform has
        already been appended, then this method is equivalent to having, in an
        outer "shell" document, an `xsl:import` for the most recently added
        transform followed by an `xsl:import` for the document accessible via
        the given `source`.

        Raises ValueError when `source` is a tree node, no `uri` is given
        and the node's root has no `xml_base` attribute.
        """
        if not isinstance(source, tree.node):
            # Anything that is not already a parsed tree goes through an
            # inputsource.
            if not isinstance(source, inputsource):
                source = inputsource(source, uri)
            self.transform = self._reader.parse(source)
            return

        document = source.xml_root
        if not uri:
            try:
                uri = document.xml_base
            except AttributeError:
                raise ValueError('base-uri required for %s' % document)
        # Remember the already-parsed document under its URI.
        self._documents[uri] = document
        self.transform = self._reader.parse(document)
        return
示例#11
0
    def test_scheme_registry_resolver(self):
        """
        Resolve 'eval:' and 'shift:' URIs through a scheme_registry_resolver,
        first with both handlers registered by scheme, then with the shift
        handler registered under the None key instead of 'shift'.
        """
        def eval_scheme_handler(uri, base=None):
            full = base + uri if base else uri
            # Skip the 5 leading characters ('eval:') and evaluate the rest.
            # eval() is only fed the fixed expressions in scheme_cases below.
            return str(eval(full[5:]))

        def shift_scheme_handler(uri, base=None):
            full = base + uri if base else uri
            # Skip the 6 leading characters ('shift:') and move every
            # remaining character up by one code point.
            return ''.join([chr(ord(ch) + 1) for ch in full[6:]])

        resolver = irihelpers.scheme_registry_resolver(
            handlers={'eval': eval_scheme_handler,
                      'shift': shift_scheme_handler})
        # Constructed but not used further by this test.
        start_isrc = inputsource(FILE('sampleresource.txt'),
                                 resolver=resolver)

        # (base, relative, expected result)
        scheme_cases = [
            (None, 'eval:150-50', '100'),
            (None, 'shift:abcde', 'bcdef'),
            ('eval:150-', '50', '100'),
            ('shift:ab', 'cde', 'bcdef'),
        ]

        def check_all_cases():
            for base, relative, expected in scheme_cases:
                res = resolver.resolve(relative, base)
                self.assertEqual(expected, res,
                                 "URI: base=%s uri=%s" % (base, relative))

        check_all_cases()

        # Re-register the shift handler under the None key and repeat.
        resolver.handlers[None] = shift_scheme_handler
        del resolver.handlers['shift']

        check_all_cases()
示例#12
0
    def append_transform(self, source, uri=None):
        """
        Add an XSL transformation document to the processor.

        source - stylesheet to add; may be a tree node, an inputsource or
                 any object that inputsource can wrap.
        uri - optional override document URI.

        The transformation set here is what the processor uses to turn a
        source tree into a result tree.  Appending to a processor that
        already has a transform is equivalent to an outer "shell" document
        with an `xsl:import` for the most recently added transform followed
        by an `xsl:import` for the document given as `source`.

        A ValueError is raised for a tree node whose root provides no
        `xml_base` when no `uri` was passed.
        """
        from_tree = isinstance(source, tree.node)
        if from_tree:
            stylesheet_doc = source.xml_root
            if not uri:
                try:
                    uri = stylesheet_doc.xml_base
                except AttributeError:
                    raise ValueError('base-uri required for %s' % stylesheet_doc)
            # Keep the parsed document available under its URI.
            self._documents[uri] = stylesheet_doc
            to_parse = stylesheet_doc
        else:
            if isinstance(source, inputsource):
                to_parse = source
            else:
                to_parse = inputsource(source, uri)
        self.transform = self._reader.parse(to_parse)
        return
示例#13
0
def parse(source,
          prefixes=None,
          model=None,
          encoding=None,
          use_xhtml_ns=False):
    '''
    Parse HTML from an input source with html5lib and return the document
    built by the tree builder.

    source - the HTML input; wrapped with inputsource, whose stream is what
             the parser reads
    prefixes - optional; when truthy, set_namespaces(doc, prefixes) is
               applied to the parsed document
    model - optional; when truthy, `model.clone` is the entity factory given
            to the tree builder, otherwise the module-level `entity` is
    encoding - passed through to the html5lib parse() call
    use_xhtml_ns - boolean, whether or not to use
                   http://www.w3.org/1999/xhtml; only given to the tree
                   builder when PRE_0_90 is false
    '''
    from amara.lib.util import set_namespaces
    #Example: from amara.bindery import html; doc = html.parse("http://www.hitimewine.net/istar.asp?a=6&id=161153!1247")

    def _pick_factory():
        # Evaluated when html5lib requests a tree builder.
        return model.clone if model else entity

    # The tree-builder factory takes different arguments depending on
    # the PRE_0_90 flag.
    if not PRE_0_90:
        def get_tree_instance(namespaceHTMLElements,
                              use_xhtml_ns=use_xhtml_ns):
            return treebuilder(_pick_factory(), use_xhtml_ns)
    else:
        def get_tree_instance():
            return treebuilder(_pick_factory())

    html_parser = html5lib.HTMLParser(tree=get_tree_instance)
    document = html_parser.parse(inputsource(source, None).stream,
                                 encoding=encoding)
    if prefixes:
        set_namespaces(document, prefixes)
    return document
示例#14
0
def test_if_1():
    """`xsl:if` emits a separator after every entry except the last"""
    stylesheet = """<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

  <xsl:strip-space elements='*'/>

  <xsl:template match="/">
    <HTML>
    <HEAD><TITLE>Address Book</TITLE>
    </HEAD>
    <BODY>
    <TABLE><xsl:apply-templates/></TABLE>
    </BODY>
    </HTML>
  </xsl:template>

  <xsl:template match="ENTRY">
        <xsl:element name='TR'>
        <xsl:apply-templates select='NAME'/>
        </xsl:element>
        <xsl:if test='not(position()=last())'><HR/></xsl:if>
  </xsl:template>

  <xsl:template match="NAME">
    <xsl:element name='TD'>
    <xsl:attribute name='ALIGN'>CENTER</xsl:attribute>
      <B><xsl:apply-templates/></B>
    </xsl:element>
  </xsl:template>

</xsl:stylesheet>
"""
    wanted = """<HTML>
  <HEAD>
    <META HTTP-EQUIV='Content-Type' CONTENT='text/html; charset=iso-8859-1'>
    <TITLE>Address Book</TITLE>
  </HEAD>
  <BODY>
    <TABLE>
      <TR>
        <TD ALIGN='CENTER'><B>Pieter Aaron</B></TD>
      </TR>
      <HR>
      <TR>
        <TD ALIGN='CENTER'><B>Emeka Ndubuisi</B></TD>
      </TR>
      <HR>
      <TR>
        <TD ALIGN='CENTER'><B>Vasia Zhugenev</B></TD>
      </TR>
    </TABLE>
  </BODY>
</HTML>"""
    # The address-book fixture is transformed with the stylesheet above and
    # the outcome is checked by _run_xml against the expected markup.
    address_book = inputsource(FILE("addr_book1.xml"))
    _run_xml(
        source_xml=address_book,
        transform_uri="file:xslt/test_if.py",
        transform_xml=stylesheet,
        expected=wanted)
示例#15
0
文件: test_if.py 项目: mredar/amara
def test_if_1():
    """`xsl:if` with a position()/last() test"""
    transform_text = """<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

  <xsl:strip-space elements='*'/>

  <xsl:template match="/">
    <HTML>
    <HEAD><TITLE>Address Book</TITLE>
    </HEAD>
    <BODY>
    <TABLE><xsl:apply-templates/></TABLE>
    </BODY>
    </HTML>
  </xsl:template>

  <xsl:template match="ENTRY">
        <xsl:element name='TR'>
        <xsl:apply-templates select='NAME'/>
        </xsl:element>
        <xsl:if test='not(position()=last())'><HR/></xsl:if>
  </xsl:template>

  <xsl:template match="NAME">
    <xsl:element name='TD'>
    <xsl:attribute name='ALIGN'>CENTER</xsl:attribute>
      <B><xsl:apply-templates/></B>
    </xsl:element>
  </xsl:template>

</xsl:stylesheet>
"""
    expected_output = """<HTML>
  <HEAD>
    <META HTTP-EQUIV='Content-Type' CONTENT='text/html; charset=iso-8859-1'>
    <TITLE>Address Book</TITLE>
  </HEAD>
  <BODY>
    <TABLE>
      <TR>
        <TD ALIGN='CENTER'><B>Pieter Aaron</B></TD>
      </TR>
      <HR>
      <TR>
        <TD ALIGN='CENTER'><B>Emeka Ndubuisi</B></TD>
      </TR>
      <HR>
      <TR>
        <TD ALIGN='CENTER'><B>Vasia Zhugenev</B></TD>
      </TR>
    </TABLE>
  </BODY>
</HTML>"""
    # Keyword arguments are gathered first so the call itself stays short.
    run_args = dict(
        source_xml=inputsource(FILE("addr_book1.xml")),
        transform_uri="file:xslt/test_if.py",
        transform_xml=transform_text,
        expected=expected_output,
    )
    _run_xml(**run_args)
示例#16
0
def test_elem_attr_1():
    """`xsl:element` and `xsl:attribute` instantiation"""
    stylesheet = """<?xml version="1.0"?>
<xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
version="1.0">

  <xsl:template match="/">
    <HTML>
    <HEAD><TITLE>Address Book</TITLE>
    </HEAD>
    <BODY>
    <TABLE><xsl:apply-templates/></TABLE>
    </BODY>
    </HTML>
  </xsl:template>

  <xsl:template match="ENTRY">
        <xsl:element name='TR'>
        <xsl:apply-templates select='NAME'/>
        </xsl:element>
  </xsl:template>

  <xsl:template match="NAME">
    <xsl:element name='TD'>
    <xsl:attribute name='ALIGN'>CENTER</xsl:attribute>
      <B><xsl:apply-templates/></B>
    </xsl:element>
  </xsl:template>

</xsl:transform>"""
    # The \x20 escapes spell out lines made of four spaces in the expected
    # output.
    wanted = """<HTML>
  <HEAD>
    <META HTTP-EQUIV='Content-Type' CONTENT='text/html; charset=iso-8859-1'>
    <TITLE>Address Book</TITLE>
  </HEAD>
  <BODY>
    <TABLE>
\x20\x20\x20\x20
      <TR>
        <TD ALIGN='CENTER'><B>Pieter Aaron</B></TD>
      </TR>
\x20\x20\x20\x20
      <TR>
        <TD ALIGN='CENTER'><B>Emeka Ndubuisi</B></TD>
      </TR>
\x20\x20\x20\x20
      <TR>
        <TD ALIGN='CENTER'><B>Vasia Zhugenev</B></TD>
      </TR>

    </TABLE>
  </BODY>
</HTML>"""
    address_book = inputsource(os.path.join(module_dirname, 'addr_book1.xml'))
    address_book_uri = "file:" + module_dirname + "/addr_book1.xml"
    _run_html(
        source_xml=address_book,
        source_uri=address_book_uri,
        transform_xml=stylesheet,
        expected=wanted)
示例#17
0
    def parse(self, source):
        """
        Read a stylesheet source document from an InputSource and add it to
        the stylesheet tree.

        Documents already seen under the same URI are served from cache, so
        duplicate imports, includes or stylesheet appends do not trigger
        another read.  Returns the parsed stylesheet.
        """
        uri = source.uri
        content = ''

        root = self._root
        if root is not None:
            if uri in root.sourceNodes:
                # An already-parsed document is preferred because its
                # external entities and XIncludes are resolved.  It is
                # removed from the cache so fromDocument will process it;
                # fromDocument puts it back when it is done.
                doc = root.sourceNodes[uri]
                del root.sourceNodes[uri]
                return self.fromDocument(doc, baseUri=uri)
            if uri in root.sources:
                # Cached text is acceptable too, although external entities
                # and XIncludes are not cached and the InputSource class may
                # differ from the first parse, so they might resolve
                # differently this time.
                content = root.sources[uri]
                source = inputsource(content, uri)

        if not content:
            # Nothing cached: read the stream and re-wrap the text so it
            # can be both parsed and cached.
            content = source.stream.read()
            source = inputsource(cStringIO.StringIO(content), source.uri)

        # No parser features or properties are requested.
        stylesheet = self._parseSrc(source, [], [])

        # Keep the text for later xsl:import/xsl:include and document() use.
        self._root.sources[uri] = content

        return stylesheet
示例#18
0
    def parse(self, source):
        """
        Parse the stylesheet held by an InputSource and attach it to the
        stylesheet tree, returning the parsed stylesheet.

        A document whose URI has been read before is taken from cache
        (parsed form first, raw text second), so repeated imports, includes
        or stylesheet appends are not read again.
        """
        uri = source.uri
        cached_text = ''

        if self._root is not None:
            parsed = self._root.sourceNodes
            texts = self._root.sources
            if uri in parsed:
                # Prefer the parsed document: its external entities and
                # XIncludes are already resolved.  Uncache it temporarily so
                # fromDocument processes it; fromDocument re-caches it.
                doc = parsed[uri]
                del parsed[uri]
                return self.fromDocument(doc, baseUri=uri)
            elif uri in texts:
                # Raw text may be reused, with the caveat that external
                # entities and XIncludes are not cached and could resolve
                # differently on this pass.
                cached_text = texts[uri]
                source = inputsource(cached_text, uri)

        content = cached_text
        if not content:
            content = source.stream.read()
            source = inputsource(cStringIO.StringIO(content), source.uri)

        features = []
        properties = []
        stylesheet = self._parseSrc(source, features, properties)

        # Remember the text for xsl:import/xsl:include and document().
        self._root.sources[uri] = content

        return stylesheet
示例#19
0
def rdfascrape(source):
    """
    Parse an HTML source and lazily yield the result of handle_statement
    for every element that carries a property, resource or rel attribute.

    source - anything accepted by amara.lib.inputsource
    Returns a generator; handle_statement is called as it is consumed.
    """
    from amara.lib import inputsource
    source = inputsource(source, None)
    doc = html.parse(source.stream)
    try:
        # Prefer the document's own <base href>, when it has one.
        docuri = doc.html.head.base.href
    except Exception:
        # Still best-effort (any lookup failure falls back to the source
        # URI), but unlike a bare except this lets KeyboardInterrupt and
        # SystemExit through.
        docuri = source.uri

    statement_elems = doc.xml_select(u'//*[@property|@resource|@rel]')
    triples = (handle_statement(elem, docuri) for elem in statement_elems)
    return triples
示例#20
0
def rdfascrape(source):
    """
    Scrape an HTML document for elements with a property, resource or rel
    attribute and return a generator of handle_statement results for them.

    source - anything accepted by amara.lib.inputsource
    """
    from amara.lib import inputsource
    source = inputsource(source, None)
    doc = html.parse(source.stream)
    try:
        # Use the document's <base href> as the document URI when present.
        docuri = doc.html.head.base.href
    except Exception:
        # Fall back to the source URI on any lookup failure.  Exception
        # (rather than a bare except) keeps KeyboardInterrupt and
        # SystemExit from being swallowed.
        docuri = source.uri

    statement_elems = doc.xml_select(u'//*[@property|@resource|@rel]')
    triples = ( handle_statement(elem, docuri) for elem in statement_elems )
    return triples
示例#21
0
def test_many_inputsources():
    assert rlimit_nofile < 20000, "is your file limit really that large?"

    # Amara's inputsource consumes a filehandle, in the 'stream' attribute
    # See what happens if we run out of file handles.
    sources = []
    filename = __file__
    for i in range(rlimit_nofile):
        try:
            sources.append(inputsource(filename))
        except:
            print "Failed after", i, "files"
示例#22
0
 def test_plain_parse(self):
     """Parse plain text"""
     fragment = parse_fragment(inputsource(SOURCE1))
     self.assertEqual(EXPECTED1, fragment.xml_encode())

     # Minimal node testing: exactly one child, an un-namespaced <p>
     self.assertEqual(len(fragment.xml_children), 1)
     paragraph = fragment.xml_children[0]
     self.assertEqual(paragraph.xml_typecode, tree.element.xml_typecode)
     self.assertEqual(paragraph.xml_qname, u'p')
     self.assertEqual(paragraph.xml_namespace, None)
     self.assertEqual(paragraph.xml_prefix, None)
示例#23
0
def parse(obj,
          uri=None,
          entity_factory=None,
          standalone=False,
          validate=False,
          rule_handler=None):
    '''
    Parse an XML input source and return a tree

    :param obj: object with "text" to parse
    :type obj: string, Unicode object (only if you really
        know what you're doing), file-like object (stream), file path, URI or
        `amara.inputsource` object
    :param uri: optional document URI.  You really should provide this if the input source is a
        text string or stream
    :type uri: string
    :return: Parsed tree object
    :rtype: `amara.tree.entity` instance
    :raises `amara.ReaderError`: If the XML is not well formed, or there are other core parsing errors

    entity_factory - optional factory callable for creating entity nodes.  This is the
                     main lever for customizing the classes used to construct tree nodes
    standalone - similar to the standalone declaration for XML.  Asserts that the XML
                 being parsed does not require any resources outside the given input source
                 (e.g. on the network).  In this case has the side-effect of ignoring such
                 external resources if they are encountered (which is where it diverges
                 from XML core.  In XML core that would be a fatal error).  Takes
                 precedence over validate when both are set
    validate - whether or not to apply DTD validation
    rule_handler - Handler object used to perform rule matching in incremental processing.

    Examples:

    >>> import amara
    >>> MONTY_XML = """<monty>
    ...   <python spam="eggs">What do you mean "bleh"</python>
    ...   <python ministry="abuse">But I was looking for argument</python>
    ... </monty>"""
    >>> doc = amara.parse(MONTY_XML)
    >>> len(doc.xml_children)
    1

    '''
    # Exactly one flag set is chosen: standalone first, then validate,
    # otherwise external entities are enabled.
    flags = (PARSE_FLAGS_STANDALONE if standalone
             else PARSE_FLAGS_VALIDATE if validate
             else PARSE_FLAGS_EXTERNAL_ENTITIES)
    wrapped = inputsource(obj, uri)
    return _parse(wrapped,
                  flags,
                  entity_factory=entity_factory,
                  rule_handler=rule_handler)
示例#24
0
 def test_uri_jail(self):
     """
     A resolver restricted to one directory resolves a sibling file but
     raises IriError for a URI outside that directory.
     """
     start_uri = iri.os_path_to_uri(FILE('test_irihelpers.py'))
     start_base = start_uri.rsplit('/', 1)[0] + '/'

     def in_start_directory(candidate):
         # True when the URI's directory part equals start_base.
         return candidate.rsplit('/', 1)[0] + '/' == start_base

     #Only allow access files in the same directory as sampleresource.txt via URL jails
     auths = [(in_start_directory, True)]
     resolver = irihelpers.resolver(authorizations=auths)
     start_isrc = inputsource(start_uri, resolver=resolver)

     new_isrc = start_isrc.resolve('sampleresource.txt', start_base)
     fetched = new_isrc.stream.read().strip()
     self.assertEqual('Spam', fetched)

     self.assertRaises(iri.IriError, resolver.resolve,
                       'http://google.com', start_base)
示例#25
0
 def test_parse_overridden_default_namespace_reoverridden_child(self):
     """Parse with overridden default namespace and re-overridden child"""
     xhtml_ns = u'http://www.w3.org/1999/xhtml'
     nss = {
         u'xml': u'http://www.w3.org/XML/1998/namespace',
         None: xhtml_ns,
     }
     fragment = parse_fragment(inputsource(SOURCE2), nss)
     self.assertEqual(EXPECTED3, fragment.xml_encode())

     # Minimal node testing: one child, a <p> in the default XHTML namespace
     self.assertEqual(len(fragment.xml_children), 1)
     paragraph = fragment.xml_children[0]
     self.assertEqual(paragraph.xml_typecode, tree.element.xml_typecode)
     self.assertEqual(paragraph.xml_qname, u'p')
     self.assertEqual(paragraph.xml_namespace, xhtml_ns)
     self.assertEqual(paragraph.xml_prefix, None)
示例#26
0
文件: xslt.py 项目: dpla/akara
def akara_xslt(body, ctype, **params):
    '''
    Apply an XSLT transform to the request body and return the result.

    @xslt - URL to the XSLT transform to be applied; when absent,
            DEFAULT_TRANSFORM is used, and ValueError is raised if that is
            not set either

    NOTE(review): the remaining query parameters arrive in **params, but
    this function does not forward them to transform() -- confirm whether
    they were meant to be passed as top-level XSLT params.

    Sample request:
    curl --request POST --data-binary "@foo.xml" --header "Content-Type: application/xml" "http://*****:*****@xslt=http://hg.akara.info/amara/trunk/raw-file/tip/demo/data/identity.xslt"
    '''
    if "@xslt" not in params:
        if not DEFAULT_TRANSFORM:
            raise ValueError('XSLT transform required')
        stylesheet_ref = DEFAULT_TRANSFORM
    else:
        stylesheet_ref = params["@xslt"]

    #Using restricted_resolver should forbid Any URI access outside the specified "jails"
    #Including access through imports and includes
    restricted_resolver = irihelpers.resolver(authorizations=ALLOWED)
    document = inputsource(body, resolver=restricted_resolver)
    stylesheet = inputsource(stylesheet_ref, resolver=restricted_resolver)
    result = transform(document, stylesheet)

    # The response content type is the media type reported by the result.
    response.add_header("Content-Type", result.parameters.media_type)
    return result
示例#27
0
def rdfascrape(source):
    """
    Parse an HTML source and return a generator of handle_statement results
    for every element carrying a property, a resource or a rel attribute.

    source - anything accepted by amara.lib.inputsource

    The three attribute queries are run separately and chained, so results
    come grouped by attribute, and an element carrying more than one of the
    attributes is handled once per matching query.
    """
    from amara.lib import inputsource
    source = inputsource(source, None)
    doc = html.parse(source.stream)
    try:
        # Use the document's <base href> as the document URI when present.
        docuri = doc.html.head.base.href
    except Exception:
        # Best-effort fallback to the source URI.  Exception (rather than a
        # bare except) lets KeyboardInterrupt and SystemExit propagate.
        docuri = source.uri

    #https://github.com/zepheira/amara/issues/8
    #statement_elems = doc.xml_select(u'//*[@property|@resource|@rel]')
    statement_elems = chain(doc.xml_select(u'//*[@property]'), doc.xml_select(u'//*[@resource]'), doc.xml_select(u'//*[@rel]'))
    triples = ( handle_statement(elem, docuri) for elem in statement_elems )
    return triples
示例#28
0
def _run(source_xml, transform_xml, expected, parameters,
         compare_method, source_uri=None, transform_uri=None,
         processor_kwargs={}):
    P = processor(**processor_kwargs)
    source = inputsource(source_xml, source_uri)
    transform = inputsource(transform_xml, transform_uri)
    P.append_transform(transform)
    if parameters is not None:
        parameters = util.parameterize(parameters)
    result = str(P.run(source, parameters=parameters))
    try:
        diff = compare_method(result, expected)
        diff = list(diff)
        assert not diff, (source_xml, transform_xml, result, expected, diff)
    except Exception, err:
        # I don't have a quick way to tell which string caused
        # the error, so let the person debugging figure it out.
        print "=== RESULT ==="
        print result
        print "=== EXPECTED ==="
        print expected
        print "=== DONE ==="
        raise
示例#29
0
 def test_parse_overridden_non_default_namespace(self):
     """Parse with overridden non-default namespace"""
     xhtml_ns = u'http://www.w3.org/1999/xhtml'
     nss = {
         u'xml': u'http://www.w3.org/XML/1998/namespace',
         u'h': xhtml_ns,
     }
     fragment = parse_fragment(inputsource(SOURCE3), nss)
     self.assertEqual(EXPECTED4, fragment.xml_encode())

     # Minimal node testing: one child, an <h:p> bound to the XHTML namespace
     self.assertEqual(len(fragment.xml_children), 1)
     paragraph = fragment.xml_children[0]
     self.assertEqual(paragraph.xml_typecode, tree.element.xml_typecode)
     self.assertEqual(paragraph.xml_qname, u'h:p')
     self.assertEqual(paragraph.xml_namespace, xhtml_ns)
     self.assertEqual(paragraph.xml_prefix, u'h')
示例#30
0
def akara_xslt(body, ctype, **params):
    '''
    Transform the posted body with an XSLT stylesheet and return the result.

    @xslt - URL to the XSLT transform to be applied.  Without it,
            DEFAULT_TRANSFORM is used; ValueError is raised when neither is
            available

    NOTE(review): other query parameters are accepted through **params but
    are not handed to transform() here -- confirm whether they should be
    passed on as top-level XSLT params.

    Sample request:
    curl --request POST --data-binary "@foo.xml" --header "Content-Type: application/xml" "http://*****:*****@xslt=http://hg.akara.info/amara/trunk/raw-file/tip/demo/data/identity.xslt"
    '''
    requested = "@xslt" in params
    if not requested and not DEFAULT_TRANSFORM:
        raise ValueError('XSLT transform required')
    transform_ref = params["@xslt"] if requested else DEFAULT_TRANSFORM

    #Using restricted_resolver should forbid Any URI access outside the specified "jails"
    #Including access through imports and includes
    restricted_resolver = irihelpers.resolver(authorizations=ALLOWED)
    jailed_body = inputsource(body, resolver=restricted_resolver)
    jailed_transform = inputsource(transform_ref,
                                   resolver=restricted_resolver)
    result = transform(jailed_body, jailed_transform)

    # Advertise the media type reported by the transform result.
    response.add_header("Content-Type", result.parameters.media_type)
    return result
示例#31
0
    def __init__(self,
                 location=None,
                 graph=None,
                 debug=False,
                 nsBindings = None,
                 owlEmbeddings = False):
        self.owlEmbeddings = owlEmbeddings
        self.nsBindings = nsBindings if nsBindings else {}
        self.location = location
        self.rules = {}
        self.debug = debug
        if graph:
            assert location is None,"Must supply one of graph or location"
            self.graph = graph
            if debug:
                print "RIF in RDF graph was provided"
        else:
            assert graph is None,"Must supply one of graph or location"
            if debug:
                print "RIF document URL provided ", location
            if self.location.find('http:')+1:
                req = urllib2.Request(self.location)

                ##From: http://www.diveintopython.org/http_web_services/redirects.html
                #points an 'opener' to the address to 'sniff' out final Location header
                opener = urllib2.build_opener(SmartRedirectHandler())
                f = opener.open(req)
                self.content = f.read()
            else:
                try:
                    self.content = urllib2.urlopen(self.location).read()
                except ValueError:
                    self.content = urllib2.urlopen(iri.os_path_to_uri(self.location)).read()
#                self.content = open(self.location).read()
            try:
                rdfContent = transform(self.content,inputsource(TRANSFORM_URI))
                self.graph = Graph().parse(StringIO(rdfContent))
                if debug:
                    print "Extracted rules from RIF XML format"
            except ValueError:
                try:
                    self.graph = Graph().parse(StringIO(self.content),format='xml')
                except:
                    self.graph = Graph().parse(StringIO(self.content),format='n3')
                if debug:
                    print "Extracted rules from RIF in RDF document"
        self.nsBindings.update(dict(self.graph.namespaces()))
示例#32
0
文件: tree.py 项目: abed-hawa/amara
def parse(obj, uri=None, entity_factory=None, standalone=False, validate=False, rule_handler=None):
    '''
    Build a tree from an XML input source.

    :param obj: the XML to parse
    :type obj: string, Unicode object (only if you really know what you're
        doing), file-like object (stream), file path, URI or
        `amara.inputsource` object
    :param uri: optional document URI.  Strongly recommended whenever the
        input source is a text string or stream
    :type uri: string
    :param entity_factory: optional factory callable for creating entity
        nodes.  This is the main lever for customizing the classes used to
        construct tree nodes
    :param standalone: similar to the standalone declaration for XML.
        Asserts that the XML being parsed does not require any resources
        outside the given input source (e.g. on the network).  Has the
        side-effect of ignoring such external resources if they are
        encountered (this is where it diverges from XML core, where that
        would be a fatal error)
    :param validate: whether or not to apply DTD validation.  Only consulted
        when ``standalone`` is false
    :param rule_handler: handler object used to perform rule matching in
        incremental processing
    :return: Parsed tree object
    :rtype: `amara.tree.entity` instance
    :raises `amara.ReaderError`: If the XML is not well formed, or there are
        other core parsing errors

    Examples:

    >>> import amara
    >>> MONTY_XML = """<monty>
    ...   <python spam="eggs">What do you mean "bleh"</python>
    ...   <python ministry="abuse">But I was looking for argument</python>
    ... </monty>"""
    >>> doc = amara.parse(MONTY_XML)
    >>> len(doc.xml_children)
    1

    '''
    # Exactly one flag is selected; standalone wins over validate, and
    # external entity loading is the fallback.
    flags = (PARSE_FLAGS_STANDALONE if standalone
             else PARSE_FLAGS_VALIDATE if validate
             else PARSE_FLAGS_EXTERNAL_ENTITIES)
    source = inputsource(obj, uri)
    return _parse(source, flags,
                  entity_factory=entity_factory,
                  rule_handler=rule_handler)
示例#33
0
def test_copy_2():
    """Run the identity transform over addr_book1.xml and compare the output with the expected serialization."""
    book_path = os.path.join(module_dirname, 'addr_book1.xml')
    book_source = inputsource(book_path)
    book_uri = "file:" + module_dirname + "/addr_book1.xml"

    # Stylesheet with a single template that copies every attribute and node.
    identity_sheet = """<?xml version="1.0"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

<xsl:template match="@*|node()">
  <xsl:copy>
    <xsl:apply-templates select="@*|node()"/>
  </xsl:copy>
</xsl:template>

</xsl:stylesheet>
"""

    expected_output = """<?xml version='1.0' encoding='UTF-8'?>
<?xml-stylesheet href="addr_book1.xsl" type="text/xml"?><ADDRBOOK>
    <ENTRY ID='pa'>
        <NAME>Pieter Aaron</NAME>
        <ADDRESS>404 Error Way</ADDRESS>
        <PHONENUM DESC='Work'>404-555-1234</PHONENUM>
        <PHONENUM DESC='Fax'>404-555-4321</PHONENUM>
        <PHONENUM DESC='Pager'>404-555-5555</PHONENUM>
        <EMAIL>[email protected]</EMAIL>
    </ENTRY>
    <ENTRY ID='en'>
        <NAME>Emeka Ndubuisi</NAME>
        <ADDRESS>42 Spam Blvd</ADDRESS>
        <PHONENUM DESC='Work'>767-555-7676</PHONENUM>
        <PHONENUM DESC='Fax'>767-555-7642</PHONENUM>
        <PHONENUM DESC='Pager'>800-SKY-PAGEx767676</PHONENUM>
        <EMAIL>[email protected]</EMAIL>
    </ENTRY>
    <ENTRY ID='vz'>
        <NAME>Vasia Zhugenev</NAME>
        <ADDRESS>2000 Disaster Plaza</ADDRESS>
        <PHONENUM DESC='Work'>000-987-6543</PHONENUM>
        <PHONENUM DESC='Cell'>000-000-0000</PHONENUM>
        <EMAIL>[email protected]</EMAIL>
    </ENTRY>
</ADDRBOOK>"""

    _run_xml(source_xml=book_source,
             source_uri=book_uri,
             transform_xml=identity_sheet,
             expected=expected_output)
示例#34
0
def test_copy_2():
    """Apply a copy-everything stylesheet to addr_book1.xml and check the serialized result."""
    xml_input = inputsource(os.path.join(module_dirname, 'addr_book1.xml'))
    xml_input_uri = "file:" + module_dirname + "/addr_book1.xml"

    # One template matching all attributes and nodes, copying each in turn.
    copy_all_xslt = """<?xml version="1.0"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

<xsl:template match="@*|node()">
  <xsl:copy>
    <xsl:apply-templates select="@*|node()"/>
  </xsl:copy>
</xsl:template>

</xsl:stylesheet>
"""

    wanted = """<?xml version='1.0' encoding='UTF-8'?>
<?xml-stylesheet href="addr_book1.xsl" type="text/xml"?><ADDRBOOK>
    <ENTRY ID='pa'>
        <NAME>Pieter Aaron</NAME>
        <ADDRESS>404 Error Way</ADDRESS>
        <PHONENUM DESC='Work'>404-555-1234</PHONENUM>
        <PHONENUM DESC='Fax'>404-555-4321</PHONENUM>
        <PHONENUM DESC='Pager'>404-555-5555</PHONENUM>
        <EMAIL>[email protected]</EMAIL>
    </ENTRY>
    <ENTRY ID='en'>
        <NAME>Emeka Ndubuisi</NAME>
        <ADDRESS>42 Spam Blvd</ADDRESS>
        <PHONENUM DESC='Work'>767-555-7676</PHONENUM>
        <PHONENUM DESC='Fax'>767-555-7642</PHONENUM>
        <PHONENUM DESC='Pager'>800-SKY-PAGEx767676</PHONENUM>
        <EMAIL>[email protected]</EMAIL>
    </ENTRY>
    <ENTRY ID='vz'>
        <NAME>Vasia Zhugenev</NAME>
        <ADDRESS>2000 Disaster Plaza</ADDRESS>
        <PHONENUM DESC='Work'>000-987-6543</PHONENUM>
        <PHONENUM DESC='Cell'>000-000-0000</PHONENUM>
        <EMAIL>[email protected]</EMAIL>
    </ENTRY>
</ADDRBOOK>"""

    _run_xml(
        source_xml=xml_input,
        source_uri=xml_input_uri,
        transform_xml=copy_all_xslt,
        expected=wanted)
示例#35
0
文件: html.py 项目: distobj/amara
def parse(source, prefixes=None, model=None, encoding=None, use_xhtml_ns=False):
    '''
    Parse HTML text from an input source with html5lib and return the
    resulting document.

    source - anything accepted by inputsource(); its stream is handed to html5lib
    prefixes - optional namespace prefixes applied to the document via set_namespaces
    model - optional object whose clone attribute is used as the entity factory
            in place of the default entity
    encoding - passed through to the html5lib parser
    use_xhtml_ns - boolean, whether or not to use http://www.w3.org/1999/xhtml
                   (only forwarded to the tree builder when PRE_0_90 is false)
    '''
    from amara.lib.util import set_namespaces

    def _entity_factory():
        #Resolved lazily, each time html5lib asks for a tree builder
        if model:
            return model.clone
        return entity

    if not PRE_0_90:
        def get_tree_instance(namespaceHTMLElements, use_xhtml_ns=use_xhtml_ns):
            return treebuilder(_entity_factory(), use_xhtml_ns)
    else:
        def get_tree_instance():
            return treebuilder(_entity_factory())

    html_parser = html5lib.HTMLParser(tree=get_tree_instance)
    isrc = inputsource(source, None)
    doc = html_parser.parse(isrc.stream, encoding=encoding)
    if prefixes:
        set_namespaces(doc, prefixes)
    return doc
示例#36
0
    def test_basic_uri_resolver(self):
        """Check absolutize() and resolve() on an input source built from sampleresource.txt."""
        start_isrc = inputsource(FILE('sampleresource.txt'))

        # (base URI, relative reference, expected absolute URI)
        cases = (
            ('http://foo.com/root/', 'path', 'http://foo.com/root/path'),
            ('http://foo.com/root', 'path', 'http://foo.com/path'),
        )
        for base_uri, relative, expected in cases:
            resolved = start_isrc.absolutize(relative, base_uri)
            self.assertEqual(expected, resolved,
                             "absolutize: %s %s" % (base_uri, relative))

        # This base is expected to be rejected with IriError.
        self.assertRaises(iri.IriError, start_isrc.absolutize,
                          'path', 'foo:foo.com')

        # Resolve the sample file against the current directory, making sure
        # the directory ends with a separator before converting it to a URI.
        cwd = os.getcwd()
        if not cwd.endswith(os.sep):
            cwd = cwd + os.sep
        base_uri = iri.os_path_to_uri(cwd)
        new_isrc = start_isrc.resolve(FILE('sampleresource.txt'), base_uri)
        first_line = new_isrc.stream.readline().rstrip()
        self.assertEqual('Spam', first_line, 'resolve')
示例#37
0
文件: html.py 项目: abed-hawa/amara
def parse(source, prefixes=None, model=None, encoding=None, use_xhtml_ns=False):
    '''
    Parse an input source with HTML text into an Amara Bindery tree

    Warning: a string argument must be a byte string, not a Unicode object.
    If the text is not obviously XML or HTML (for example it could be
    confused with a file name), consider wrapping it with
    amara.lib.inputsource.text first.

    prefixes - optional namespace prefixes applied to the document via set_namespaces
    model - optional object whose clone attribute replaces the default entity factory
    encoding - passed through to the html5lib parser
    use_xhtml_ns - boolean, whether or not to use http://www.w3.org/1999/xhtml
                   (only forwarded to the tree builder when PRE_0_90 is false)
    '''
    from amara.lib.util import set_namespaces

    def _pick_factory():
        #Evaluated on each tree-builder request, like the inline expression it replaces
        return model.clone if model else entity

    if not PRE_0_90:
        def get_tree_instance(namespaceHTMLElements, use_xhtml_ns=use_xhtml_ns):
            factory = _pick_factory()
            return treebuilder(factory, use_xhtml_ns)
    else:
        def get_tree_instance():
            factory = _pick_factory()
            return treebuilder(factory)

    parser = html5lib.HTMLParser(tree=get_tree_instance)
    html_stream = inputsource(source, None).stream
    doc = parser.parse(html_stream, encoding=encoding)
    if not prefixes:
        return doc
    set_namespaces(doc, prefixes)
    return doc
示例#38
0
 def test_borrowed(source_xml=source_xml,
                   transform_xml=transform_xml,
                   expected=expected_html):
     """Run the HTML transform check, wrapping every argument in an input source first."""
     source_isrc = inputsource(source_xml)
     transform_isrc = inputsource(transform_xml)
     # The expected value is compared as text, so read it back out of its stream.
     expected_text = inputsource(expected).stream.read()
     _run_html(source_xml=source_isrc,
               transform_xml=transform_isrc,
               expected=expected_text)
示例#39
0
 def test_method(self):
     """Apply self.xupdate to self.source; the test passes if no exception is raised."""
     source_isrc = inputsource(self.source, 'source')
     xupdate_isrc = inputsource(self.xupdate, 'xupdate-source')
     # The resulting document is not inspected.
     apply_xupdate(source_isrc, xupdate_isrc)
示例#40
0
# Shared XML fixture wrapped in an input source with the URI
# 'urn:domlette-test-tree'.  The document declares ISO-8859-1 encoding,
# carries an internal DTD subset, uses non-ASCII element names (written with
# \xf6\xf8 escapes), a namespaced element, xml:lang attributes, a comment and
# processing instructions both before and after the root element.
src = inputsource(
    """<?xml version='1.0' encoding='ISO-8859-1'?>
<!DOCTYPE ROOT [
  <!ELEMENT ROOT (#PCDATA|CHILD1|CHILD2|foo:CHILD3|lang)*>
  <!ELEMENT CHILD1 (#PCDATA|GCHILD)*>
  <!ELEMENT CHILD2 (#PCDATA|GCHILD)*>
  <!ELEMENT foo:CHILD3 EMPTY>
  <!ELEMENT GCHILD EMPTY>
  <!ELEMENT lang (foo|f\xf6\xf8)*>
  <!ELEMENT foo EMPTY>
  <!ELEMENT f\xf6\xf8 EMPTY>
  <!ATTLIST CHILD1 attr1 CDATA #IMPLIED
                   attr31 CDATA #IMPLIED>
  <!ATTLIST CHILD2 attr1 CDATA #IMPLIED
                   CODE ID #REQUIRED>
  <!ATTLIST foo:CHILD3 foo:name CDATA #IMPLIED
	           xmlns:foo CDATA #IMPLIED>
  <!ATTLIST GCHILD name CDATA #IMPLIED>
  <!ATTLIST lang xml:lang CDATA #IMPLIED>
  <!ATTLIST foo xml:lang CDATA #IMPLIED>
]>
<?xml-stylesheet "Data" ?>
<ROOT>
  <!-- Test Comment -->
  <CHILD1 attr1="val1" attr31="31">
    <GCHILD name="GCHILD11"/>
    <GCHILD name="GCHILD12"/>
    Text1
  </CHILD1>
  <CHILD2 attr1="val2" CODE="1">
    <GCHILD name="GCHILD21"/>
    <GCHILD name="GCHILD22"/>
  </CHILD2>
  <foo:CHILD3 xmlns:foo="http://foo.com" foo:name="mike"/>
  <lang xml:lang="en">
    <foo xml:lang=""/>
    <foo/>
    <f\xf6\xf8/>
  </lang>
</ROOT>
<?no-data ?>
""", 'urn:domlette-test-tree')
示例#41
0
def test_choose_1():
    """Exercise xsl:choose with two xsl:when branches and an xsl:otherwise fallback."""
    address_book = inputsource(os.path.join(module_name, "addr_book1.xml"))
    address_book_uri = "file:" + module_name + "/addr_book1.xml"

    # Each NAME becomes a table cell followed by a label picked by xsl:choose.
    stylesheet = """<?xml version="1.0"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
version="1.0">
  <xsl:output method='html'/>
  <xsl:template match="/">
    <HTML>
    <HEAD><TITLE>Address Book</TITLE>
    </HEAD>
    <BODY>
    <TABLE><xsl:apply-templates/></TABLE>
    </BODY>
    </HTML>
  </xsl:template>

  <xsl:template match="ENTRY">
    <xsl:element name='TR'>
      <xsl:apply-templates select='NAME'/>
    </xsl:element>
  </xsl:template>

  <xsl:template match="NAME">
    <xsl:element name='TD'>
    <xsl:attribute name='ALIGN'>CENTER</xsl:attribute>
      <B><xsl:apply-templates/></B>
      <xsl:choose>
        <xsl:when test="text()='Pieter Aaron'">: Employee 1</xsl:when>
        <xsl:when test="text()='Emeka Ndubuisi'">: Employee 2</xsl:when>
        <xsl:otherwise>: Other Employee</xsl:otherwise>
      </xsl:choose>
    </xsl:element>
  </xsl:template>

</xsl:stylesheet>
"""

    # The \x20 escapes keep the whitespace-only lines explicit in the literal.
    expected_markup = """<HTML>
  <HEAD>
    <meta http-equiv='Content-Type' content='text/html; charset=iso-8859-1'>
    <TITLE>Address Book</TITLE>
  </HEAD>
  <BODY>
    <TABLE>
\x20\x20\x20\x20
      <TR>
        <TD ALIGN='CENTER'><B>Pieter Aaron</B>: Employee 1</TD>
      </TR>
\x20\x20\x20\x20
      <TR>
        <TD ALIGN='CENTER'><B>Emeka Ndubuisi</B>: Employee 2</TD>
      </TR>
\x20\x20\x20\x20
      <TR>
        <TD ALIGN='CENTER'><B>Vasia Zhugenev</B>: Other Employee</TD>
      </TR>

    </TABLE>
  </BODY>
</HTML>"""

    _run_html(source_xml=address_book,
              source_uri=address_book_uri,
              transform_xml=stylesheet,
              expected=expected_markup)
示例#42
0
        def produce_final_output(response, response_headers=response_headers):
            """
            Apply a server-side XSLT transform to response when one applies.

            Nothing is done (and None is returned) when send_browser_xslt is
            truthy, when environ[ACTIVE_FLAG] is falsy, or when no stylesheet
            can be determined.  Otherwise the stylesheet comes either from
            force_server_side (when it is truthy and not literally True) or
            from the first item yielded while scanning response for
            stylesheet PIs.  After the transform runs, start_response is
            called with the rebuilt headers and the transformed content is
            returned.

            response - the response body; it is scanned for PIs and used as the
                       transform source
            response_headers - (name, value) pairs; content-length and
                       content-type are dropped and a new content-type,
                       taken from the processor's output parameters, is appended
            """
            #Diagnostics are printed to stderr
            log = sys.stderr
            if not send_browser_xslt and environ[ACTIVE_FLAG]:
                #use_pi ends up as the inverse of the ignorePis argument to processor.run
                use_pi = False
                if force_server_side and force_server_side != True:
                    #True is a special flag meaning "don't delegate to the browser but still check for XSLT PIs"
                    xslt = force_server_side
                else:
                    #Check for a Stylesheet PI
                    parser = sax.reader()
                    parser.setFeature(sax.FEATURE_GENERATOR, True)
                    #NOTE(review): handler is never read again here; presumably find_xslt_pis attaches itself to the parser -- confirm
                    handler = find_xslt_pis(parser)
                    pi_iter = parser.parse(inputsource(response))
                    try:
                        #Note: only grabs the first PI.  Consider whether we should handle multiple
                        xslt = pi_iter.next()
                    except StopIteration:
                        #No stylesheet PI was found
                        xslt = None
                    use_pi = True
                if xslt:
                    #The UTF-8 encoded stylesheet reference doubles as the processor cache key below
                    xslt = xslt.encode('utf-8')
                    result = StringIO()
                    #self.xslt_sources = environ.get(
                    #    'wsgixml.applyxslt.xslt_sources', {})
                    source = InputSource.DefaultFactory.fromString(
                        response, uri=get_request_url(environ))
                    params = {}
                    for ns in self.stock_xslt_params:
                        params.update(setup_xslt_params(ns, self.stock_xslt_params[ns]))
                    #Start of the interval reported as elapsed time below
                    start = time.time()

                    #The string literal that follows is an inert expression statement
                    #holding old processor-pool code; it has no effect at run time
                    '''
                        processor = self.processorPool.get_processor(
                            stylesheet, self.ext_functions, self.ext_elements)
                        cherrypy.response.body = processor.run(
                            DefaultFactory.fromString(picket.document,
                                                      picket.uri),
                            topLevelParams=picket.parameters)
                        if self.default_content_type:
                            cherrypy.response.headers['Content-Type'] = self.default_content_type
                        if picket.content_type:
                            cherrypy.response.headers['Content-Type'] = picket.content_type
                    finally:
                        self.processorPool.release_processor(stylesheet)
                        '''


                    if xslt in self.processor_cache:
                        processor = self.processor_cache[xslt]
                        #Any transform would have already been loaded
                        use_pi = False
                        print >> log, 'Using cached processor instance for transform', xslt
                    else:
                        print >> log, 'Creating new processor instance for transform', xslt
                        processor = Processor.Processor()
                        if self.ext_modules:
                            processor.registerExtensionModules(self.ext_modules)
                        #Note: the WSGI environ parameters are only added on this new-processor path
                        if self.use_wsgi_env:
                            params.update(setup_xslt_params(WSGI_NS, environ))
                        #srcAsUri = OsPathToUri()
                        #if False:
                        if environ.has_key('paste.recursive.include'):
                            #paste's recursive facilities are available, to
                            #so we can get the XSLT with a middleware call
                            #rather than a full Web invocation
                            #print environ['paste.recursive.include']
                            xslt_resp = environ['paste.recursive.include'](xslt)
                            #FIXME: this should be relative to the XSLT, not XML
                            #print xslt_resp, xslt_resp.body
                            isrc = InputSource.DefaultFactory.fromString(
                                xslt_resp.body, get_request_url(environ))
                            processor.appendStylesheet(isrc)
                        else:
                            #We have to make a full Web call to get the XSLT.
                            #4Suite will do that for us in processing the PI
                            if not use_pi:
                                #The stylesheet did not come from a PI, so load it explicitly,
                                #resolved against the request URL
                                uri = Uri.Absolutize(xslt, get_request_url(environ))
                                isrc = InputSource.DefaultFactory.fromUri(uri)
                                processor.appendStylesheet(isrc)
                        #Remember the processor for later requests naming the same stylesheet
                        self.processor_cache[xslt] = processor
                    processor.run(source, outputStream=result,
                                  ignorePis=not use_pi, topLevelParams=params)

                    #Strip content-length if present (needs to be
                    #recalculated by server)
                    #Also strip content-type, which will be replaced below
                    response_headers = [ (name, value)
                        for name, value in response_headers
                            if ( name.lower()
                                 not in ['content-length', 'content-type'])
                    ]
                    #Put in the updated content type
                    imt = processor.outputParams.mediaType
                    content = result.getvalue()
                    #NOTE(review): path is not defined in this function; presumably it comes from the enclosing scope -- confirm
                    if environ.get(CACHEABLE_FLAG):
                        self.path_cache[path] = imt, content
                    response_headers.append(('content-type', imt))
                    start_response(status, response_headers, exc_info)
                    end = time.time()
                    print >> log, '%s: elapsed time: %0.3f\n'%(xslt, end-start)
                    #environ['wsgi.errors'].write('%s: elapsed time: %0.3f\n'%(xslt, end-start))
                    return content
                    
            #If it reaches this point, no XSLT was applied.
            return
示例#43
0
 def test_borrowed(source_xml=source_xml, transform_xml=transform_xml, expected=expected_html):
     """Wrap the source, transform and expected output in input sources and run the HTML check."""
     wrapped_source = inputsource(source_xml)
     wrapped_transform = inputsource(transform_xml)
     # Only the text of the expected document is passed on.
     expected_stream = inputsource(expected).stream
     _run_html(
         source_xml=wrapped_source,
         transform_xml=wrapped_transform,
         expected=expected_stream.read())
示例#44
0
 def test_method(self):
     """Run apply_xupdate on the fixture's source and xupdate documents; success means no exception."""
     doc_isrc = inputsource(self.source, 'source')
     update_isrc = inputsource(self.xupdate, 'xupdate-source')
     # The updated document is discarded.
     apply_xupdate(doc_isrc, update_isrc)
     return None
示例#45
0
# Shared XML fixture wrapped in an input source with the URI
# 'urn:domlette-test-tree'.  The document declares ISO-8859-1 encoding,
# carries an internal DTD subset, uses non-ASCII element names (written with
# \xf6\xf8 escapes), a namespaced element, xml:lang attributes, a comment and
# processing instructions both before and after the root element.
src = inputsource("""<?xml version='1.0' encoding='ISO-8859-1'?>
<!DOCTYPE ROOT [
  <!ELEMENT ROOT (#PCDATA|CHILD1|CHILD2|foo:CHILD3|lang)*>
  <!ELEMENT CHILD1 (#PCDATA|GCHILD)*>
  <!ELEMENT CHILD2 (#PCDATA|GCHILD)*>
  <!ELEMENT foo:CHILD3 EMPTY>
  <!ELEMENT GCHILD EMPTY>
  <!ELEMENT lang (foo|f\xf6\xf8)*>
  <!ELEMENT foo EMPTY>
  <!ELEMENT f\xf6\xf8 EMPTY>
  <!ATTLIST CHILD1 attr1 CDATA #IMPLIED
                   attr31 CDATA #IMPLIED>
  <!ATTLIST CHILD2 attr1 CDATA #IMPLIED
                   CODE ID #REQUIRED>
  <!ATTLIST foo:CHILD3 foo:name CDATA #IMPLIED
	           xmlns:foo CDATA #IMPLIED>
  <!ATTLIST GCHILD name CDATA #IMPLIED>
  <!ATTLIST lang xml:lang CDATA #IMPLIED>
  <!ATTLIST foo xml:lang CDATA #IMPLIED>
]>
<?xml-stylesheet "Data" ?>
<ROOT>
  <!-- Test Comment -->
  <CHILD1 attr1="val1" attr31="31">
    <GCHILD name="GCHILD11"/>
    <GCHILD name="GCHILD12"/>
    Text1
  </CHILD1>
  <CHILD2 attr1="val2" CODE="1">
    <GCHILD name="GCHILD21"/>
    <GCHILD name="GCHILD22"/>
  </CHILD2>
  <foo:CHILD3 xmlns:foo="http://foo.com" foo:name="mike"/>
  <lang xml:lang="en">
    <foo xml:lang=""/>
    <foo/>
    <f\xf6\xf8/>
  </lang>
</ROOT>
<?no-data ?>
""", 'urn:domlette-test-tree')