def test_expat_xmldecl():
    """Check when the Expat parser reports the XML declaration as a node.

    Without ``xmldecl=True`` the first node must not be an :class:`xml.XML`
    instance, even if the input carries a declaration. With ``xmldecl=True``
    an :class:`xml.XML` node is expected only if the input has a declaration,
    and its content must list the pseudo-attributes with double quotes.
    """
    def parsed(source, **expatargs):
        # All cases share one pipeline; only the input and Expat options vary
        return parse.tree(
            source,
            parse.Expat(**expatargs),
            parse.NS(html),
            parse.Node(),
            validate=True,
        )

    # Declaration present, but reporting it was not requested
    tree = parsed(b"<?xml version='1.0' encoding='utf-8' standalone='yes'?><a/>")
    assert not isinstance(tree[0], xml.XML)

    # Reporting requested, but the input has no declaration
    tree = parsed(b"<a/>", xmldecl=True)
    assert not isinstance(tree[0], xml.XML)

    # Reporting requested and declaration present: (input, expected content)
    cases = [
        (
            b"<?xml version='1.0'?><a/>",
            'version="1.0"',
        ),
        (
            b"<?xml version='1.0' encoding='utf-8'?><a/>",
            'version="1.0" encoding="utf-8"',
        ),
        (
            b"<?xml version='1.0' encoding='utf-8' standalone='yes'?><a/>",
            'version="1.0" encoding="utf-8" standalone="yes"',
        ),
    ]
    for (source, content) in cases:
        tree = parsed(source, xmldecl=True)
        assert isinstance(tree[0], xml.XML)
        assert tree[0].content == content
def xsc2txt(instream, outstream, title, width):
    """Render doc-namespace markup from ``instream`` as text on ``outstream``.

    The input is parsed with the SGMLOP parser into nodes of the ``doc``
    namespace, wrapped in a ``doc.section`` inside an HTML skeleton,
    converted, and written as text formatted with the given ``width``.

    ``title`` is used as the section title; ``None`` means no title.
    """
    content = parse.tree(
        parse.Stream(instream),
        parse.SGMLOP(),
        parse.NS(doc),
        parse.Node(pool=xsc.docpool()),
    )
    heading = xsc.Null if title is None else doc.title(title)
    page = html.html(html.body(doc.section(heading, content)))
    converted = page.conv()
    outstream.write(html.astext(converted, width=width))
def test_parseurls():
    """Check URL handling when parsing ``URLAttr`` or ``StyleAttr`` attributes.

    The document is parsed with the base URL ``root:1/2.html``; the relative
    URLs in ``href`` and in the ``style`` attribute must come out relative to
    that base.
    """
    source = b'<a href="4.html" style="background-image: url(3.gif);"/>'
    tree = parse.tree(
        source,
        parse.Expat(),
        parse.NS(html),
        parse.Node(base="root:1/2.html"),
        validate=True,
    )
    link = tree[0]

    # URLs embedded in the CSS are rewritten too
    style = link["style"]
    assert str(style) == "background-image: url(root:1/3.gif)"
    assert style.urls() == [url.URL("root:1/3.gif")]

    href = link["href"]
    assert str(href) == "root:1/4.html"
    assert href.forInput(root="gurk/hurz.html") == url.URL("gurk/1/4.html")
def test_parsingmethods():
    """Check that different ways of feeding the parser give the same result.

    The same document is passed as bytes, as ``str`` through an encoder, as
    an iterator, as a stream read byte by byte, and as an ElementTree. In
    every case the text and the ``title`` attribute of the first ``a``
    element must equal the original text, which contains non-BMP and
    non-ASCII characters.
    """
    t = "abc\U00012345\u3042xyz"
    s = f'<?xml version="1.0" encoding="utf-8"?><a title="{t}">{t}</a>'
    b = s.encode("utf-8")

    def check(*pipeline):
        # Run the pipeline and compare the first ``a`` element with ``t``
        node = parse.tree(*pipeline, validate=True)
        node = node.walknodes(a)[0]
        assert str(node) == t
        assert str(node["title"]) == t

    pool = xsc.Pool(a)

    check(b, parse.Expat(), parse.NS(a.xmlns), parse.Node(pool))
    check(s, parse.Encoder(encoding="utf-8"), parse.Expat(), parse.NS(a.xmlns), parse.Node(pool))
    check(parse.Iter(b), parse.Expat(), parse.NS(a.xmlns), parse.Node(pool))  # parse byte by byte
    check(parse.Stream(io.BytesIO(b), bufsize=1), parse.Expat(), parse.NS(a.xmlns), parse.Node(pool))
    # NOTE(review): ``xml.etree.cElementTree`` was removed in Python 3.9 --
    # confirm what this file binds to the name ``cElementTree``.
    check(parse.ETree(cElementTree.fromstring(b), defaultxmlns=a.xmlns), parse.Node(pool))
def makexnd(stream, encoding=None, shareattrs="dupes", model="simple"):
    """Parse a TLD document and return the XND object of its ``taglib`` element.

    :obj:`stream` can be a stream, an :class:`URL` or ``str``/``bytes``.

    :obj:`encoding` is passed on to the Expat parser. For ``str`` input it is
    also used to encode the text to bytes before parsing; if it is ``None``
    UTF-8 is used for that.

    :obj:`shareattrs` selects attribute sharing on the result: ``"dupes"``
    calls ``shareattrs(False)``, ``"all"`` calls ``shareattrs(True)``, any
    other value leaves the object untouched.

    :obj:`model` is passed to ``asxnd()`` of the ``taglib`` element.
    """
    if isinstance(stream, str):
        # Text is encoded here, so the parser must be told the encoding that
        # was really used. Honor the caller's choice (it used to be discarded
        # unconditionally) and fall back to UTF-8.
        if encoding is None:
            encoding = "utf-8"
        stream = stream.encode(encoding)
    node = parse.tree(stream, parse.Expat(encoding=encoding), parse.NS(tld), parse.Node())

    # get and convert the taglib object
    xnd = misc.first(node.walknodes(tld.taglib)).asxnd(model=model)

    if shareattrs == "dupes":
        xnd.shareattrs(False)
    elif shareattrs == "all":
        xnd.shareattrs(True)
    return xnd
def test_nsparse():
    """Check that a prepopulated prefix mapping and ``xmlns`` attributes work together.

    The outer ``x:a`` relies on the mapping handed to ``parse.NS``; the
    nested elements rebind the ``x`` prefix via ``xmlns:x`` attributes.
    """
    source = (
        b" <x:a>"
        b" <x:a xmlns:x='http://www.w3.org/1999/xhtml'>"
        b" <x:a xmlns:x='http://xmlns.livinglogic.de/xist/ns/doc'>gurk</x:a>"
        b" </x:a>"
        b" </x:a> "
    )
    expected = doc.a(html.a(doc.a("gurk")))

    tree = parse.tree(source, parse.Expat(), parse.NS(x=doc), parse.Node(), validate=True)
    # get rid of the Frag and whitespace
    outer = tree.walknodes(xsc.Element)[0]
    assert outer.compacted() == expected
def getdoc(thing, format):
    """Return the docstring of ``thing`` as an XIST node.

    ``format`` names the docstring markup (compared case-insensitively):

    ``"plaintext"``
        The docstring is returned as a text node.

    ``"restructuredtext"``
        The docstring is parsed as reST and converted to the ``doc`` namespace.

    ``"xist"``
        The docstring is parsed as ``doc`` namespace markup. ``pyref``
        elements in it get their missing ``module``, ``class_`` and
        ``method`` attributes filled in from ``thing``.

    If ``thing`` has no docstring ``xsc.Null`` is returned. Any other
    ``format`` raises :exc:`ValueError`.
    """
    if thing.__doc__ is None:
        return xsc.Null

    # Remove indentation
    doclines = textwrap.dedent(thing.__doc__).split("\n")

    # Drop empty lines at both ends
    while doclines and not doclines[0]:
        doclines.pop(0)
    while doclines and not doclines[-1]:
        doclines.pop()

    text = "\n".join(doclines)

    # Base string handed to the parsers; it names where the docstring is from
    modname = _getmodulename(thing)
    if inspect.ismethod(thing):
        base = f"METHOD-DOCSTRING({modname}.{thing.__class__.__name__}.{thing.__qualname__})"
    elif isinstance(thing, property):
        base = f"PROPERTY-DOCSTRING({modname}.{thing})"
    elif inspect.isfunction(thing):
        base = f"FUNCTION-DOCSTRING({modname}.{thing.__qualname__})"
    elif inspect.isclass(thing):
        base = f"CLASS-DOCSTRING({modname}.{thing.__qualname__})"
    elif inspect.ismodule(thing):
        base = f"MODULE-DOCSTRING({modname})"
    else:
        base = "DOCSTRING"

    fmt = format.lower()

    if fmt == "plaintext":
        return xsc.Text(text)

    if fmt == "restructuredtext":
        from ll.xist.ns import rest, doc
        return rest.fromstring(text, base=base).conv(target=doc)

    if fmt != "xist":
        raise ValueError(f"unsupported __docformat__ {format!r}")

    from ll.xist.ns import doc
    node = parse.tree(
        parse.String(text),
        parse.SGMLOP(),
        parse.NS(doc),
        parse.Node(pool=xsc.docpool(), base=base),
    )

    # One paragraph docstrings may omit the <p> element; add it for them
    if not node[p]:
        node = p(node)

    # Fill in the ``pyref`` attributes the docstring author left out
    if inspect.ismethod(thing):
        # Use the original method instead of the decorator
        original = thing
        while hasattr(original, "__wrapped__"):
            original = original.__wrapped__
        for ref in node.walknodes(pyref):
            if "module" not in ref.attrs:
                ref["module"] = _getmodulename(original)
            if "class_" not in ref.attrs:
                ref["class_"] = thing.__self__.__class__.__name__
            if "method" not in ref.attrs:
                ref["method"] = thing.__name__
    elif inspect.isfunction(thing):
        # Use the original function instead of the decorator
        original = thing
        while hasattr(original, "__wrapped__"):
            original = original.__wrapped__
        for ref in node.walknodes(pyref):
            if "module" not in ref.attrs:
                ref["module"] = _getmodulename(original)
    elif inspect.isclass(thing):
        for ref in node.walknodes(pyref):
            if "module" not in ref.attrs:
                ref["module"] = _getmodulename(thing)
            if "class_" not in ref.attrs:
                ref["class_"] = thing.__name__
    elif inspect.ismodule(thing):
        for ref in node.walknodes(pyref):
            if "module" not in ref.attrs:
                ref["module"] = thing.__name__
    return node
def test_parse_tidy_empty():
    """Check that parsing empty input with the Tidy parser gives a false result."""
    pipeline = (b"", parse.Tidy(), parse.NS(), parse.Node())
    tree = parse.tree(*pipeline, validate=True)
    assert not tree
def test_frag():
    """Check parsing of input that has no single root element.

    The string forms of all ``html.b`` elements found in the parsed tree
    must concatenate to ``"klauserich"``.
    """
    source = b"das ist <b>klaus</b>. das ist <b>erich</b>"
    e = parse.tree(source, parse.SGMLOP(), parse.NS(html), parse.Node())
    bolds = e.walknodes(e // html.b)
    assert "".join(str(bold) for bold in bolds) == "klauserich"
def test_comments():
    """Check that CSS comments are dropped when stylesheets are applied.

    After ``css.applystylesheets()`` the ``style`` attribute of the ``p``
    element must hold the rule's declaration without the ``/*nix*/`` comments.
    """
    source = (
        b'<html><head><style type="text/css">'
        b'/*nix*/ p{/*nix*/ color: red;}'
        b'</style></head><body><p>gurk</p></body></html>'
    )
    tree = parse.tree(source, parse.Expat(), parse.NS(html), parse.Node())
    css.applystylesheets(tree)
    paragraph = tree.walknodes(html.p)[0]
    assert str(paragraph.attrs.style) == "color: red;"