示例#1
0
 def __call__(self, data, **metadata):
     """Render *data* as pretty-printed HTML.

     Returns the tuple ``("HTML", format_text(serialized))`` when
     ``strutils.is_xml(data)`` is truthy. For any other input the method
     returns ``None``.
     """
     if not strutils.is_xml(data):
         return None
     html_parser = lxml.etree.HTMLParser(strip_cdata=True, remove_blank_text=True)
     root = lxml.html.fromstring(data, parser=html_parser)
     # Pass the parsed document's doctype back to the serializer.
     doctype = root.getroottree().docinfo.doctype
     serialized = lxml.etree.tostring(
         root,
         pretty_print=True,
         doctype=doctype,
         encoding="utf8",
     )
     return "HTML", format_text(serialized)
示例#2
0
 def render_priority(self, data: bytes, *, content_type: Optional[str] = None, **metadata) -> float:
     """Score how well this view suits *data*.

     Returns ``1`` when *content_type* is one of this view's content
     types, ``0.4`` when ``strutils.is_xml(data)`` is truthy, and ``0``
     otherwise. Empty *data* always scores ``0``.
     """
     if data:
         # An explicit content-type match outranks sniffing the payload.
         if content_type in self.__content_types:
             return 1
         if strutils.is_xml(data):
             return 0.4
     return 0
示例#3
0
 def __call__(self, data, **metadata):
     """Pretty-print *data* as HTML.

     When ``strutils.is_xml(data)`` is truthy the payload is parsed with an
     lxml HTML parser, re-serialized with ``pretty_print=True`` and the
     parsed document's doctype, and returned as
     ``("HTML", format_text(serialized))``. Otherwise ``None`` is returned.
     """
     if strutils.is_xml(data):
         tree_root = lxml.html.fromstring(
             data,
             parser=lxml.etree.HTMLParser(strip_cdata=True,
                                          remove_blank_text=True),
         )
         serialized = lxml.etree.tostring(
             tree_root,
             pretty_print=True,
             doctype=tree_root.getroottree().docinfo.doctype,
             encoding='utf8',
         )
         return "HTML", format_text(serialized)
     return None
    def __call__(self, data, **metadata):
        """Render *data* as HTML after rewriting every text node word by word.

        Words whose first character is a vowel (``aeiou``, case-insensitive)
        get ``"hay"`` appended; for all other words the first character is
        moved to the end and followed by ``"ay"``. Trailing characters found
        in ``string.punctuation`` stay at the end of the word.

        Returns ``("HTML", contentviews.format_text(serialized))`` when
        ``strutils.is_xml(data)`` is truthy, otherwise ``None``.
        """
        if not strutils.is_xml(data):
            return None

        html_parser = lxml.etree.HTMLParser(
            strip_cdata=True,
            remove_blank_text=True
        )
        document = lxml.html.fromstring(data, parser=html_parser)
        docinfo = document.getroottree().docinfo

        def piglify(src):
            """Return *src* with each whitespace-separated word rewritten."""
            converted = []
            for word in src.split():
                # ``stem_end`` is the index where trailing punctuation
                # starts; the first character always stays in the stem.
                stem_end = len(word)
                while stem_end > 1 and word[stem_end - 1] in string.punctuation:
                    stem_end -= 1
                stem = word[:stem_end]
                trailing = word[stem_end:]
                if word[0].lower() in 'aeiou':
                    converted.append(stem + "hay" + trailing)
                else:
                    converted.append(stem[1:] + word[0] + "ay" + trailing)
            # Words are re-joined with single spaces, so the original
            # whitespace layout is not preserved.
            return ' '.join(converted)

        def recurse(root):
            """Rewrite the text and tail of *root* and all of its descendants."""
            for attr in ('text', 'tail'):
                value = getattr(root, attr, None)
                if value:
                    setattr(root, attr, piglify(value))

            if not len(root):
                return
            for child in root:
                recurse(child)

        recurse(document)

        serialized = lxml.etree.tostring(
            document,
            pretty_print=True,
            doctype=docinfo.doctype
        )
        return "HTML", contentviews.format_text(serialized)
示例#5
0
 def __call__(self, data, **metadata):
     headers = metadata.get("headers", {})
     ctype = headers.get("content-type")
     if data and ctype:
         ct = http.parse_content_type(ctype) if ctype else None
         ct = "%s/%s" % (ct[0], ct[1])
         if ct in content_types_map:
             return content_types_map[ct][0](data, **metadata)
         elif strutils.is_xml(data):
             return get("XML")(data, **metadata)
     if metadata.get("query"):
         return get("Query")(data, **metadata)
     if data and strutils.is_mostly_bin(data):
         return get("Hex")(data)
     if not data:
         return "No content", []
     return get("Raw")(data)
示例#6
0
 def __call__(self, data, **metadata):
     headers = metadata.get("headers", {})
     ctype = headers.get("content-type")
     if data and ctype:
         ct = http.parse_content_type(ctype) if ctype else None
         ct = "%s/%s" % (ct[0], ct[1])
         if ct in content_types_map:
             return content_types_map[ct][0](data, **metadata)
         elif strutils.is_xml(data):
             return get("XML")(data, **metadata)
     if metadata.get("query"):
         return get("Query")(data, **metadata)
     if data and strutils.is_mostly_bin(data):
         return get("Hex")(data)
     if not data:
         return "No content", []
     return get("Raw")(data)
示例#7
0
    def __call__(self, data, **metadata):
        """Render *data* as HTML with each text node rewritten word by word.

        A word that starts with a vowel (``aeiou``, case-insensitive) has
        ``"hay"`` appended. Any other word has its first character moved to
        the end, followed by ``"ay"``. Trailing ``string.punctuation``
        characters are kept after the added suffix.

        Returns ``("HTML", contentviews.format_text(serialized))`` when
        ``strutils.is_xml(data)`` is truthy, otherwise ``None``.
        """
        if not strutils.is_xml(data):
            return None

        parser = lxml.etree.HTMLParser(strip_cdata=True,
                                       remove_blank_text=True)
        tree_root = lxml.html.fromstring(data, parser=parser)
        docinfo = tree_root.getroottree().docinfo

        def pig_word(word):
            """Rewrite a single non-empty word."""
            # Length of the word without trailing punctuation. At least one
            # character is kept so an all-punctuation word still has a stem.
            core_len = max(1, len(word.rstrip(string.punctuation)))
            core, suffix = word[:core_len], word[core_len:]
            if word[0].lower() in 'aeiou':
                return core + "hay" + suffix
            return core[1:] + word[0] + "ay" + suffix

        def piglify(src):
            """Rewrite every whitespace-separated word of *src*."""
            return ' '.join(pig_word(word) for word in src.split())

        def walk(node):
            """Apply ``piglify`` to the text and tail of *node* and its subtree."""
            if hasattr(node, 'text') and node.text:
                node.text = piglify(node.text)
            if hasattr(node, 'tail') and node.tail:
                node.tail = piglify(node.tail)

            if len(node):
                for child in node:
                    walk(child)

        walk(tree_root)

        serialized = lxml.etree.tostring(tree_root,
                                         pretty_print=True,
                                         doctype=docinfo.doctype)
        return "HTML", contentviews.format_text(serialized)
示例#8
0
 def __call__(self, data, **metadata):
     headers = metadata.get("headers", {})
     ctype = headers.get("content-type")
     if data and ctype:
         ct = http.parse_content_type(ctype) if ctype else None
         ct = "{}/{}".format(ct[0], ct[1])
         if ct in contentviews.content_types_map:
             return contentviews.content_types_map[ct][0](data, **metadata)
         elif strutils.is_xml(data):
             return contentviews.get("XML/HTML")(data, **metadata)
         elif ct.startswith("image/"):
             return contentviews.get("Image")(data, **metadata)
     if metadata.get("query"):
         return contentviews.get("Query")(data, **metadata)
     if data and strutils.is_mostly_bin(data):
         return contentviews.get("Hex")(data)
     if not data:
         return "No content", []
     return contentviews.get("Raw")(data)
示例#9
0
def test_is_xml():
    """Check ``strutils.is_xml`` on one negative and two positive inputs."""
    # Bytes that do not start with an angle bracket are rejected.
    assert not strutils.is_xml(b"foo")
    # A leading "<" is accepted, with or without whitespace before it.
    for candidate in (b"<foo", b"  \n<foo"):
        assert strutils.is_xml(candidate)
示例#10
0
def test_is_xml():
    """Check ``strutils.is_xml`` against a table of inputs and expected truthiness."""
    expectations = (
        (b"foo", False),
        (b"<foo", True),
        # Leading whitespace before the "<" must not prevent detection.
        (b"  \n<foo", True),
    )
    for payload, expected in expectations:
        assert bool(strutils.is_xml(payload)) is expected