Пример #1
0
def parseHTML(str):
    """Parse an HTML fragment and return the contents of its body.

    The input is passed through ``u()`` and parsed by html5lib into an lxml
    tree without HTML namespacing. The second child of the root element is
    treated as the body (presumably ``<body>`` in html5lib's html/head/body
    layout -- confirm).

    Returns a list holding the body's leading text (only when it is not
    None) followed by the body's child elements.
    """
    tree = html5lib.parse(u(str), treebuilder='lxml', namespaceHTMLElements=False)
    body = tree.getroot()[1]
    children = list(body.iterchildren())
    if body.text is not None:
        # Text preceding the first child lives on the parent in lxml's model.
        return [body.text] + children
    return children
Пример #2
0
def die(msg, *formatArgs):
    """Report a fatal error and, unless debugging, terminate the program.

    ``msg`` is passed through ``u()`` and used as a ``str.format`` template
    filled with ``formatArgs`` (each also passed through ``u()``). The result
    is prefixed with an ANSI-coloured "FATAL ERROR:" label. A message that is
    already in the module-level ``messages`` collection is not printed again.

    When ``config.debug`` is falsy the process exits with status 1; otherwise
    the function returns normally so execution can continue.
    """
    msg = u"\033[1;31mFATAL ERROR:\033[0m " + u(msg).format(*map(u, formatArgs))
    if msg not in messages:
        messages.add(msg)
        # Parenthesized single-argument form: behaves exactly like the
        # Python 2 print statement and is also valid Python 3 syntax.
        print(msg)
    if not config.debug:
        sys.exit(1)
Пример #3
0
def parseHTML(str):
    """Parse HTML text and return the body's text and child nodes as a list.

    ``str`` is passed through ``u()`` and handed to html5lib, which builds an
    lxml tree with HTML namespacing disabled. The element at index 1 under
    the root is used as the body (presumably ``<body>`` -- confirm).

    The returned list starts with the body's leading text when that text is
    not None, followed by every child element of the body.
    """
    parsed = html5lib.parse(
        u(str),
        treebuilder='lxml',
        namespaceHTMLElements=False,
    )
    bodyEl = parsed.getroot()[1]
    # Only include the leading text node when the body actually has one.
    leading = [] if bodyEl.text is None else [bodyEl.text]
    return leading + list(bodyEl.iterchildren())
Пример #4
0
def linkTextsFromElement(el, preserveCasing=False):
    """Return the list of link texts associated with an element.

    * An empty ``title`` attribute yields an empty list.
    * A non-empty ``title`` is split on ``|``; each piece is stripped and
      passed through ``u()``.
    * Without a ``title`` attribute, the element's stripped text content is
      the single link text.

    Texts are lower-cased unless ``preserveCasing`` is true.
    """
    from lib.htmlhelpers import textContent
    title = el.get('title')
    if title == '':
        return []
    if title:
        texts = [u(piece.strip()) for piece in title.split('|')]
    else:
        texts = [textContent(el).strip()]
    return texts if preserveCasing else [text.lower() for text in texts]
Пример #5
0
def linkTextsFromElement(el, preserveCasing=False):
    """Collect the link texts an element provides.

    The ``title`` attribute takes priority: an empty title yields no texts,
    and a non-empty title supplies one text per ``|``-separated piece
    (stripped and passed through ``u()``). Otherwise the element's own
    stripped text content is used.

    Returns the texts as-is when ``preserveCasing`` is true, and lower-cased
    otherwise.
    """
    from lib.htmlhelpers import textContent
    titleAttr = el.get('title')
    if titleAttr == '':
        return []

    if titleAttr:
        pieces = titleAttr.split('|')
        texts = [u(piece.strip()) for piece in pieces]
    else:
        texts = [textContent(el).strip()]

    if not preserveCasing:
        texts = [text.lower() for text in texts]
    return texts
Пример #6
0
 def setSpecData(self, spec):
     """Record the current spec's status, level and names on this object.

     ``specStatus`` becomes "ED" when ``spec.status`` is one of "ED",
     "DREAM" or "UD", and "TR" otherwise. ``specLevel`` and ``specName``
     are copied from ``spec.level`` and ``spec.shortname``; ``specVName``
     is ``"<shortname>-<level>"``. Finally ``removeSameSpecRefs()`` is
     called.
     """
     isDraftStatus = spec.status in ("ED", "DREAM", "UD")
     # TODO: the "TR" case should become more elaborate later, to enforce
     # the pubrules linking policy.
     self.specStatus = "ED" if isDraftStatus else "TR"
     self.specLevel = spec.level
     self.specName = spec.shortname
     # TODO: obtain a real versioned shortname, allowing the
     # "shortname-level" pattern to be overridden.
     self.specVName = spec.shortname + "-" + u(spec.level)
     self.removeSameSpecRefs()
Пример #7
0
 def setSpecData(self, spec):
     """Store status, level and naming data taken from ``spec``.

     Sets ``specStatus`` ("ED" for spec statuses "ED", "DREAM" and "UD";
     "TR" for anything else), ``specLevel``, ``specName`` and ``specVName``
     (shortname, a hyphen, then the level passed through ``u()``), then
     calls ``removeSameSpecRefs()``.
     """
     draftStatuses = ("ED", "DREAM", "UD")
     if spec.status not in draftStatuses:
         # TODO: make this branch more complex later, to enforce the
         # pubrules linking policy.
         self.specStatus = "TR"
     else:
         self.specStatus = "ED"
     self.specLevel = spec.level
     self.specName = spec.shortname
     # TODO: use a real versioned shortname, with a way to override the
     # "shortname-level" pattern.
     self.specVName = spec.shortname + "-" + u(spec.level)
     self.removeSameSpecRefs()
Пример #8
0
def innerHTML(el):
    """Serialize the contents of ``el`` (not the element itself) to markup.

    Returns an empty unicode string when ``el`` is None. Otherwise the result
    is the element's leading text (empty if absent) followed by each child
    serialized with ``html.tostring(..., encoding="unicode")``, with the
    whole string passed through ``u()``.
    """
    if el is None:
        return u''
    leadingText = el.text or u''
    childMarkup = [u(html.tostring(child, encoding="unicode")) for child in el]
    return u(leadingText + u''.join(childMarkup))
Пример #9
0
def innerHTML(el):
    """Return the markup inside ``el`` as a unicode string.

    ``None`` yields an empty string. For an element, the leading text (or an
    empty string when there is none) is concatenated with the serialization
    of every child, and the result is passed through ``u()``.
    """
    if el is not None:
        pieces = [el.text or u'']
        for child in el:
            pieces.append(u(html.tostring(child, encoding="unicode")))
        # Join the children first, then prepend the leading text.
        return u(pieces[0] + u''.join(pieces[1:]))
    return u''
Пример #10
0
def outerHTML(el):
    """Serialize ``el`` itself, without its tail text, to a unicode string.

    Returns an empty unicode string when ``el`` is None.
    """
    if el is not None:
        markup = html.tostring(el, with_tail=False, encoding="unicode")
        return u(markup)
    return u''
Пример #11
0
def say(msg, *formatArgs):
    """Print an informational message unless quiet mode is enabled.

    ``msg`` is passed through ``u()`` and used as a ``str.format`` template
    filled with ``formatArgs`` (each also passed through ``u()``). Nothing is
    printed when ``config.quiet`` is truthy.
    """
    if not config.quiet:
        # Parenthesized single-argument form: behaves exactly like the
        # Python 2 print statement and is also valid Python 3 syntax.
        print(u(msg).format(*map(u, formatArgs)))
Пример #12
0
def warn(msg, *formatArgs):
    """Print a warning once per distinct message, unless quiet mode is on.

    ``msg`` is passed through ``u()`` and used as a ``str.format`` template
    filled with ``formatArgs`` (each also passed through ``u()``), then
    prefixed with an ANSI-coloured "WARNING:" label. A message that is
    already in the module-level ``messages`` collection is not printed again.
    When ``config.quiet`` is truthy nothing is formatted, recorded or printed.
    """
    if not config.quiet:
        msg = u"\033[1;33mWARNING:\033[0m " + u(msg).format(*map(u, formatArgs))
        if msg not in messages:
            messages.add(msg)
            # Parenthesized single-argument form: behaves exactly like the
            # Python 2 print statement and is also valid Python 3 syntax.
            print(msg)
Пример #13
0
def escapeAttr(str):
    """Escape text for use inside a quoted HTML attribute value.

    The input is passed through ``u()``; then ``&`` becomes ``&amp;``,
    ``'`` becomes ``&apos;`` and ``"`` becomes ``&quot;``.

    Returns the escaped string.
    """
    # The ampersand must be replaced first, otherwise the ampersands of the
    # entities inserted by the later replacements would be escaped again.
    return u(str).replace(u'&', u'&amp;').replace(u"'", u'&apos;').replace(
        u'"', u'&quot;')
Пример #14
0
def escapeHTML(str):
    """Escape text for use as HTML content.

    The input is passed through ``u()``; then ``&`` becomes ``&amp;`` and
    ``<`` becomes ``&lt;``.
    """
    text = u(str)
    # Ampersands first, so the '&' of the inserted '&lt;' is left alone.
    text = text.replace(u'&', u'&amp;')
    text = text.replace(u'<', u'&lt;')
    return text
Пример #15
0
def parseDocument(str):
    """Parse a complete HTML document and return the resulting tree.

    The input is passed through ``u()`` and parsed by html5lib using the
    lxml tree builder with HTML element namespacing disabled.
    """
    return html5lib.parse(u(str), treebuilder='lxml', namespaceHTMLElements=False)
Пример #16
0
def parseDocument(str):
    """Return the html5lib/lxml document tree parsed from ``str``.

    ``str`` is first passed through ``u()``; the parse uses the 'lxml'
    tree builder and does not namespace HTML elements.
    """
    source = u(str)
    tree = html5lib.parse(
        source,
        treebuilder='lxml',
        namespaceHTMLElements=False,
    )
    return tree
Пример #17
0
def textContent(el):
    """Return the text of ``el``, excluding the element's tail text.

    Serializes ``el`` with ``html.tostring`` using the 'text' method and
    passes the result through ``u()``.
    """
    rawText = html.tostring(el, method='text', with_tail=False, encoding="unicode")
    return u(rawText)
Пример #18
0
def textContent(el):
    """Serialize ``el`` as plain text (no markup, no tail text).

    The serialized text is passed through ``u()`` before being returned.
    """
    serializeOptions = dict(method='text', with_tail=False, encoding="unicode")
    return u(html.tostring(el, **serializeOptions))
Пример #19
0
def headingLevelOfElement(el):
    """Return the ``data-level`` of the first relevant heading that has one.

    Walks the elements produced by ``relevantHeadings(el, levels=[2..6])``
    and returns the first ``data-level`` attribute that is not None, passed
    through ``u()``. Returns None when no such heading is found.
    """
    for heading in relevantHeadings(el, levels=[2, 3, 4, 5, 6]):
        level = heading.get('data-level')
        if level is not None:
            return u(level)
    return None
Пример #20
0
def escapeAttr(str):
    """Escape text for use inside a quoted HTML attribute value.

    The input is passed through ``u()``; then ``&``, ``'`` and ``"`` are
    replaced by ``&amp;``, ``&apos;`` and ``&quot;`` respectively.
    """
    # Order matters: '&' goes first so the entities added afterwards are not
    # escaped a second time.
    replacements = (
        (u'&', u'&amp;'),
        (u"'", u'&apos;'),
        (u'"', u'&quot;'),
    )
    escaped = u(str)
    for rawChar, entity in replacements:
        escaped = escaped.replace(rawChar, entity)
    return escaped
Пример #21
0
def escapeHTML(str):
    """Return ``str`` with ``&`` and ``<`` replaced by HTML entities.

    The input is passed through ``u()`` before the replacements are applied.
    """
    escaped = u(str)
    # '&' is handled before '<' so the ampersand in '&lt;' stays intact.
    for rawChar, entity in ((u'&', u'&amp;'), (u'<', u'&lt;')):
        escaped = escaped.replace(rawChar, entity)
    return escaped
Пример #22
0
def outerHTML(el):
    """Return the markup of ``el`` including its own tags, minus tail text.

    ``None`` yields an empty unicode string; otherwise the element is
    serialized with ``html.tostring`` and passed through ``u()``.
    """
    return u'' if el is None else u(
        html.tostring(el, with_tail=False, encoding="unicode"))
Пример #23
0
def headingLevelOfElement(el):
    """Find the heading level string that applies to ``el``.

    Checks each element yielded by ``relevantHeadings(el, levels=[2..6])``
    in order; the first whose ``data-level`` attribute is not None supplies
    the result (passed through ``u()``). Returns None if none has one.
    """
    candidates = relevantHeadings(el, levels=[2, 3, 4, 5, 6])
    for heading in candidates:
        if heading.get('data-level') is None:
            continue
        return u(heading.get('data-level'))
    return None