Пример #1
0
    def _prepareForLoading(self, xhtmlString):
        u"""Prepares the raw xhtml string for loading into zDom.

        The following steps are applied in order:
          1. Leading whitespace is stripped and a leading DOCTYPE declaration
             is removed. The keyword is matched case-insensitively, so
             <!doctype html> is removed just like <!DOCTYPE ...>.
          2. Every &nbsp; entity is replaced with a plain space.
          3. The content is passed through self._cleanupMsOffice().
          4. If the content has no body, plain text is first converted to
             xhtml markup (when it has no xhtml markup) and the result is
             wrapped with <html><body/></html>. Each of these conversions
             appends a note to self.messages.

        @param xhtmlString: raw xhtml (or plain text) content to prepare.
        @return: the prepared content string.
        """  #$NON-NLS-1$
        xhtmlString = xhtmlString.lstrip()

        # The DOCTYPE keyword is case-insensitive in HTML, so compare the
        # upper-cased prefix instead of using a case-sensitive startswith().
        doctypePrefix = u"<!DOCTYPE"  #$NON-NLS-1$
        if xhtmlString[:len(doctypePrefix)].upper() == doctypePrefix:
            # find() yields -1 when the declaration is never closed; the slice
            # then starts at 0 and the content is left untouched.
            declEnd = xhtmlString.find(u">")  #$NON-NLS-1$
            xhtmlString = xhtmlString[declEnd + 1:]

        xhtmlString = xhtmlString.replace(u'&nbsp;',
                                          u' ')  #$NON-NLS-1$ #$NON-NLS-2$
        # Only the cleaned string is needed; the status flag is ignored.
        (bOk,
         xhtmlString) = self._cleanupMsOffice(xhtmlString)  #@UnusedVariable

        # if the string content does not have a <body/> then convert to xhtml and wrap it
        # with <html><body/></html>
        if not xhtmlutil.hasBody(xhtmlString):
            if not xhtmlutil.hasXhtmlMarkup(xhtmlString):
                self.messages.append(
                    u"Converting plain text to xhtml markup.")  #$NON-NLS-1$
                # convert plain text to xhtml
                transformer = ZTextToXhtmlTransformer()
                xhtmlString = transformer.transform(xhtmlString)
            xhtmlString = xhtmlutil.wrapHtmlBody(xhtmlString)
            self.messages.append(
                u"Adding <html><body></body></html> wrapper.")  #$NON-NLS-1$
        return xhtmlString
Пример #2
0
def _internalRunTidy(htmlSrc, options=XHTML_OPTIONS):
    u"""Runs tidy over htmlSrc and returns the tuple (html, errorList).

    @param htmlSrc: markup to clean up. Content without a body is wrapped
        in XHTML_TEMPLATE before it is handed to tidy.
    @param options: mapping of keyword options for tidy.parseString(). The
        mapping is copied before it is adjusted, so neither the caller's
        dictionary nor the shared XHTML_OPTIONS default is modified. A None
        or empty value means "no options".
    @return: tuple (html, errorList) where html is the tidy output converted
        with convertToUnicode() and errorList is a list of ZTidyError
        objects built from the errors reported by tidy.
    """  #$NON-NLS-1$
    unsupportedOptions = ("raw", "output_error", "show_warnings")

    # Work on a private copy: mutating 'options' in place would permanently
    # alter the module level default (and any dictionary owned by the caller).
    tidyOptions = {}
    if options:
        tidyOptions.update(options)
        tidyOptions['tidy_mark'] = 0
        # remove unsupported options.
        for s in unsupportedOptions:
            tidyOptions.pop(s, None)

    lineOffset = 0
    if htmlSrc:
        # escape illegal entities. E.g. convert &##! to &amp;##!
        try:
            htmlSrc = ILLEGAL_ENTITY_RE.sub(u"&amp;\\g<2>",
                                            htmlSrc)  #$NON-NLS-1$
        except Exception:
            # Best effort only: if the substitution fails, tidy is run on
            # the unescaped source.
            pass
    if not hasBody(htmlSrc):
        # wrap content inside a <html><head/><body> [CONTENT] </body></html>
        htmlSrc = XHTML_TEMPLATE % htmlSrc
        # The template adds lines in front of the content; remember how many
        # so that reported line numbers can be mapped back to the input.
        lineOffset = XHTML_TEMPLATE_LINE_OFFSET

    tidySrc = convertToUtf8(htmlSrc)
    tidyRet = tidy.parseString(tidySrc, **tidyOptions)
    errList = []
    # Maps the severity codes reported by tidy to ZTidyError constants.
    severities = dict(W=ZTidyError.WARN,
                      E=ZTidyError.ERROR,
                      C=ZTidyError.OTHER)
    for err in tidyRet.get_errors():
        te = ZTidyError()
        if err.line is not None:
            te.line = err.line - lineOffset
        if err.col is not None:
            te.col = err.col
        if err.message is not None:
            te.message = err.message
        # Unknown severity codes fall back to ZTidyError.NONE.
        te.severity = severities.get(err.severity, ZTidyError.NONE)
        errList.append(te)

    outHtml = str(tidyRet)
    return (convertToUnicode(outHtml), errList)
Пример #3
0
def _internalRunTidy(htmlSrc, options = XHTML_OPTIONS):
    u"""Runs tidy and returns tuple (html, errorList).

    @param htmlSrc: the markup to run through tidy. When it has no body it
        is first wrapped in XHTML_TEMPLATE.
    @param options: mapping with the keyword options passed on to
        tidy.parseString(). It is never modified: a copy is adjusted
        instead, which keeps the shared XHTML_OPTIONS default and caller
        owned dictionaries intact. None or an empty mapping means that no
        options are passed.
    @return: tuple (html, errorList); html is the tidy output after
        convertToUnicode(), errorList holds one ZTidyError per error
        reported by tidy.
    """  #$NON-NLS-1$
    unsupportedOptions = ["raw", "output_error", "show_warnings"]

    # Copy before adjusting. Editing 'options' directly would leak the
    # changes into XHTML_OPTIONS (the default object is shared by all calls).
    effectiveOptions = {}
    if options:
        effectiveOptions.update(options)
        effectiveOptions['tidy_mark'] = 0
        # remove unsupported options.
        for name in unsupportedOptions:
            effectiveOptions.pop(name, None)

    lineOffset = 0
    if htmlSrc:
        # escape illegal entities. E.g. convert &##! to &amp;##!
        try:
            htmlSrc = ILLEGAL_ENTITY_RE.sub(u"&amp;\\g<2>", htmlSrc)  #$NON-NLS-1$
        except Exception:
            # Escaping is best effort; on failure continue with the
            # original, unescaped source.
            pass
    if not hasBody(htmlSrc):
        # wrap content inside a <html><head/><body> [CONTENT] </body></html>
        htmlSrc = XHTML_TEMPLATE % htmlSrc
        # Lines contributed by the template are subtracted from the line
        # numbers tidy reports, so they refer to the caller's content.
        lineOffset = XHTML_TEMPLATE_LINE_OFFSET

    tidySrc = convertToUtf8(htmlSrc)
    tidyRet = tidy.parseString(tidySrc, **effectiveOptions)
    errList = []
    # Severity codes used by tidy -> ZTidyError severity constants.
    severities = dict(W=ZTidyError.WARN, E=ZTidyError.ERROR, C=ZTidyError.OTHER)
    for err in tidyRet.get_errors():
        te = ZTidyError()
        if err.line is not None:
            te.line = err.line - lineOffset
        if err.col is not None:
            te.col = err.col
        if err.message is not None:
            te.message = err.message
        # Codes missing from the table are reported as ZTidyError.NONE.
        te.severity = severities.get(err.severity, ZTidyError.NONE)
        errList.append(te)

    outHtml = str(tidyRet)
    return (convertToUnicode(outHtml), errList)
Пример #4
0
    def _prepareForLoading(self, xhtmlString):
        u"""Prepares the raw xhtml string for loading into zDom.

        Processing order:
          1. Strip leading whitespace, then drop a leading DOCTYPE
             declaration. The keyword is compared case-insensitively so a
             lower case <!doctype ...> declaration is dropped as well.
          2. Replace all &nbsp; entities with a plain space.
          3. Clean the content with self._cleanupMsOffice().
          4. When the content has no body: convert plain text to xhtml
             markup if it has no xhtml markup, then wrap the result with
             <html><body/></html>. A note is appended to self.messages for
             each conversion that takes place.

        @param xhtmlString: raw xhtml (or plain text) content to prepare.
        @return: the prepared content string.
        """  #$NON-NLS-1$
        xhtmlString = xhtmlString.lstrip()

        # HTML treats the DOCTYPE keyword case-insensitively; a case-sensitive
        # startswith() would leave <!doctype html> in place.
        doctypePrefix = u"<!DOCTYPE" #$NON-NLS-1$
        if xhtmlString[:len(doctypePrefix)].upper() == doctypePrefix:
            # An unterminated declaration gives find() == -1, which makes the
            # slice start at 0, i.e. nothing is removed.
            xhtmlString = xhtmlString[xhtmlString.find(u">") + 1:] #$NON-NLS-1$

        xhtmlString = xhtmlString.replace(u'&nbsp;', u' ') #$NON-NLS-1$ #$NON-NLS-2$
        # The status flag returned by the cleanup is intentionally unused.
        (bOk, xhtmlString) = self._cleanupMsOffice(xhtmlString) #@UnusedVariable

        # if the string content does not have a <body/> then convert to xhtml and wrap it
        # with <html><body/></html>
        if not xhtmlutil.hasBody(xhtmlString):
            if not xhtmlutil.hasXhtmlMarkup(xhtmlString):
                self.messages.append(u"Converting plain text to xhtml markup.")  #$NON-NLS-1$
                # convert plain text to xhtml
                transformer = ZTextToXhtmlTransformer()
                xhtmlString = transformer.transform(xhtmlString)
            xhtmlString = xhtmlutil.wrapHtmlBody(xhtmlString)
            self.messages.append(u"Adding <html><body></body></html> wrapper.")  #$NON-NLS-1$
        return xhtmlString