Пример #1
0
def IndexGroupToText(group_text):
  """Turn the HTML of an index group into text with entities expanded.

  Raw data is copied through, and each named character entity (e.g. &amp;)
  is replaced by its HTML_REFS value.  Tokens that are not handled below
  are neither printed nor skipped when they are seen.

  Note: We could process some tags, like:

  - Blue Link (not clickable, but still useful)
  - Red X

  Args:
    group_text: HTML text of one index group.

  Returns:
    The contents of the output buffer as a string.

  Raises:
    AssertionError: on a hex or decimal character reference, which are not
      handled yet.
    KeyError: if a named entity is not a key of HTML_REFS.
  """
  buf = cStringIO.StringIO()
  out = html.Output(group_text, buf)

  tok_start = 0
  for tok_id, tok_end in html.ValidTokens(group_text):
    if tok_id == html.RawData:
      # Copy the raw text through unchanged.
      out.SkipTo(tok_start)
      out.PrintUntil(tok_end)

    elif tok_id == html.CharEntity:
      # Drop the first and last characters of the token (the '&' and ';')
      # to get the bare entity name.
      name = group_text[tok_start + 1:tok_end - 1]

      out.SkipTo(tok_start)
      out.Print(HTML_REFS[name])
      out.SkipTo(tok_end)

    # Numeric character references are not handled yet.
    elif tok_id == html.HexChar:
      snippet = group_text[tok_start:tok_start + 20]
      raise AssertionError('Hex Char %r' % snippet)

    elif tok_id == html.DecChar:
      snippet = group_text[tok_start:tok_start + 20]
      raise AssertionError('Dec Char %r' % snippet)

    tok_start = tok_end

  out.PrintTheRest()
  return buf.getvalue()
Пример #2
0
def ExtractBody(s):
    """Return the HTML found between <body> and </body>.

    The splitter needs balanced tags, and what's in <head> isn't balanced,
    so everything outside the body is dropped.

    Args:
      s: HTML text.

    Returns:
      The text from just after the first <body> start tag up to the left
      edge of the matching end tag found by html.ReadUntilEndTag().  If no
      <body> start tag is seen, nothing is printed to the buffer.
    """
    buf = cStringIO.StringIO()
    out = html.Output(s, buf)
    tag_lexer = html.TagLexer(s)

    tok_start = 0
    # One shared iterator: ReadUntilEndTag() below advances the same token
    # stream that this loop is walking.
    tokens = html.ValidTokens(s)
    for tok_id, tok_end in tokens:
        if tok_id == html.StartTag:
            tag_lexer.Reset(tok_start, tok_end)
            if tag_lexer.TagName() == 'body':
                # Start copying right after <body> ...
                out.SkipTo(tok_end)
                body_end_left, _ = html.ReadUntilEndTag(
                    tokens, tag_lexer, 'body')

                # ... and stop at the left edge of </body>.
                out.PrintUntil(body_end_left)
                break

        tok_start = tok_end

    return buf.getvalue()
Пример #3
0
def ExpandLinks(s):
    """Expand shortcut hrefs like $xref:bash into real URLs.

    For every <a> start tag whose href matches _SHORTCUT_RE, the href value
    is replaced by the escaped result of the matching _ABBREVIATIONS
    function.  When the shortcut carries no argument, the text between <a>
    and </a> is used as the argument.

    Args:
      s: HTML text.

    Returns:
      The HTML with shortcut hrefs replaced; everything else is copied
      through unchanged.

    Raises:
      RuntimeError: if a shortcut names an abbreviation that is not in
        _ABBREVIATIONS.
    """
    f = cStringIO.StringIO()
    out = html.Output(s, f)

    tag_lexer = html.TagLexer(s)

    pos = 0  # start of the current token

    it = html.ValidTokens(s)
    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == html.StartTag:

            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'a':
                open_tag_right = end_pos

                href_start, href_end = tag_lexer.GetSpanForAttrValue('href')
                if href_start == -1:
                    # <a> without href: nothing to expand.  Advance pos
                    # before continuing, otherwise the next token would be
                    # lexed from the start of this tag.
                    pos = end_pos
                    continue

                # TODO: Need to unescape like GetAttr()
                href = s[href_start:href_end]

                new = None
                m = _SHORTCUT_RE.match(href)
                if m:
                    abbrev_name, arg = m.groups()
                    if not arg:
                        # No explicit argument: use the link text.  This
                        # consumes tokens from `it` up to and including </a>.
                        close_tag_left, close_tag_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'a')
                        arg = s[open_tag_right:close_tag_left]

                        # The next token starts after </a>, not after the
                        # opening <a ...>, so keep the position in sync with
                        # the iterator.
                        end_pos = close_tag_right

                    func = _ABBREVIATIONS.get(abbrev_name)
                    if not func:
                        raise RuntimeError('Invalid abbreviation %r' %
                                           abbrev_name)
                    new = func(arg)

                if new is not None:
                    # Replace only the href value; the rest of the tag and
                    # the link text are copied by later Print calls.
                    out.PrintUntil(href_start)
                    f.write(cgi.escape(new))
                    out.SkipTo(href_end)

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
 def testShPrompt(self):
   """Smoke test: match a prompt line and print its highlighted form.

   Nothing is asserted; the regex groups and the highlighted output are
   printed for manual inspection.
   """
   prompt_line = 'oil$ ls -l&lt;TAB&gt;  # comment'
   match = oil_doc._PROMPT_LINE_RE.match(prompt_line)
   print(match.groups())
   print(match.group(2))
   print(match.end(2))

   highlighter = oil_doc.ShPromptPlugin(prompt_line, 0, len(prompt_line))
   writer = html.Output(prompt_line, sys.stdout)
   highlighter.PrintHighlighted(writer)
Пример #5
0
def RemoveComments(s):
    """Remove <!-- comments -->, except those containing 'REPLACE'.

    Args:
      s: HTML text.

    Returns:
      The HTML with comments dropped.  Comments whose text contains
      'REPLACE' are kept, because they are placeholders.
    """
    f = cStringIO.StringIO()
    out = html.Output(s, f)

    pos = 0  # start of the current token

    for tok_id, end_pos in html.ValidTokens(s):
        if tok_id == html.Comment:
            # doc/release-index.md has <!-- REPLACE_WITH_DATE --> etc.
            if 'REPLACE' not in s[pos:end_pos]:
                # Copy everything before the comment, then jump over it.
                out.PrintUntil(pos)
                out.SkipTo(end_pos)
        pos = end_pos

    out.PrintTheRest()
    return f.getvalue()
def HighlightLine(line):
    """Insert link and highlight markup into one line of text.

    A leading 'X ' is wrapped in the X_LEFT_SPAN span, a [Section] at the
    start becomes a link to help.html, and each following topic becomes a
    link as well.

    Args:
      line: RAW SPAN of HTML that is already escaped.

    Returns:
      The HTML with some tags inserted.  A line that starts with neither
      'X ' nor two spaces is returned unchanged.
    """
    buf = cStringIO.StringIO()
    out = html.Output(line, buf)

    if line.startswith('X '):
        out.Print(X_LEFT_SPAN)
        out.PrintUntil(2)
        out.Print('</span>')
    elif not line.startswith('  '):
        return line

    # Either way, the two-character prefix has been dealt with.
    cursor = 2

    # Highlight [Section] at the start of a line.
    section = SECTION_RE.match(line, cursor)
    if section:
        anchor = _StringToHref(section.group(1))

        out.PrintUntil(section.start(1))
        out.Print('<a href="help.html#%s" class="level2">' % anchor)
        out.PrintUntil(section.end(1))
        out.Print('</a>')

        cursor = section.end(0)

    # At least one space must come before the topics.
    gap = re.compile(r'[ ]+').match(line, cursor)
    assert gap, 'Expected whitespace %r' % line
    cursor = gap.end(0)

    # Link topics one at a time until one fails to match, is excluded, or
    # is not followed by the required trailing spaces.
    while True:
        found = TOPIC_RE.match(line, cursor)
        if not found:
            break

        topic = found.group(2)
        if topic in _NOT_A_TOPIC:
            break

        if found.group(1):
            # Optional marker in front of the topic gets the same span as
            # a leading 'X '.
            out.PrintUntil(found.start(1))
            out.Print(X_LEFT_SPAN)
            out.PrintUntil(found.end(1))
            out.Print('</span>')

        out.PrintUntil(found.start(2))
        out.Print('<a href="help.html#%s">' % topic)
        out.PrintUntil(found.end(2))
        out.Print('</a>')

        # Trailing 3 spaces required to continue.
        if not found.group(4):
            break

        cursor = found.end(0)

    out.PrintTheRest()

    return buf.getvalue()
Пример #7
0
def HighlightCode(s, default_highlighter):
    """Syntax-highlight the contents of <pre><code> blocks in an HTML string.

    Algorithm:
    1. Find each <pre> start tag that is immediately followed by a <code>
       start tag, and locate the matching </code>.
    2. Pick a plugin based on the 'class' attribute of <code>:
       - no class: use default_highlighter (only 'oil-sh' is known, which
         uses ShPromptPlugin); None leaves the block as is.
       - language-sh-prompt: ShPromptPlugin
       - language-oil: not highlighted yet
       - language-osh-help-topics / language-oil-help-topics:
         HelpTopicsPlugin
       - any other language-*: PygmentsPlugin, which replaces the whole
         <pre><code> ... </code></pre> span.
       A class that does not start with 'language' is left as is.
    3. Let the plugin print the highlighted code in place of the original.

    Args:
      s: HTML text.
      default_highlighter: name of the highlighter for <code> tags without
        a class attribute, or None to leave them alone.

    Returns:
      The HTML with highlighted code blocks.

    Raises:
      RuntimeError: if default_highlighter is neither None nor 'oil-sh' and
        a <code> tag without a class is found.
      AssertionError: if a Pygments-highlighted </code> is not directly
        followed by </pre>.
    """
    f = cStringIO.StringIO()
    out = html.Output(s, f)

    tag_lexer = html.TagLexer(s)

    pos = 0  # start of the current token

    it = html.ValidTokens(s)

    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == html.StartTag:

            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                # Remember where <pre> starts, in case the whole block is
                # replaced (Pygments case below).
                pre_start_pos = pos
                pos = end_pos

                # Look at the token right after <pre>.
                try:
                    tok_id, end_pos = next(it)
                except StopIteration:
                    break

                tag_lexer.Reset(pos, end_pos)
                if tok_id == html.StartTag and tag_lexer.TagName() == 'code':

                    css_class = tag_lexer.GetAttr('class')
                    code_start_pos = end_pos  # right after <code ...>

                    if css_class is None:
                        # Consumes tokens up to and including </code>.
                        slash_code_left, slash_code_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')

                        if default_highlighter is not None:
                            if default_highlighter == 'oil-sh':
                                out.PrintUntil(code_start_pos)

                                # Using ShPromptPlugin because it does the comment highlighting we want!
                                plugin = ShPromptPlugin(
                                    s, code_start_pos, slash_code_left)
                                plugin.PrintHighlighted(out)

                                out.SkipTo(slash_code_left)
                            else:
                                raise RuntimeError(
                                    'Unknown default highlighter %r' %
                                    default_highlighter)

                    elif css_class.startswith('language'):
                        # Consumes tokens up to and including </code>.
                        slash_code_left, slash_code_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')

                        if css_class == 'language-sh-prompt':
                            # Here we're KEEPING the original <pre><code>
                            # Print everything up to and including <pre><code language="...">
                            out.PrintUntil(code_start_pos)

                            plugin = ShPromptPlugin(s, code_start_pos,
                                                    slash_code_left)
                            plugin.PrintHighlighted(out)

                            # The plugin printed the code; skip the original.
                            out.SkipTo(slash_code_left)

                        elif css_class == 'language-oil':
                            # TODO: Write an Oil syntax highlighter.
                            pass

                        elif css_class == 'language-osh-help-topics':
                            # TODO: Link to osh-help.html, instead of oil-help.html
                            out.PrintUntil(code_start_pos)

                            plugin = HelpTopicsPlugin(s, code_start_pos,
                                                      slash_code_left, 'osh')
                            plugin.PrintHighlighted(out)

                            out.SkipTo(slash_code_left)

                        elif css_class == 'language-oil-help-topics':

                            out.PrintUntil(code_start_pos)

                            plugin = HelpTopicsPlugin(s, code_start_pos,
                                                      slash_code_left, 'oil')
                            plugin.PrintHighlighted(out)

                            out.SkipTo(slash_code_left)

                        else:  # language-*: Use Pygments

                            # We REMOVE the original <pre><code> because Pygments gives you a <pre> already

                            # We just read closing </code>, and the next one should be </pre>.
                            try:
                                tok_id, end_pos = next(it)
                            except StopIteration:
                                break
                            # The token after </code> starts at the right
                            # edge of </code>.
                            tag_lexer.Reset(slash_code_right, end_pos)
                            assert tok_id == html.EndTag, tok_id
                            assert tag_lexer.TagName(
                            ) == 'pre', tag_lexer.TagName()
                            slash_pre_right = end_pos

                            # Print only up to the start of <pre>, so the
                            # original <pre><code> tags are dropped.
                            out.PrintUntil(pre_start_pos)

                            # Text after the 'language-' prefix is the
                            # language name passed to the plugin.
                            lang = css_class[len('language-'):]
                            plugin = PygmentsPlugin(s, code_start_pos,
                                                    slash_code_left, lang)
                            plugin.PrintHighlighted(out)

                            # Skip past </pre> as well.
                            out.SkipTo(slash_pre_right)
                            f.write('<!-- done pygments -->\n')

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()