def get_tokens_unprocessed(self, data):
    """Tokenize a sqlite3 shell transcript.

    Lines that start with the primary prompt ``sqlite> `` or with the
    continuation prompt (three spaces followed by ``...> ``) are split
    into an 8-character ``Generic.Prompt`` token and SQL code.  Consecutive
    prompt lines are accumulated and lexed together by ``SqlLexer``, with
    the prompt tokens woven back in through ``do_insertions``.

    Any other line first flushes the pending SQL and is then emitted as
    ``Generic.Traceback`` when it starts with ``SQL error: `` and as
    ``Generic.Output`` otherwise.

    :param data: the transcript text to tokenize.
    :return: generator of ``(index, tokentype, value)`` tuples.
    """
    sql = SqlLexer(**self.options)

    # Both prompts are exactly ``prompt_len`` characters wide, so a single
    # slice width separates the prompt from the code for either of them.
    # The continuation prompt is built explicitly so that its three leading
    # spaces cannot be lost to whitespace normalisation.
    prompt_len = 8
    prompts = ('sqlite> ', ' ' * 3 + '...> ')

    curcode = ''
    insertions = []
    for match in line_re.finditer(data):
        line = match.group()
        if line.startswith(prompts):
            insertions.append(
                (len(curcode), [(0, Generic.Prompt, line[:prompt_len])]))
            curcode += line[prompt_len:]
            continue

        # A non-prompt line terminates the statement collected so far.
        if curcode:
            yield from do_insertions(
                insertions, sql.get_tokens_unprocessed(curcode))
            curcode = ''
            insertions = []

        if line.startswith('SQL error: '):
            yield (match.start(), Generic.Traceback, line)
        else:
            yield (match.start(), Generic.Output, line)

    # The transcript may end in the middle of a statement.
    if curcode:
        yield from do_insertions(
            insertions, sql.get_tokens_unprocessed(curcode))
def get_tokens_unprocessed(self, text):
    """Tokenize a shell transcript whose code is lexed by ``ErlangLexer``.

    A line matched by ``self._prompt_re`` contributes its matched prefix as
    a ``Generic.Prompt`` token and the remainder as code.  Code from
    consecutive prompt lines is collected and lexed in one go.  A line
    without a prompt flushes the collected code and is emitted as
    ``Generic.Traceback`` if it starts with ``*`` and as ``Generic.Output``
    otherwise.

    :param text: the transcript text to tokenize.
    :return: generator of ``(index, tokentype, value)`` tuples.
    """
    code_lexer = ErlangLexer(**self.options)
    pending_code = ''
    prompt_marks = []

    for line_match in line_re.finditer(text):
        line = line_match.group()
        prompt = self._prompt_re.match(line)

        if prompt is not None:
            cut = prompt.end()
            prompt_marks.append(
                (len(pending_code), [(0, Generic.Prompt, line[:cut])]))
            pending_code += line[cut:]
            continue

        # Non-prompt line: emit whatever code has been gathered first.
        if pending_code:
            yield from do_insertions(
                prompt_marks, code_lexer.get_tokens_unprocessed(pending_code))
            pending_code = ''
            prompt_marks = []

        kind = Generic.Traceback if line.startswith('*') else Generic.Output
        yield line_match.start(), kind, line

    # Trailing code with no output after it.
    if pending_code:
        yield from do_insertions(
            prompt_marks, code_lexer.get_tokens_unprocessed(pending_code))
def get_tokens_unprocessed(self, text):
    """Tokenize a console transcript whose code is lexed by ``SLexer``.

    Lines starting with ``>`` or ``+`` are prompt lines: their first two
    characters become a ``Generic.Prompt`` token and the rest is appended
    to the current code block.  Every other line is output; it flushes the
    current code block (prompts woven in via ``do_insertions``) and is
    emitted as ``Generic.Output``.

    :param text: the transcript text to tokenize.
    :return: generator of ``(index, tokentype, value)`` tuples.
    """
    code_lexer = SLexer(**self.options)
    block = ''
    prompt_marks = []

    for line_match in line_re.finditer(text):
        line = line_match.group()

        if line.startswith(('>', '+')):
            # Highlight the two-character prompt and stash the remainder
            # of the line as code to be lexed later.
            prompt_marks.append(
                (len(block), [(0, Generic.Prompt, line[:2])]))
            block += line[2:]
            continue

        # Output line: any stored prompt lines must be emitted before it.
        if block:
            yield from do_insertions(
                prompt_marks, code_lexer.get_tokens_unprocessed(block))
            block = ''
            prompt_marks = []

        yield line_match.start(), Generic.Output, line

    # The transcript can end on a code block with no output after it.
    if block:
        yield from do_insertions(
            prompt_marks, code_lexer.get_tokens_unprocessed(block))
def get_tokens_unprocessed(self, text):
    """Tokenize a console transcript whose code is lexed by ``DylanLexer``.

    Lines are taken from ``self._line_re``.  When ``self._prompt_re``
    matches a line, the matched prefix becomes a ``Generic.Prompt`` token
    and the remainder is collected as code.  A line without a prompt
    flushes the collected code and is emitted as ``Generic.Output``.

    :param text: the transcript text to tokenize.
    :return: generator of ``(index, tokentype, value)`` tuples.
    """
    code_lexer = DylanLexer(**self.options)
    pending_code = ''
    prompt_marks = []

    for line_match in self._line_re.finditer(text):
        line = line_match.group()
        prompt = self._prompt_re.match(line)

        if prompt is not None:
            cut = prompt.end()
            prompt_marks.append(
                (len(pending_code), [(0, Generic.Prompt, line[:cut])]))
            pending_code += line[cut:]
            continue

        # Output line: emit the buffered code (if any) ahead of it.
        if pending_code:
            yield from do_insertions(
                prompt_marks, code_lexer.get_tokens_unprocessed(pending_code))
            pending_code = ''
            prompt_marks = []

        yield line_match.start(), Generic.Output, line

    # Code at the very end of the transcript.
    if pending_code:
        yield from do_insertions(
            prompt_marks, code_lexer.get_tokens_unprocessed(pending_code))
def get_tokens_unprocessed(self, text):
    """Tokenize a shell transcript whose code is lexed by ``ElixirLexer``.

    Three kinds of line are distinguished:

    * a line starting with ``** `` switches on error mode; everything but
      its last character is inserted as ``Generic.Error`` and the last
      character is appended to the code buffer;
    * a line matched by ``self._prompt_re`` switches off error mode; the
      matched prefix is inserted as ``Generic.Prompt`` and the remainder
      is appended to the code buffer;
    * any other line flushes the code buffer and is emitted whole, as
      ``Generic.Error`` while in error mode and ``Generic.Output``
      otherwise.

    :param text: the transcript text to tokenize.
    :return: generator of ``(index, tokentype, value)`` tuples.
    """
    code_lexer = ElixirLexer(**self.options)
    pending_code = ''
    marks = []
    error_mode = False

    for line_match in line_re.finditer(text):
        line = line_match.group()

        if line.startswith('** '):
            error_mode = True
            marks.append(
                (len(pending_code), [(0, Generic.Error, line[:-1])]))
            pending_code += line[-1:]
            continue

        prompt = self._prompt_re.match(line)
        if prompt is not None:
            error_mode = False
            cut = prompt.end()
            marks.append(
                (len(pending_code), [(0, Generic.Prompt, line[:cut])]))
            pending_code += line[cut:]
            continue

        # Plain line: emit buffered code first, then the line itself.
        if pending_code:
            yield from do_insertions(
                marks, code_lexer.get_tokens_unprocessed(pending_code))
            pending_code = ''
            marks = []

        if error_mode:
            yield line_match.start(), Generic.Error, line
        else:
            yield line_match.start(), Generic.Output, line

    if pending_code:
        yield from do_insertions(
            marks, code_lexer.get_tokens_unprocessed(pending_code))
def get_tokens_unprocessed(self, text):
    """Tokenize a session transcript whose code is lexed by ``MatlabLexer``.

    Line handling, in order of precedence:

    * ``>> `` / ``>>`` -- the prompt becomes ``Generic.Prompt`` and the
      rest of the line is buffered as code;
    * ``???`` -- the whole line is inserted as ``Generic.Traceback`` at the
      current position in the code buffer;
    * continuation -- when the previous line ended in ``...`` and at least
      one insertion is pending, the line is buffered as code; leading
      spaces as wide as the most recent prompt are marked as
      ``Generic.Prompt`` so code stays aligned when prompts are removed;
    * anything else -- buffered code is flushed and the line is emitted as
      ``Generic.Output``.

    :param text: the transcript text to tokenize.
    :return: generator of ``(index, tokentype, value)`` tuples.
    """
    mlexer = MatlabLexer(**self.options)

    curcode = ''
    insertions = []
    continuation = False

    for match in line_re.finditer(text):
        line = match.group()

        if line.startswith('>> '):
            insertions.append(
                (len(curcode), [(0, Generic.Prompt, line[:3])]))
            curcode += line[3:]

        elif line.startswith('>>'):
            insertions.append(
                (len(curcode), [(0, Generic.Prompt, line[:2])]))
            curcode += line[2:]

        elif line.startswith('???'):
            insertions.append(
                (len(curcode), [(0, Generic.Traceback, line)]))

        elif continuation and insertions:
            # Requiring pending insertions avoids an IndexError when an
            # output line that merely ends in '...' precedes any prompt;
            # such a follow-up line is handled as ordinary output below.
            #
            # Width of the most recent prompt *text*.  Each insertion is
            # (index, [(pos, tokentype, value)]), so the text is the last
            # element of the token tuple, not the tuple itself.
            prompt_len = 0
            for _, tokens in reversed(insertions):
                _, ttype, value = tokens[-1]
                if ttype == Generic.Prompt:
                    prompt_len = len(value)
                    break

            # Mark leading spaces as wide as the prompt as a generic
            # prompt.  This keeps code aligned when prompts are removed,
            # say with some Javascript.
            if prompt_len and line.startswith(' ' * prompt_len):
                insertions.append(
                    (len(curcode),
                     [(0, Generic.Prompt, line[:prompt_len])]))
                curcode += line[prompt_len:]
            else:
                curcode += line

        else:
            if curcode:
                yield from do_insertions(
                    insertions, mlexer.get_tokens_unprocessed(curcode))
                curcode = ''
                insertions = []
            yield match.start(), Generic.Output, line

        # Does not allow continuation if a comment is included after the
        # ellipses.  Continues any line that ends with ..., even comments
        # (lines that start with %).
        continuation = line.strip().endswith('...')

    if curcode:
        yield from do_insertions(
            insertions, mlexer.get_tokens_unprocessed(curcode))
def get_tokens_unprocessed(self, text):
    """Tokenize an interactive Python console transcript.

    ``self.python3`` selects the Python 3 or the legacy code/traceback
    lexer pair.  Lines are classified as follows:

    * ``>>> `` / ``... `` -- a 4-character ``Generic.Prompt`` plus code;
    * a bare ``...`` outside a traceback -- a ``...`` prompt plus the rest
      of the line as code;
    * ``Traceback (most recent call last):`` or an indented
      ``File "...", line N`` header -- starts a traceback block, which is
      collected until the first line that is neither indented nor a bare
      ``...`` and then handed to the traceback lexer;
    * ``KeyboardInterrupt`` on its own line -- emitted as ``Name.Class``;
    * anything else -- ``Generic.Output``.

    :param text: the transcript text to tokenize.
    :return: generator of ``(index, tokentype, value)`` tuples.
    """
    if self.python3:
        pylexer = Python3Lexer(**self.options)
        tblexer = Python3TracebackLexer(**self.options)
    else:
        pylexer = PythonLexer(**self.options)
        tblexer = PythonTracebackLexer(**self.options)

    curcode = ''
    insertions = []
    curtb = ''
    tbindex = 0
    tb = 0

    for match in line_re.finditer(text):
        line = match.group()

        if line.startswith(('>>> ', '... ')):
            if curtb:
                # A prompt ends an unfinished traceback.  Emit it now so
                # it is neither reordered to the end of the stream nor
                # overwritten by the next traceback.
                for i, t, v in tblexer.get_tokens_unprocessed(curtb):
                    yield tbindex + i, t, v
                curtb = ''
            tb = 0
            insertions.append(
                (len(curcode), [(0, Generic.Prompt, line[:4])]))
            curcode += line[4:]

        elif line.rstrip() == '...' and not tb:
            # only a new >>> prompt can end an exception block
            # otherwise an ellipsis in place of the traceback frames
            # will be mishandled
            insertions.append(
                (len(curcode), [(0, Generic.Prompt, '...')]))
            curcode += line[3:]

        else:
            if curcode:
                yield from do_insertions(
                    insertions, pylexer.get_tokens_unprocessed(curcode))
                curcode = ''
                insertions = []

            # Traceback "File" lines are indented by two spaces; matching
            # them catches errors printed without the "Traceback" header.
            if (line.startswith('Traceback (most recent call last):') or
                    re.match(r' {2}File "[^"]+", line \d+\n$', line)):
                tb = 1
                curtb = line
                tbindex = match.start()
            elif line == 'KeyboardInterrupt\n':
                yield match.start(), Name.Class, line
            elif tb:
                curtb += line
                # The first line that is neither indented nor an ellipsis
                # (the exception line) closes the traceback block.
                if not (line.startswith(' ') or line.strip() == '...'):
                    tb = 0
                    for i, t, v in tblexer.get_tokens_unprocessed(curtb):
                        yield tbindex + i, t, v
                    curtb = ''
            else:
                yield match.start(), Generic.Output, line

    if curcode:
        yield from do_insertions(
            insertions, pylexer.get_tokens_unprocessed(curcode))
    if curtb:
        for i, t, v in tblexer.get_tokens_unprocessed(curtb):
            yield tbindex + i, t, v
def get_tokens_unprocessed(self, text):
    """Tokenize a shell session transcript.

    Code is lexed by an instance of ``self._innerLexerCls``.  For each
    line:

    * while a backslash continuation is active the whole line is appended
      to the code buffer;
    * a prefix matched by ``self._venv`` is inserted as
      ``Generic.Prompt.VirtualEnv`` (plus a ``Text`` token for trailing
      whitespace, if any) and stripped from the line;
    * a line matched by ``self._ps1rgx`` contributes group 1 as
      ``Generic.Prompt`` and group 2 as code;
    * a line starting with ``self._ps2`` contributes that prefix as
      ``Generic.Prompt`` and the rest as code;
    * any other line flushes the pending block and is emitted as
      ``Generic.Output``.

    Tokens of a flushed block are offset by ``pos``, the start of the line
    that opened the block.

    :param text: the transcript text to tokenize.
    :return: generator of ``(index, tokentype, value)`` tuples.
    """
    innerlexer = self._innerLexerCls(**self.options)

    pos = 0
    curcode = ''
    insertions = []
    backslash_continuation = False

    for match in line_re.finditer(text):
        line = match.group()

        if backslash_continuation:
            curcode += line
            backslash_continuation = curcode.endswith('\\\n')
            continue

        venv_match = self._venv.match(line)
        if venv_match:
            # The venv prefix may be the very first insertion of a block.
            # Record the block offset here, because the check in the
            # prompt branch below only fires while ``insertions`` is
            # still empty.
            if not insertions:
                pos = match.start()
            venv = venv_match.group(1)
            venv_whitespace = venv_match.group(2)
            insertions.append(
                (len(curcode), [(0, Generic.Prompt.VirtualEnv, venv)]))
            if venv_whitespace:
                insertions.append(
                    (len(curcode), [(0, Text, venv_whitespace)]))
            line = line[venv_match.end():]

        m = self._ps1rgx.match(line)
        if m:
            # To support output lexers (say diff output), the output
            # needs to be broken by prompts whenever the output lexer
            # changes.
            if not insertions:
                pos = match.start()

            insertions.append(
                (len(curcode), [(0, Generic.Prompt, m.group(1))]))
            curcode += m.group(2)
            backslash_continuation = curcode.endswith('\\\n')
        elif line.startswith(self._ps2):
            insertions.append(
                (len(curcode),
                 [(0, Generic.Prompt, line[:len(self._ps2)])]))
            curcode += line[len(self._ps2):]
            backslash_continuation = curcode.endswith('\\\n')
        else:
            if insertions:
                toks = innerlexer.get_tokens_unprocessed(curcode)
                for i, t, v in do_insertions(insertions, toks):
                    yield pos + i, t, v
            yield match.start(), Generic.Output, line
            insertions = []
            curcode = ''

    if insertions:
        for i, t, v in do_insertions(
                insertions, innerlexer.get_tokens_unprocessed(curcode)):
            yield pos + i, t, v
def _handle_cssblock(self, match):
    """
    Emit tokens for a style block.

    match args: 1:style tag 2:newline, 3:code, 4:closing style tag

    Groups 1 and 2 are emitted as ``String``.  When
    ``self.handlecodeblocks`` is set and a ``css`` lexer can be found,
    group 3 is tokenized by that lexer; otherwise it is emitted as a
    single ``String`` token.  The closing tag (group 4) is emitted as
    ``String`` on both paths, so no part of the match is dropped from the
    token stream.
    """
    from typecode._vendor.pygments.lexers import get_lexer_by_name

    # section header
    yield match.start(1), String, match.group(1)
    yield match.start(2), String, match.group(2)

    lexer = None
    if self.handlecodeblocks:
        try:
            lexer = get_lexer_by_name('css')
        except ClassNotFound:
            # No CSS lexer available: fall back to a plain String token.
            pass
    code = match.group(3)

    if lexer is None:
        # no lexer for this language. handle it like it was a code block
        yield match.start(3), String, code
    else:
        yield from do_insertions([], lexer.get_tokens_unprocessed(code))

    # The closing tag belongs to the match whether or not a lexer was
    # available.
    yield match.start(4), String, match.group(4)
def _handle_codeblock(self, match):
    """
    Emit tokens for a fenced code block.

    match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks

    The fences and the language name are emitted as ``String`` and the
    newline as ``Text``.  The body is tokenized by the lexer registered
    under the language name when ``self.handlecodeblocks`` is set and such
    a lexer exists; otherwise it is emitted as one ``String`` token.
    """
    from typecode._vendor.pygments.lexers import get_lexer_by_name

    lang_name = match.group(2)

    # opening fence, language name, line break
    yield match.start(1), String, match.group(1)
    yield match.start(2), String, lang_name
    yield match.start(3), Text, match.group(3)

    # Resolve a lexer only when code-block handling is enabled.
    body_lexer = None
    if self.handlecodeblocks:
        try:
            body_lexer = get_lexer_by_name(lang_name.strip())
        except ClassNotFound:
            pass

    body = match.group(4)
    if body_lexer is not None:
        yield from do_insertions([], body_lexer.get_tokens_unprocessed(body))
    else:
        # Unknown language: the body is just a string.
        yield match.start(4), String, body

    # closing fence
    yield match.start(5), String, match.group(5)
def get_tokens_unprocessed(self, data):
    """Tokenize a psql session as repeated command/output cycles.

    Each cycle first collects the lines of one command: a line starting
    with ``$`` (when no command text has been collected yet) is handed to
    the ``console`` lexer and ends the collection; otherwise lines are
    gathered, with anything matched by ``re_prompt`` recorded as a
    ``Generic.Prompt`` insertion, until ``re_psql_command`` or
    ``re_end_command`` recognises the end of the command.  The command is
    lexed with ``PsqlRegexLexer``.

    The cycle then emits output lines until the next prompt line, which is
    sent back to the line iterator for the following cycle.  Lines matched
    by ``re_message`` are split into a ``Generic.Strong`` label and a
    message; once a label starting with ``ERROR`` or ``FATAL`` is seen,
    the rest of that cycle's output is ``Generic.Error`` instead of
    ``Generic.Output``.  Tokenizing stops when the lines run out while
    emitting output.

    :param data: the session text to tokenize.
    :return: generator of ``(index, tokentype, value)`` tuples.
    """
    sql = PsqlRegexLexer(**self.options)
    lines = lookahead(line_re.findall(data))

    while True:
        # --- command part of the cycle ---------------------------------
        command = ''
        prompt_marks = []
        for line in lines:
            # A shell prompt, as in a psql command line example.
            if not command and line.startswith('$'):
                shell_lexer = get_lexer_by_name('console', **self.options)
                yield from shell_lexer.get_tokens_unprocessed(line)
                break

            prompt = re_prompt.match(line)
            if prompt is None:
                command += line
            else:
                prompt_text = prompt.group()
                prompt_marks.append(
                    (len(command), [(0, Generic.Prompt, prompt_text)]))
                command += line[len(prompt_text):]

            # TODO: better handle multiline comments at the end with
            # a lexer with an external state?
            if (re_psql_command.match(command)
                    or re_end_command.search(command)):
                break

        # Command text with its prompt(s) woven back in.
        yield from do_insertions(
            prompt_marks, sql.get_tokens_unprocessed(command))

        # --- output part of the cycle ----------------------------------
        out_token = Generic.Output
        for line in lines:
            if re_prompt.match(line) is not None:
                # Hand the line back so the next cycle sees its prompt.
                lines.send(line)
                break

            message = re_message.match(line)
            if message is None:
                yield (0, out_token, line)
                continue

            label = message.group(1)
            if label.startswith("ERROR") or label.startswith("FATAL"):
                out_token = Generic.Error
            yield (message.start(1), Generic.Strong, label)
            yield (message.start(2), out_token, message.group(2))
        else:
            # Input exhausted without meeting another prompt.
            return
def get_tokens_unprocessed(self, text):
    """Tokenize a REPL transcript whose code is lexed by ``JuliaLexer``.

    Line handling:

    * ``julia>`` -- the 6-character prompt becomes ``Generic.Prompt`` and
      the rest is buffered as code;
    * ``help?>`` / ``shell>`` -- the prompt is emitted as
      ``Generic.Prompt`` and the rest of the line as ``Text``;
    * a line indented by at least six spaces that does not follow output
      -- continuation of the current input: the six-character indent is
      inserted as ``Text`` and the rest is buffered as code;
    * anything else -- buffered code is flushed and the line is emitted as
      ``Generic.Error`` if it starts with ``ERROR: `` or follows such a
      line, and as ``Generic.Output`` otherwise.

    :param text: the transcript text to tokenize.
    :return: generator of ``(index, tokentype, value)`` tuples.
    """
    jllexer = JuliaLexer(**self.options)

    # Width sliced off prompt and continuation lines.  The continuation
    # indent is derived from it so the ``startswith`` test and the slice
    # cannot drift apart.
    prompt_width = 6
    continuation_indent = ' ' * prompt_width

    start = 0  # offset of the current line within ``text``
    curcode = ''
    insertions = []
    output = False
    error = False

    for line in text.splitlines(True):
        if line.startswith('julia>'):
            insertions.append(
                (len(curcode),
                 [(0, Generic.Prompt, line[:prompt_width])]))
            curcode += line[prompt_width:]
            output = False
            error = False
        elif line.startswith(('help?>', 'shell>')):
            yield start, Generic.Prompt, line[:prompt_width]
            yield start + prompt_width, Text, line[prompt_width:]
            output = False
            error = False
        elif line.startswith(continuation_indent) and not output:
            insertions.append(
                (len(curcode), [(0, Text, line[:prompt_width])]))
            curcode += line[prompt_width:]
        else:
            if curcode:
                yield from do_insertions(
                    insertions, jllexer.get_tokens_unprocessed(curcode))
                curcode = ''
                insertions = []
            if line.startswith('ERROR: ') or error:
                yield start, Generic.Error, line
                error = True
            else:
                yield start, Generic.Output, line
            output = True
        start += len(line)

    if curcode:
        yield from do_insertions(
            insertions, jllexer.get_tokens_unprocessed(curcode))
def get_tokens_unprocessed(self, text):
    """Tokenize a session transcript whose code is lexed by ``MatlabLexer``.

    ``>> `` and ``>>`` prompts are inserted as ``Generic.Prompt`` with the
    remainder of the line buffered as code.  A line starting with ``???``
    is inserted whole as ``Generic.Traceback`` at the current position in
    the code buffer.  Any other line flushes the buffered code and is
    emitted as ``Generic.Output``.

    :param text: the transcript text to tokenize.
    :return: generator of ``(index, tokentype, value)`` tuples.
    """
    code_lexer = MatlabLexer(**self.options)
    pending_code = ''
    marks = []

    for line_match in line_re.finditer(text):
        line = line_match.group()

        if line.startswith('>> '):
            marks.append(
                (len(pending_code), [(0, Generic.Prompt, line[:3])]))
            pending_code += line[3:]
            continue

        if line.startswith('>>'):
            marks.append(
                (len(pending_code), [(0, Generic.Prompt, line[:2])]))
            pending_code += line[2:]
            continue

        if line.startswith('???'):
            # The error text is woven into the code stream rather than
            # emitted directly; nothing is added to the code buffer.
            error_token = (0, Generic.Traceback, line)
            marks.append((len(pending_code), [error_token]))
            continue

        # Ordinary output: emit any buffered code before it.
        if pending_code:
            yield from do_insertions(
                marks, code_lexer.get_tokens_unprocessed(pending_code))
            pending_code = ''
            marks = []

        yield line_match.start(), Generic.Output, line

    if pending_code:
        yield from do_insertions(
            marks, code_lexer.get_tokens_unprocessed(pending_code))
def get_tokens_unprocessed(self, text):
    """Tokenize a REPL transcript whose code is lexed by ``PhpLexer``.

    Lines starting with ``>>> `` or ``... `` contribute a 4-character
    ``Generic.Prompt`` and the rest as code.  A line that is just ``...``
    (ignoring trailing whitespace) contributes a ``...`` prompt and the
    rest of the line as code.  Every other line flushes the buffered code
    and is emitted as ``Generic.Output``.

    :param text: the transcript text to tokenize.
    :return: generator of ``(index, tokentype, value)`` tuples.
    """
    code_lexer = PhpLexer(**self.options)
    pending_code = ''
    prompt_marks = []

    for line_match in line_re.finditer(text):
        line = line_match.group()

        if line.startswith(('>>> ', '... ')):
            prompt_marks.append(
                (len(pending_code), [(0, Generic.Prompt, line[:4])]))
            pending_code += line[4:]
            continue

        if line.rstrip() == '...':
            # Bare continuation prompt with no code after it.
            prompt_marks.append(
                (len(pending_code), [(0, Generic.Prompt, '...')]))
            pending_code += line[3:]
            continue

        if pending_code:
            yield from do_insertions(
                prompt_marks, code_lexer.get_tokens_unprocessed(pending_code))
            pending_code = ''
            prompt_marks = []

        yield line_match.start(), Generic.Output, line

    if pending_code:
        yield from do_insertions(
            prompt_marks, code_lexer.get_tokens_unprocessed(pending_code))
def get_tokens_unprocessed(self, text):
    """Tokenize text with ``BaseMakefileLexer`` after lifting out special lines.

    Lines matched by ``self.r_special`` -- and the lines following one
    that ends in a backslash -- are emitted as ``Comment.Preproc``; lines
    matched by ``self.r_comment`` are emitted as ``Comment``.  These lines
    are removed from the text given to ``BaseMakefileLexer`` and woven
    back into its token stream at the position where they were removed.

    :param text: the text to tokenize.
    :return: generator of ``(index, tokentype, value)`` tuples.
    """
    base_lexer = BaseMakefileLexer(**self.options)

    lifted = []       # (offset into the remaining text, [token]) pairs
    remaining = []    # lines left for the base lexer
    offset = 0        # combined length of ``remaining`` so far
    continued = False

    for line in text.splitlines(True):
        if continued or self.r_special.match(line):
            lifted.append((offset, [(0, Comment.Preproc, line)]))
            # A trailing backslash carries the special line onto the
            # next line.
            continued = line.strip().endswith('\\')
        elif self.r_comment.match(line):
            lifted.append((offset, [(0, Comment, line)]))
        else:
            remaining.append(line)
            offset += len(line)

    yield from do_insertions(
        lifted, base_lexer.get_tokens_unprocessed(''.join(remaining)))
def get_tokens_unprocessed(self, text):
    """Tokenize a literate source file in "bird" or "latex" style.

    The style comes from the ``litstyle`` option.  When that is unset,
    ``latex`` is chosen if the first character of the left-stripped text
    is ``%`` or a backslash (or the stripped text is empty), and ``bird``
    otherwise.

    * bird: for lines matched by ``self.bird_re``, group 1 is inserted as
      ``Comment.Special`` and group 2 is collected as code; every other
      line is inserted as ``Text``.
    * latex: lines between ``\\begin{code}`` and ``\\end{code}`` are
      collected as code; everything else, including the delimiter lines,
      is tokenized by ``TexLexer`` and inserted around the code.

    The collected code is lexed by ``self.baselexer`` and merged with the
    insertions.

    :param text: the source text to tokenize.
    :return: generator of ``(index, tokentype, value)`` tuples.
    """
    style = self.options.get('litstyle')
    if style is None:
        # Guess the style from the first non-blank character.
        style = 'latex' if text.lstrip()[0:1] in '%\\' else 'bird'

    code = ''
    insertions = []

    if style == 'bird':
        for line_match in line_re.finditer(text):
            line = line_match.group()
            marked = self.bird_re.match(line)
            if not marked:
                insertions.append((len(code), [(0, Text, line)]))
                continue
            insertions.append(
                (len(code), [(0, Comment.Special, marked.group(1))]))
            code += marked.group(2)
    else:
        from typecode._vendor.pygments.lexers.markup import TexLexer
        tex_lexer = TexLexer(**self.options)

        inside_code = False
        prose = ''
        for line_match in line_re.finditer(text):
            line = line_match.group()
            head = line.lstrip()
            if inside_code:
                if head.startswith('\\end{code}'):
                    # The closing delimiter belongs to the TeX text.
                    inside_code = False
                    prose += line
                else:
                    code += line
            elif head.startswith('\\begin{code}'):
                # The opening delimiter closes the current TeX chunk.
                inside_code = True
                prose += line
                insertions.append(
                    (len(code),
                     list(tex_lexer.get_tokens_unprocessed(prose))))
                prose = ''
            else:
                prose += line
        # Whatever TeX text is left after the last code block.
        insertions.append(
            (len(code), list(tex_lexer.get_tokens_unprocessed(prose))))

    yield from do_insertions(
        insertions, self.baselexer.get_tokens_unprocessed(code))
def get_tokens_unprocessed(self, text):
    """Lex ``text`` with ``self.lang`` after pulling out escape tokens.

    ``self._find_safe_escape_tokens`` yields ``(index, tokentype, value)``
    triples.  Triples whose token type is ``None`` are plain text and are
    concatenated into the buffer passed to ``self.lang``; all other
    triples are escape tokens, which are left out of that buffer and
    re-inserted into the resulting token stream at the position where
    they were removed.

    :param text: the text to tokenize.
    :return: the token iterator produced by ``do_insertions``.
    """
    # This is very similar to DelegatingLexer.get_tokens_unprocessed.
    plain = ''
    insertions = []
    escape_run = []

    for index, ttype, value in self._find_safe_escape_tokens(text):
        if ttype is not None:
            escape_run.append((index, ttype, value))
            continue

        # Plain text ends the current run of escape tokens; anchor the
        # run at the current end of the buffer.
        if escape_run:
            insertions.append((len(plain), escape_run))
            escape_run = []
        plain += value

    if escape_run:
        insertions.append((len(plain), escape_run))

    return do_insertions(
        insertions, self.lang.get_tokens_unprocessed(plain))
def _handle_sourcecode(self, match):
    """
    Emit tokens for a source-code directive and its indented body.

    Groups 1-7 form the directive header and are emitted as punctuation,
    text, operator word, punctuation, text, keyword (the language name)
    and text.  Groups 8-11 together form the body, group 8 being the
    indentation of its first line.

    When ``self.handlecodeblocks`` is set and a lexer exists for the
    language name, that indentation is stripped from each body line longer
    than it and emitted as ``Text``, and the dedented body is tokenized by
    the lexer.  Otherwise the body is emitted as one ``String`` token.
    """
    from typecode._vendor.pygments.lexers import get_lexer_by_name

    # section header
    header_types = (Punctuation, Text, Operator.Word, Punctuation,
                    Text, Keyword, Text)
    for group_no, ttype in enumerate(header_types, 1):
        yield match.start(group_no), ttype, match.group(group_no)

    # Resolve a lexer only when code-block handling is enabled.
    body_lexer = None
    if self.handlecodeblocks:
        try:
            body_lexer = get_lexer_by_name(match.group(6).strip())
        except ClassNotFound:
            pass

    indention = match.group(8)
    indention_size = len(indention)
    raw_body = (indention + match.group(9) + match.group(10) +
                match.group(11))

    # no lexer for this language. handle it like it was a code block
    if body_lexer is None:
        yield match.start(8), String, raw_body
        return

    # Strip the indentation from each line, remembering where it has to
    # be re-inserted into the lexer's token stream.
    indent_marks = []
    dedented = []
    offset = 0
    for line in raw_body.splitlines(True):
        if len(line) > indention_size:
            indent_marks.append(
                (offset, [(0, Text, line[:indention_size])]))
            line = line[indention_size:]
        dedented.append(line)
        offset += len(line)

    yield from do_insertions(
        indent_marks, body_lexer.get_tokens_unprocessed(''.join(dedented)))