def get_tokens_unprocessed(self, text):
    """Post-process CLexer tokens: re-lex preprocessor directive bodies with a
    plain CLexer, and re-tag adjacent type/name pairs so the declared name is
    emitted as Name.Variable."""
    handling_preproc = False
    preproc_index = 0
    preproc_text = ''
    seen_name = False
    buffered_name_token = None
    buffered_spacing = []
    for index, token, value in CLexer.get_tokens_unprocessed(self, text):
        if token == Token.Comment.Preproc:
            if value == '#':
                handling_preproc = True
                preproc_index = index + 1
                preproc_text = ''
                yield index, token, value
            else:
                preproc_text += value
        else:
            if handling_preproc:
                handling_preproc = False
                first_space = preproc_text.find(' ')
                yield preproc_index, Token.Comment.Preproc, preproc_text[:first_space]
                rest = preproc_text[first_space:]
                rest_index = preproc_index + first_space
                sublexer = CLexer()
                for i, t, v in sublexer.get_tokens_unprocessed(rest):
                    # rebase sub-lexer offsets onto the original text
                    # (the original yielded `i + first_space` and left
                    # `rest_index` unused, which looks like a bug)
                    yield rest_index + i, t, v
            if token == Token.Name or token == Token.Keyword.Type:
                if not seen_name:
                    seen_name = True
                    buffered_name_token = (index, token, value)
                else:
                    seen_name = False
                    _index, _type, _value = buffered_name_token
                    buffered_name_token = None
                    yield _index, Token.Name.Other, _value
                    for i in buffered_spacing:
                        yield i
                    buffered_spacing = []
                    yield index, Token.Name.Variable, value
            else:
                if seen_name:
                    if value == ' ' or value == '*':
                        buffered_spacing.append((index, token, value))
                        continue
                    seen_name = False
                    yield buffered_name_token
                    buffered_name_token = None
                    for i in buffered_spacing:
                        yield i
                    buffered_spacing = []
                yield index, token, value
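# For reference, the (index, token, value) triples consumed and re-emitted by the
# override above come straight from CLexer.get_tokens_unprocessed. A minimal,
# standalone sketch of that stream (the sample C input is invented for illustration):
from pygments.lexers import CLexer

sample = "#define MAX 10\nint counter = MAX;\n"
for index, token, value in CLexer().get_tokens_unprocessed(sample):
    # Each item is (offset into `sample`, token type, matched text) -- exactly the
    # stream the override above filters, re-lexes and re-tags.
    print(index, token, repr(value))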
def display(self, stype, mode='raw', oformat='term'):
    """
    Display output for a single match

    :param stype: name of the matched type
    :type stype: str
    :param mode: display mode
    :type mode: str
    :param oformat: format of output for color (term, html)
    :type oformat: str
    :return: a human readable string containing the result of the search
             (matched line, context, file name, etc.)
    """
    f = open(self.file, 'r')
    lines = f.readlines()
    pmatch = lines[self.line - 1][self.column:self.columnend]
    ptype = "*"  # match is a pointer to struct
    if (CocciMatch.ptype_regexp.search(lines[self.line - 1][self.columnend:])):
        ptype = ""
    output = ""
    if mode == 'color':
        output += "%s: l.%s -%d, l.%s +%d, %s %s%s\n" % (
            self.file, self.line, self.line - self.start_at,
            self.line, self.stop_at - self.line, stype, ptype, pmatch)
    for i in range(self.start_at - 1, min(self.stop_at, len(lines))):
        if mode == 'color':
            output += lines[i]
        elif mode == 'vim':
            output += "%s|%s| (%s %s%s): %s" % (
                self.file, i + 1, stype, ptype, pmatch, lines[i])
        elif mode == 'emacs':
            output += "%s:%s: (%s %s%s): %s" % (
                self.file, i + 1, stype, ptype, pmatch, lines[i])
        elif i == self.line - 1:
            output += "%s:%s (%s %s%s): %s" % (
                self.file, i + 1, stype, ptype, pmatch, lines[i])
        else:
            output += "%s-%s %s - %s" % (
                self.file, i + 1,
                ' ' * (2 + len(stype + ptype + pmatch)), lines[i])
    f.close()
    if mode == 'color':
        if have_pygments:
            lexer = CLexer()
            lfilter = NameHighlightFilter(names=[pmatch])
            lexer.add_filter(lfilter)
            if oformat == "term":
                return highlight(output, lexer, Terminal256Formatter())
            elif oformat == "html":
                return highlight(output, lexer, HtmlFormatter(noclasses=True))
            else:
                return output
    return output + self.trailer
def color_line(self, line):
    """Colorize a single line of decompiled C by mapping Pygments token types
    onto IDA color tags."""
    lexer = CLexer()
    tokens = list(lexer.get_tokens(line))
    new_line = ""
    for t in tokens:
        ttype = t[0]
        ttext = str(t[1])
        if ttype == Token.Text:
            new_line += idaapi.COLSTR(ttext, idaapi.SCOLOR_INSN)
        elif ttype == Token.Text.Whitespace:
            new_line += idaapi.COLSTR(ttext, idaapi.SCOLOR_INSN)
        elif ttype == Token.Error:
            new_line += idaapi.COLSTR(ttext, idaapi.SCOLOR_ERROR)
        elif ttype == Token.Other:
            new_line += idaapi.COLSTR(ttext, idaapi.SCOLOR_DSTR)
        elif ttype == Token.Keyword:
            new_line += idaapi.COLSTR(ttext, idaapi.SCOLOR_KEYWORD)
        elif ttype == Token.Name:
            new_line += idaapi.COLSTR(ttext, idaapi.SCOLOR_LIBNAME)
        elif ttype == Token.Literal:
            new_line += idaapi.COLSTR(ttext, idaapi.SCOLOR_LOCNAME)
        elif ttype == Token.Literal.String:
            new_line += idaapi.COLSTR(ttext, idaapi.SCOLOR_STRING)
        elif ttype == Token.Literal.Number:
            new_line += idaapi.COLSTR(ttext, idaapi.SCOLOR_DNUM)
        elif ttype == Token.Operator:
            new_line += idaapi.COLSTR(ttext, idaapi.SCOLOR_ALTOP)
        elif ttype == Token.Punctuation:
            new_line += idaapi.COLSTR(ttext, idaapi.SCOLOR_SYMBOL)
        elif ttype == Token.Comment:
            new_line += idaapi.COLSTR(ttext, idaapi.SCOLOR_REGCMT)
        elif ttype == Token.Comment.Single:
            new_line += idaapi.COLSTR(ttext, idaapi.SCOLOR_REGCMT)
        elif ttype == Token.Generic:
            new_line += idaapi.COLSTR(ttext, idaapi.SCOLOR_CREFTAIL)
        else:
            new_line += idaapi.COLSTR(ttext, idaapi.SCOLOR_CREFTAIL)
    return new_line
def __str__(self): out = "" if self._header is not None: for line in self._header.split("\n"): out += "\n" + " "*18 + "| " + highlight(line, CLexer(), Terminal256Formatter(style='monokai')).strip() out += "\n" out += "\n".join(str(self[item]) for item in self) if self._footer is not None: for line in self._footer.split("\n"): out += "\n" + " "*18 + "| " + highlight(line, CLexer(), Terminal256Formatter(style='monokai')).strip() return out.rstrip()
def __str__(self): s = "" # Are we on the first line of this decompiled output? first = True if self.src is not None: src = self.src.decode('latin-1') for line in src.split("\n"): if first: # Can't do adjustments since we don't know the name if self._file_name is None: saddr = "{:18s}".format(hex(self.address)) else: # Adjust offset to make sense with our current binary saddr = "{:18s}".format( hex(self._process.memory[ self._file_name.decode('latin-1') + ":" + hex(self.address)].address)) if self.highlight is not None: s += getattr( colorama.Back, self.highlight ) + saddr + colorama.Style.RESET_ALL + "| " else: s += saddr + "| " s += highlight( line, CLexer(), Terminal256Formatter(style='monokai')).strip() + "\n" first = False else: saddr = " " * 18 if self.highlight is not None: s += getattr( colorama.Back, self.highlight ) + saddr + colorama.Style.RESET_ALL + "| " else: s += saddr + "| " s += highlight( line, CLexer(), Terminal256Formatter(style='monokai')).strip() + "\n" return s.strip()
def display(self, stype, mode='raw', oformat='term', before=0, after=0):
    """
    Display output for a single match

    :param stype: name of the matched type
    :type stype: str
    :param mode: display mode
    :type mode: str
    :param oformat: format of output for color (term, html)
    :type oformat: str
    :param before: number of lines to display before match
    :type before: int
    :param after: number of lines to display after match
    :type after: int
    """
    f = open(self.file, 'r')
    lines = f.readlines()
    pmatch = lines[self.line - 1][self.column:self.columnend]
    ptype = "*"  # match is a pointer to struct
    if (CocciMatch.ptype_regexp.search(lines[self.line - 1][self.columnend:])):
        ptype = ""
    output = ""
    if mode == 'color':
        output += "%s: l.%s -%d, l.%s +%d, %s %s%s\n" % (
            self.file, self.line, before, self.line, after, stype, ptype, pmatch)
    for i in range(int(self.line) - 1 - before, int(self.line) + after):
        if mode == 'color':
            output += lines[i]
        elif mode == 'vim':
            output += "%s|%s| (%s %s%s): %s" % (
                self.file, self.line, stype, ptype, pmatch, lines[i])
        elif mode == 'emacs':
            output += "%s:%s: (%s %s%s): %s" % (
                self.file, self.line, stype, ptype, pmatch, lines[i])
        else:
            output += "%s:%s (%s %s%s): %s" % (
                self.file, self.line, stype, ptype, pmatch, lines[i])
    f.close()
    if mode == 'color':
        if have_pygments:
            lexer = CLexer()
            lfilter = NameHighlightFilter(names=[pmatch])
            lexer.add_filter(lfilter)
            if oformat == "term":
                return highlight(output, lexer, Terminal256Formatter())
            elif oformat == "html":
                return highlight(output, lexer, HtmlFormatter(noclasses=True))
            else:
                return output
    return output
def build(self):
    root = BoxLayout(orientation="vertical", padding=5)
    butn = GridLayout(cols=3, size_hint=[1, .07])
    self.nameF = TextInput(text="main.c", size_hint=[1, .1],
                           background_color=[1, 1, 1, .5])
    root.add_widget(self.nameF)
    buttonA = Button(text='Add File', on_press=self.add)
    butn.add_widget(buttonA)
    buttonC = Button(text='Compile File', on_press=self.compile)
    butn.add_widget(buttonC)
    buttonS = Button(text='Save File', on_press=self.save)
    butn.add_widget(buttonS)
    root.add_widget(butn)
    self.code = CodeInput(text="", lexer=CLexer())
    root.add_widget(self.code)
    self.check = TextInput(text="", size_hint=[1, .3],
                           background_color=[1, 1, 1, .5])
    root.add_widget(self.check)
    return root
def render_output(self, code):
    lexer = CLexer()
    style = NativeStyle()
    style.background_color = BG_COLOR
    formatter = HtmlFormatter(full=True, style='native', noclasses=True)
    colored_code = highlight(code, lexer, formatter)
    show_html_report('{}.c'.format(self._function.name), colored_code)
def show_output(bv, func_name, decompiled):
    lexer = CLexer()
    style = NativeStyle()
    style.background_color = '#272811'
    formatter = HtmlFormatter(full=True, style='native', noclasses=True)
    colored_code = highlight(decompiled, lexer, formatter)
    bv.show_html_report('Decompiled ' + func_name, colored_code)
def add_comment(self):
    """
    Add a comment to the selected line
    """
    print("GhIDA:: [DEBUG] add_comment called")

    colored_line = self.GetCurrentLine(notags=1)
    if not colored_line:
        idaapi.warning("Select a line")
        return False

    # Use pygments to parse the line to check if there are comments
    line = idaapi.tag_remove(colored_line)
    lexer = CLexer()
    tokens = list(lexer.get_tokens(line))
    text = ""
    text_comment = ""
    for t in tokens:
        ttype = t[0]
        ttext = str(t[1])
        if ttype == Token.Comment.Single:
            text_comment = ttext.replace('//', '').strip()
        else:
            text += ttext

    # Get the new comment
    comment = gl.display_comment_form(text_comment)
    if not comment or len(comment) == 0:
        return False
    comment = comment.replace("//", "").replace("\n", " ")
    comment = comment.strip()

    # Create the new text
    full_comment = " // %s" % comment
    text = text.rstrip()
    new_text = text + full_comment
    text_colored = self.color_line(new_text)

    num_line = self.GetLineNo()
    self.EditLine(num_line, text_colored)
    self.RefreshCurrent()

    # Add comment to cache
    COMMENTS_CACHE.add_comment_to_cache(self.__ea, num_line, full_comment)
    print("GhIDA:: [DEBUG] Added comment to #line: %d (%s)" % (num_line, new_text))
    return
def main(srcFile, jsonFile):
    src = readFrom(srcFile)
    srcHtml = highlight(src, CLexer(stripall=False), HtmlFormatter())
    srcHtml = addLineNumbers(src, srcHtml)
    srcJson = readFrom(jsonFile)
    tplt = Template(readFrom(tgtTplt))
    tgt = tplt.substitute(srcHtml=srcHtml, srcJson=srcJson)
    writeTo(tgtFile, tgt)
def testC(self):
    """ Does the CompletionLexer work for C/C++? """
    lexer = CompletionLexer(CLexer())
    self.assertEquals(lexer.get_context("foo.bar"), ["foo", "bar"])
    self.assertEquals(lexer.get_context("foo->bar"), ["foo", "bar"])

    lexer = CompletionLexer(CppLexer())
    self.assertEquals(lexer.get_context("Foo::Bar"), ["Foo", "Bar"])
def write_html(self, fd):
    source = open(self.path, 'r')
    code = source.read()
    lexer = CLexer()
    formatter = FunctionHtmlFormatter(self.lines, full=True, linenos='inline')
    fd.write(highlight(code, lexer, formatter))
    source.close()
def get_highlighted_cl_code(text):
    try:
        from pygments import highlight
    except ImportError:
        return text
    else:
        from pygments.lexers import CLexer
        from pygments.formatters import TerminalFormatter
        return highlight(text, CLexer(), TerminalFormatter())
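# A quick, hypothetical usage of get_highlighted_cl_code() defined above; the OpenCL
# kernel string is made up for the example. When Pygments is not installed the helper
# simply returns the text unchanged.
kernel_src = "__kernel void scale(__global float *a) { a[0] *= 2.0f; }"
print(get_highlighted_cl_code(kernel_src))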
def get_highlighted_code(text, python=False):
    try:
        from pygments import highlight
    except ImportError:
        return text
    else:
        from pygments.lexers import CLexer, PythonLexer
        from pygments.formatters import TerminalFormatter
        return highlight(text, CLexer() if not python else PythonLexer(),
                         TerminalFormatter())
class CLexerTest(unittest.TestCase):

    def setUp(self):
        self.lexer = CLexer()

    def testNumbers(self):
        code = '42 23.42 23. .42 023 0xdeadbeef 23e+42 42e-23'
        wanted = []
        for item in zip([Number.Integer, Number.Float, Number.Float,
                         Number.Float, Number.Oct, Number.Hex,
                         Number.Float, Number.Float], code.split()):
            wanted.append(item)
            wanted.append((Text, ' '))
        wanted = [(Text, '')] + wanted[:-1] + [(Text, '\n')]
        self.assertEqual(list(self.lexer.get_tokens(code)), wanted)
def code(listing):
    # {{ ''' foo() \n bar() ''' | code }}

    # format this with the external clang-format tool
    proc = Popen(["clang-format"], stdout=PIPE, stdin=PIPE)
    result = proc.communicate(input=bytearray(listing, 'utf-8'))
    if proc.returncode != 0:
        raise Exception("build.py fails to call clang-format for listing: " + listing)

    from pygments import highlight
    from pygments.lexers import CLexer
    from pygments.formatters import HtmlFormatter
    fmtd = highlight(result[0].decode(), CLexer(), HtmlFormatter())
    return "%s\n" % fmtd
def rewrite(fs, codes, out):
    with open(out, "w") as fd:
        (static_symbols, rewriting_plan) = prepare_rewritting(fs, codes)
        for (pn, code) in codes:
            # reschedule static symbols
            rewritten = highlight(code, CLexer(), TokenRewritter(pn, rewriting_plan))
            for l in rewritten.splitlines():
                fd.write(l.encode("utf8"))
                fd.write("\n")
            fd.write("/" + "*" * 60 + "/\n")
            fd.write("/* %s */\n" % pn)
            fd.write("/" + "*" * 60 + "/\n")
    return (static_symbols, rewriting_plan)
def htmlHighlight(code, lang):
    """Apply HTML formatting to highlight code fragment.

    :code: Source code
    :lang: Language of source code
    """
    lexer = None
    if lang == 'C':
        lexer = CLexer()
    elif lang in ('Fortran77', 'Fortran95'):
        lexer = FortranLexer()
    if lexer:
        code = highlight(code, lexer, HtmlFormatter()).replace('<pre>', '<pre>\n')
        lines = ['<li>%s</li>' % l for l in code.split('\n')
                 if l[:4] != "<div" and l[:5] != "</pre"]
        # absolutely NO blank spaces!
        return ('<pre class="prettyprint linenums">'
                '<ol class="highlight linenums">%s</ol></pre>' % ''.join(lines))
    else:
        return code
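# Hypothetical usage of htmlHighlight() defined above; the C fragment is invented
# for the example. Languages without a configured lexer fall through unchanged.
snippet = "int add(int a, int b) { return a + b; }"
print(htmlHighlight(snippet, 'C'))    # wrapped in <ol>/<li> markup via CLexer
print(htmlHighlight(snippet, 'Ada'))  # no lexer configured: returned as-is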
def prepare_rewritting(target, codes):
    global FS_FORCE_REWRITE

    def _to_canonical(pn, sym):
        base = os.path.basename(pn)
        return sym + "_" + base.replace(".", "_").replace("-", "_")

    # get static symbols
    static_symbols = {}
    for (pn, code) in codes:
        formatter = StaticDecl()
        highlight(code, CLexer(), formatter)
        print("> %-50s: %d" % (pn, len(formatter.table)))
        static_symbols[pn] = formatter.table

    # check collisions
    rewriting_plan = defaultdict(dict)
    for (pivot_pn, pivot_tbl) in static_symbols.iteritems():
        # rewrite if collided
        for sym in pivot_tbl:
            for (target_pn, target_tbl) in static_symbols.iteritems():
                if pivot_pn == target_pn:
                    continue
                if sym in target_tbl:
                    print("> %s collided with %s & %s" % (sym, pivot_pn, target_pn))
                    rewriting_plan[pivot_pn][sym] = _to_canonical(pivot_pn, sym)

        # update pivot_tbl to minimize rewriting
        for (sym, new_sym) in rewriting_plan[pivot_pn].iteritems():
            pivot_tbl.remove(sym)
            pivot_tbl.add(new_sym)

    # manual rewriting (e.g., collision in headers)
    for sym in FS_FORCE_REWRITE.get(target, []):
        print("> manually include %s" % sym)
        for (pivot_pn, pivot_tbl) in static_symbols.iteritems():
            rewriting_plan[pivot_pn][sym] = _to_canonical(pivot_pn, sym)

    return (static_symbols, rewriting_plan)
def __init__(self, logger, src):
    self.logger = logger

    self.tokens = lex(src, CLexer())

    # Current token line number.
    self.cur_line_numb = 1

    # Current token start offset within current line.
    self.cur_start_offset = 0

    # List of entities (each represented as kind, line number, start and end
    # offsets) to be highlighted.
    self.highlights = list()

    # Workaround for leading "\n" characters at the beginning of the source
    # file that do not become tokens.
    self.initial_new_lines_numb = 0
    for c in src:
        if c == '\n':
            self.initial_new_lines_numb += 1
        else:
            break
def catFile(file_name, base_path, report_path):
    """ Create an HTML page for a source file. """
    file_path = os.path.join(base_path, file_name)
    with open(file_path, 'r') as f:
        lines = f.readlines()
    with tmpHtmlFile(report_path) as report_f:
        html_name = report_f.name
        tpl = Template(NEWFILE_TEMPLATE)
        try:
            text = ''.join(lines).decode('utf8').encode('ascii', 'ignore')
        except:
            text = ''.join(lines).decode('latin').encode('ascii', 'ignore')
        report_f.write(
            tpl.render(file_name=file_name,
                       code=highlight(text, CLexer(), HtmlFormatter())))
    return html_name, calcSignature(''.join([line for line in lines]))
class CLexerTest(unittest.TestCase): def setUp(self): self.lexer = CLexer() def testNumbers(self): code = '42 23.42 23. .42 023 0xdeadbeef 23e+42 42e-23' wanted = [] for item in zip([Number.Integer, Number.Float, Number.Float, Number.Float, Number.Oct, Number.Hex, Number.Float, Number.Float], code.split()): wanted.append(item) wanted.append((Text, ' ')) wanted = [(Text, '')] + wanted[:-1] + [(Text, '\n')] self.assertEqual(list(self.lexer.get_tokens(code)), wanted) def testSwitch(self): fragment = u'''\ int main() { switch (0) { case 0: default: ; } } ''' expected = [ (Token.Text, u''), (Token.Keyword.Type, u'int'), (Token.Text, u' '), (Token.Name.Function, u'main'), (Token.Text, u''), (Token.Punctuation, u'('), (Token.Punctuation, u')'), (Token.Text, u'\n'), (Token.Text, u''), (Token.Punctuation, u'{'), (Token.Text, u'\n'), (Token.Text, u' '), (Token.Keyword, u'switch'), (Token.Text, u' '), (Token.Punctuation, u'('), (Token.Literal.Number.Integer, u'0'), (Token.Punctuation, u')'), (Token.Text, u'\n'), (Token.Text, u' '), (Token.Punctuation, u'{'), (Token.Text, u'\n'), (Token.Text, u' '), (Token.Keyword, u'case'), (Token.Text, u' '), (Token.Literal.Number.Integer, u'0'), (Token.Operator, u':'), (Token.Text, u'\n'), (Token.Text, u' '), (Token.Keyword, u'default'), (Token.Operator, u':'), (Token.Text, u'\n'), (Token.Text, u' '), (Token.Punctuation, u';'), (Token.Text, u'\n'), (Token.Text, u' '), (Token.Punctuation, u'}'), (Token.Text, u'\n'), (Token.Punctuation, u'}'), (Token.Text, u'\n'), (Token.Text, u''), ] self.assertEqual(expected, list(self.lexer.get_tokens(textwrap.dedent(fragment)))) def testSwitchSpaceBeforeColon(self): fragment = u'''\ int main() { switch (0) { case 0 : default : ; } } ''' expected = [ (Token.Text, u''), (Token.Keyword.Type, u'int'), (Token.Text, u' '), (Token.Name.Function, u'main'), (Token.Text, u''), (Token.Punctuation, u'('), (Token.Punctuation, u')'), (Token.Text, u'\n'), (Token.Text, u''), (Token.Punctuation, u'{'), (Token.Text, u'\n'), (Token.Text, u' '), (Token.Keyword, u'switch'), (Token.Text, u' '), (Token.Punctuation, u'('), (Token.Literal.Number.Integer, u'0'), (Token.Punctuation, u')'), (Token.Text, u'\n'), (Token.Text, u' '), (Token.Punctuation, u'{'), (Token.Text, u'\n'), (Token.Text, u' '), (Token.Keyword, u'case'), (Token.Text, u' '), (Token.Literal.Number.Integer, u'0'), (Token.Text, u' '), (Token.Operator, u':'), (Token.Text, u'\n'), (Token.Text, u' '), (Token.Keyword, u'default'), (Token.Text, u' '), (Token.Operator, u':'), (Token.Text, u'\n'), (Token.Text, u' '), (Token.Punctuation, u';'), (Token.Text, u'\n'), (Token.Text, u' '), (Token.Punctuation, u'}'), (Token.Text, u'\n'), (Token.Punctuation, u'}'), (Token.Text, u'\n'), (Token.Text, u''), ] self.assertEqual(expected, list(self.lexer.get_tokens(textwrap.dedent(fragment)))) def testLabel(self): fragment = u'''\ int main() { foo: goto foo; } ''' expected = [ (Token.Text, u''), (Token.Keyword.Type, u'int'), (Token.Text, u' '), (Token.Name.Function, u'main'), (Token.Text, u''), (Token.Punctuation, u'('), (Token.Punctuation, u')'), (Token.Text, u'\n'), (Token.Text, u''), (Token.Punctuation, u'{'), (Token.Text, u'\n'), (Token.Name.Label, u'foo'), (Token.Punctuation, u':'), (Token.Text, u'\n'), (Token.Text, u' '), (Token.Keyword, u'goto'), (Token.Text, u' '), (Token.Name, u'foo'), (Token.Punctuation, u';'), (Token.Text, u'\n'), (Token.Punctuation, u'}'), (Token.Text, u'\n'), (Token.Text, u''), ] self.assertEqual(expected, list(self.lexer.get_tokens(textwrap.dedent(fragment)))) def 
testLabelSpaceBeforeColon(self): fragment = u'''\ int main() { foo : goto foo; } ''' expected = [ (Token.Text, u''), (Token.Keyword.Type, u'int'), (Token.Text, u' '), (Token.Name.Function, u'main'), (Token.Text, u''), (Token.Punctuation, u'('), (Token.Punctuation, u')'), (Token.Text, u'\n'), (Token.Text, u''), (Token.Punctuation, u'{'), (Token.Text, u'\n'), (Token.Name.Label, u'foo'), (Token.Text, u' '), (Token.Punctuation, u':'), (Token.Text, u'\n'), (Token.Text, u' '), (Token.Keyword, u'goto'), (Token.Text, u' '), (Token.Name, u'foo'), (Token.Punctuation, u';'), (Token.Text, u'\n'), (Token.Punctuation, u'}'), (Token.Text, u'\n'), (Token.Text, u''), ] self.assertEqual(expected, list(self.lexer.get_tokens(textwrap.dedent(fragment)))) def testLabelFollowedByStatement(self): fragment = u'''\ int main() { foo:return 0; goto foo; } ''' expected = [ (Token.Text, u''), (Token.Keyword.Type, u'int'), (Token.Text, u' '), (Token.Name.Function, u'main'), (Token.Text, u''), (Token.Punctuation, u'('), (Token.Punctuation, u')'), (Token.Text, u'\n'), (Token.Text, u''), (Token.Punctuation, u'{'), (Token.Text, u'\n'), (Token.Name.Label, u'foo'), (Token.Punctuation, u':'), (Token.Keyword, u'return'), (Token.Text, u' '), (Token.Literal.Number.Integer, u'0'), (Token.Punctuation, u';'), (Token.Text, u'\n'), (Token.Text, u' '), (Token.Keyword, u'goto'), (Token.Text, u' '), (Token.Name, u'foo'), (Token.Punctuation, u';'), (Token.Text, u'\n'), (Token.Punctuation, u'}'), (Token.Text, u'\n'), (Token.Text, u''), ] self.assertEqual(expected, list(self.lexer.get_tokens(textwrap.dedent(fragment))))
class CLexerTest(unittest.TestCase): def setUp(self): self.lexer = CLexer() def testNumbers(self): code = "42 23.42 23. .42 023 0xdeadbeef 23e+42 42e-23" wanted = [] for item in zip( [ Number.Integer, Number.Float, Number.Float, Number.Float, Number.Oct, Number.Hex, Number.Float, Number.Float, ], code.split(), ): wanted.append(item) wanted.append((Text, " ")) wanted = wanted[:-1] + [(Text, "\n")] self.assertEqual(list(self.lexer.get_tokens(code)), wanted) def testSwitch(self): fragment = u"""\ int main() { switch (0) { case 0: default: ; } } """ tokens = [ (Token.Keyword.Type, u"int"), (Token.Text, u" "), (Token.Name.Function, u"main"), (Token.Punctuation, u"("), (Token.Punctuation, u")"), (Token.Text, u"\n"), (Token.Punctuation, u"{"), (Token.Text, u"\n"), (Token.Text, u" "), (Token.Keyword, u"switch"), (Token.Text, u" "), (Token.Punctuation, u"("), (Token.Literal.Number.Integer, u"0"), (Token.Punctuation, u")"), (Token.Text, u"\n"), (Token.Text, u" "), (Token.Punctuation, u"{"), (Token.Text, u"\n"), (Token.Text, u" "), (Token.Keyword, u"case"), (Token.Text, u" "), (Token.Literal.Number.Integer, u"0"), (Token.Operator, u":"), (Token.Text, u"\n"), (Token.Text, u" "), (Token.Keyword, u"default"), (Token.Operator, u":"), (Token.Text, u"\n"), (Token.Text, u" "), (Token.Punctuation, u";"), (Token.Text, u"\n"), (Token.Text, u" "), (Token.Punctuation, u"}"), (Token.Text, u"\n"), (Token.Punctuation, u"}"), (Token.Text, u"\n"), ] self.assertEqual(tokens, list(self.lexer.get_tokens(textwrap.dedent(fragment)))) def testSwitchSpaceBeforeColon(self): fragment = u"""\ int main() { switch (0) { case 0 : default : ; } } """ tokens = [ (Token.Keyword.Type, u"int"), (Token.Text, u" "), (Token.Name.Function, u"main"), (Token.Punctuation, u"("), (Token.Punctuation, u")"), (Token.Text, u"\n"), (Token.Punctuation, u"{"), (Token.Text, u"\n"), (Token.Text, u" "), (Token.Keyword, u"switch"), (Token.Text, u" "), (Token.Punctuation, u"("), (Token.Literal.Number.Integer, u"0"), (Token.Punctuation, u")"), (Token.Text, u"\n"), (Token.Text, u" "), (Token.Punctuation, u"{"), (Token.Text, u"\n"), (Token.Text, u" "), (Token.Keyword, u"case"), (Token.Text, u" "), (Token.Literal.Number.Integer, u"0"), (Token.Text, u" "), (Token.Operator, u":"), (Token.Text, u"\n"), (Token.Text, u" "), (Token.Keyword, u"default"), (Token.Text, u" "), (Token.Operator, u":"), (Token.Text, u"\n"), (Token.Text, u" "), (Token.Punctuation, u";"), (Token.Text, u"\n"), (Token.Text, u" "), (Token.Punctuation, u"}"), (Token.Text, u"\n"), (Token.Punctuation, u"}"), (Token.Text, u"\n"), ] self.assertEqual(tokens, list(self.lexer.get_tokens(textwrap.dedent(fragment)))) def testLabel(self): fragment = u"""\ int main() { foo: goto foo; } """ tokens = [ (Token.Keyword.Type, u"int"), (Token.Text, u" "), (Token.Name.Function, u"main"), (Token.Punctuation, u"("), (Token.Punctuation, u")"), (Token.Text, u"\n"), (Token.Punctuation, u"{"), (Token.Text, u"\n"), (Token.Name.Label, u"foo"), (Token.Punctuation, u":"), (Token.Text, u"\n"), (Token.Text, u" "), (Token.Keyword, u"goto"), (Token.Text, u" "), (Token.Name, u"foo"), (Token.Punctuation, u";"), (Token.Text, u"\n"), (Token.Punctuation, u"}"), (Token.Text, u"\n"), ] self.assertEqual(tokens, list(self.lexer.get_tokens(textwrap.dedent(fragment)))) def testLabelSpaceBeforeColon(self): fragment = u"""\ int main() { foo : goto foo; } """ tokens = [ (Token.Keyword.Type, u"int"), (Token.Text, u" "), (Token.Name.Function, u"main"), (Token.Punctuation, u"("), (Token.Punctuation, u")"), (Token.Text, u"\n"), 
(Token.Punctuation, u"{"), (Token.Text, u"\n"), (Token.Name.Label, u"foo"), (Token.Text, u" "), (Token.Punctuation, u":"), (Token.Text, u"\n"), (Token.Text, u" "), (Token.Keyword, u"goto"), (Token.Text, u" "), (Token.Name, u"foo"), (Token.Punctuation, u";"), (Token.Text, u"\n"), (Token.Punctuation, u"}"), (Token.Text, u"\n"), ] self.assertEqual(tokens, list(self.lexer.get_tokens(textwrap.dedent(fragment)))) def testLabelFollowedByStatement(self): fragment = u"""\ int main() { foo:return 0; goto foo; } """ tokens = [ (Token.Keyword.Type, u"int"), (Token.Text, u" "), (Token.Name.Function, u"main"), (Token.Punctuation, u"("), (Token.Punctuation, u")"), (Token.Text, u"\n"), (Token.Punctuation, u"{"), (Token.Text, u"\n"), (Token.Name.Label, u"foo"), (Token.Punctuation, u":"), (Token.Keyword, u"return"), (Token.Text, u" "), (Token.Literal.Number.Integer, u"0"), (Token.Punctuation, u";"), (Token.Text, u"\n"), (Token.Text, u" "), (Token.Keyword, u"goto"), (Token.Text, u" "), (Token.Name, u"foo"), (Token.Punctuation, u";"), (Token.Text, u"\n"), (Token.Punctuation, u"}"), (Token.Text, u"\n"), ] self.assertEqual(tokens, list(self.lexer.get_tokens(textwrap.dedent(fragment))))
# For type annotation
from typing import Any, Dict  # NOQA
from pygments.formatter import Formatter  # NOQA
from sphinx.util.typing import unicode  # NOQA

logger = logging.getLogger(__name__)

lexers = {
    'none': TextLexer(stripnl=False),
    'python': PythonLexer(stripnl=False),
    'python3': Python3Lexer(stripnl=False),
    'pycon': PythonConsoleLexer(stripnl=False),
    'pycon3': PythonConsoleLexer(python3=True, stripnl=False),
    'rest': RstLexer(stripnl=False),
    'c': CLexer(stripnl=False),
}  # type: Dict[unicode, Lexer]
for _lexer in lexers.values():
    _lexer.add_filter('raiseonerror')

escape_hl_chars = {ord(u'\\'): u'\\PYGZbs{}',
                   ord(u'{'): u'\\PYGZob{}',
                   ord(u'}'): u'\\PYGZcb{}'}

# used if Pygments is available
# use textcomp quote to get a true single quote
_LATEX_ADD_STYLES = r'''
\renewcommand\PYGZsq{\textquotesingle}
'''
def setUp(self):
    self.lexer = CLexer()
background_color = '#eeffcc'
default_style = ''

styles = FriendlyStyle.styles
styles.update({
    Generic.Output: 'italic #333',
    Comment: 'italic #408090',
})

lexers = defaultdict(
    TextLexer,
    none=TextLexer(),
    python=PythonLexer(),
    pycon=PythonConsoleLexer(),
    rest=RstLexer(),
    c=CLexer(),
)
for _lexer in lexers.values():
    _lexer.add_filter('raiseonerror')

fmter = HtmlFormatter(style=PythonDocStyle)


def highlight_block(source, lang):
    if not pygments:
        return '<pre>' + cgi.escape(source) + '</pre>\n'
    if lang == 'python':
        if source.startswith('>>>'):
            lexer = lexers['pycon']
        else:
            lexer = lexers['python']
from style import NightOwl
from pygments.formatters.html import HtmlFormatter
from pygments import highlight
from pygments.lexers import CLexer, MarkdownLexer

if __name__ == "__main__":
    samples = {
        "samples/sample.c": CLexer(),
        "samples/sample.md": MarkdownLexer()
    }
    formatter = HtmlFormatter(style=NightOwl)
    formatter.style.background_color = '#011627'
    with open('test.html', 'w') as out_file:
        out_file.truncate()
        out_file.write(
            "<html><head><link rel='stylesheet' href='base.css'>"
            "<style>{}</style></head><body>"
            .format(formatter.get_style_defs('div.highlight pre')))
        # sorted() works on both Python 2 and 3 (dict.keys() is not sortable on 3)
        files = sorted(samples.keys())
        for key in files:
            with open(key, 'r') as sample_file:
                out_file.write(
                    highlight(sample_file.read(), samples[key], formatter))
        out_file.write("</body></html>")
if False:
    # For type annotation
    from typing import Any, Dict  # NOQA
    from pygments.formatter import Formatter  # NOQA

logger = logging.getLogger(__name__)

lexers = dict(
    none=TextLexer(stripnl=False),
    python=PythonLexer(stripnl=False),
    python3=Python3Lexer(stripnl=False),
    pycon=PythonConsoleLexer(stripnl=False),
    pycon3=PythonConsoleLexer(python3=True, stripnl=False),
    rest=RstLexer(stripnl=False),
    c=CLexer(stripnl=False),
)  # type: Dict[unicode, Lexer]
for _lexer in lexers.values():
    _lexer.add_filter('raiseonerror')

escape_hl_chars = {ord(u'\\'): u'\\PYGZbs{}',
                   ord(u'{'): u'\\PYGZob{}',
                   ord(u'}'): u'\\PYGZcb{}'}

# used if Pygments is available
# use textcomp quote to get a true single quote
_LATEX_ADD_STYLES = r'''
\renewcommand\PYGZsq{\textquotesingle}
'''
def scan_source(fp, args):
    # print("scanning: %r" % fp)

    global filepath

    is_cpp = fp.endswith((".cpp", ".cxx"))

    filepath = fp

    #if "displist.c" not in filepath:
    #    return

    filepath_base = os.path.basename(filepath)

    #print(highlight(code, CLexer(), RawTokenFormatter()).decode('utf-8'))
    code = open(filepath, 'r', encoding="utf-8").read()

    quick_check_indentation(code)
    # return

    del tokens[:]
    line = 1

    for ttype, text in lex(code, CLexer()):
        if text:
            tokens.append(TokStore(ttype, text, line))
            line += text.count("\n")

    col = 0  # track line length
    index_line_start = 0

    for i, tok in enumerate(tokens):
        #print(tok.type, tok.text)
        if tok.type == Token.Keyword:
            if tok.text in {"switch", "while", "if", "for"}:
                item_range = extract_statement_if(i)
                if item_range is not None:
                    blender_check_kw_if(item_range[0], i, item_range[1])
            elif tok.text == "else":
                blender_check_kw_else(i)
            elif tok.text == "sizeof":
                blender_check_kw_sizeof(i)
        elif tok.type == Token.Punctuation:
            if tok.text == ",":
                blender_check_comma(i)
            elif tok.text == ".":
                blender_check_period(i)
            elif tok.text == "[":
                # note, we're quite relaxed about this but
                # disallow 'foo ['
                if tokens[i - 1].text.isspace():
                    if is_cpp and tokens[i + 1].text == "]":
                        # c++ can do delete []
                        pass
                    else:
                        warning("space before '['", i, i)
            elif tok.text == "(":
                # check if this is a cast, eg:
                #  (char), (char **), (float (*)[3])
                item_range = extract_cast(i)
                if item_range is not None:
                    blender_check_cast(item_range[0], item_range[1])
            elif tok.text == "{":
                # check previous character is either a '{' or whitespace.
                if ((tokens[i - 1].line == tok.line) and
                        not (tokens[i - 1].text.isspace() or tokens[i - 1].text == "{")):
                    warning("no space before '{'", i, i)

                blender_check_function_definition(i)

        elif tok.type == Token.Operator:
            # we check these in pairs, only want first
            if tokens[i - 1].type != Token.Operator:
                op, index_kw_end = extract_operator(i)
                blender_check_operator(i, index_kw_end, op, is_cpp)
        elif tok.type in Token.Comment:
            doxyfn = None
            if "\\file" in tok.text:
                doxyfn = tok.text.split("\\file", 1)[1].strip().split()[0]
            elif "@file" in tok.text:
                doxyfn = tok.text.split("@file", 1)[1].strip().split()[0]

            if doxyfn is not None:
                doxyfn_base = os.path.basename(doxyfn)
                if doxyfn_base != filepath_base:
                    warning("doxygen filename mismatch %s != %s" %
                            (doxyfn_base, filepath_base), i, i)

        # ensure line length
        if (not args.no_length_check) and tok.type == Token.Text and tok.text == "\n":
            # check line len
            blender_check_linelength(index_line_start, i - 1, col)

            col = 0
            index_line_start = i + 1
        else:
            col += len(tok.text.expandtabs(TAB_SIZE))
def scan_source(fp, args):
    # print("scanning: %r" % fp)

    global filepath

    is_cpp = fp.endswith((".cpp", ".cxx"))

    filepath = fp
    filepath_base = os.path.basename(filepath)

    #print(highlight(code, CLexer(), RawTokenFormatter()).decode('utf-8'))
    code = open(filepath, 'r', encoding="utf-8").read()

    quick_check_indentation(code)
    # return

    del tokens[:]
    line = 1

    for ttype, text in lex(code, CLexer()):
        tokens.append(TokStore(ttype, text, line))
        line += text.count("\n")

    col = 0  # track line length
    index_line_start = 0

    for i, tok in enumerate(tokens):
        #print(tok.type, tok.text)
        if tok.type == Token.Keyword:
            if tok.text in {"switch", "while", "if", "for"}:
                item_range = extract_statement_if(i)
                if item_range is not None:
                    blender_check_kw_if(item_range[0], i, item_range[1])
            elif tok.text == "else":
                blender_check_kw_else(i)
        elif tok.type == Token.Punctuation:
            if tok.text == ",":
                blender_check_comma(i)
        elif tok.type == Token.Operator:
            # we check these in pairs, only want first
            if tokens[i - 1].type != Token.Operator:
                op, index_kw_end = extract_operator(i)
                blender_check_operator(i, index_kw_end, op, is_cpp)
        elif tok.type in Token.Comment:
            doxyfn = None
            if "\\file" in tok.text:
                doxyfn = tok.text.split("\\file", 1)[1].strip().split()[0]
            elif "@file" in tok.text:
                doxyfn = tok.text.split("@file", 1)[1].strip().split()[0]

            if doxyfn is not None:
                doxyfn_base = os.path.basename(doxyfn)
                if doxyfn_base != filepath_base:
                    warning("doxygen filename mismatch %s != %s" %
                            (doxyfn_base, filepath_base), i, i)

        # ensure line length
        if (not args.no_length_check) and tok.type == Token.Text and tok.text == "\n":
            # check line len
            blender_check_linelength(index_line_start, i - 1, col)

            col = 0
            index_line_start = i + 1
        else:
            col += len(tok.text.expandtabs(TAB_SIZE))
def get_tokens_unprocessed(self, text):
    for index, token, value in CLexer.get_tokens_unprocessed(self, text):
        if token is Name and value in self.EXTRA_TYPES:
            yield index, Keyword.Type, value
        else:
            yield index, token, value
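# The override above is the standard Pygments pattern for re-tagging known
# identifiers. A minimal self-contained sketch of the surrounding subclass; the
# class name and the entries in EXTRA_TYPES are assumptions for illustration only.
from pygments import highlight
from pygments.formatters import TerminalFormatter
from pygments.lexers import CLexer
from pygments.token import Keyword, Name


class CustomTypesCLexer(CLexer):
    """Highlight project-specific typedefs as Keyword.Type."""
    EXTRA_TYPES = {'MyHandle', 'err_t'}

    def get_tokens_unprocessed(self, text):
        for index, token, value in CLexer.get_tokens_unprocessed(self, text):
            if token is Name and value in self.EXTRA_TYPES:
                yield index, Keyword.Type, value
            else:
                yield index, token, value


print(highlight("MyHandle h; err_t rc = 0;", CustomTypesCLexer(), TerminalFormatter()))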
def load_c_syntax(self):
    self.master.lexer = CLexer()
    self.master.initial_highlight()
def code_to_graphviz_html(code):
    style = get_style_by_name('default')
    return highlight(
        code,
        CLexer(),  # FIXME
        GraphvizHtmlFormatter(style))