def highlight_code(self):
    """Render the matched regions of this file as highlighted snippets.

    When matched line numbers are known, builds one LaTeX-highlighted
    snippet per cluster of adjacent matching lines and mirrors each
    cluster into ``self.code_snippets`` (as HTML via ``hl_snippet``).
    When no line numbers are known (lexical search does not store them),
    falls back to highlighting the whole file.

    Returns:
        The concatenation of all highlighted snippets as one string.
    """
    html_snippets = []
    if self.matched_line_number():
        snippet_cluster_lns = self.compute_lines_to_highlight(
            self.adjacent_line_numbers())
        snippets = []
        for snippet_cluster_ln in snippet_cluster_lns:
            snippet = [self.file_content_lines[n] for n in snippet_cluster_ln]
            start_line = min(snippet_cluster_ln)
            # linenostart is independent from hl_lines, so the matching
            # line numbers must be shifted relative to the snippet start.
            # (Materialized as a list: a lazy map object would be a
            # one-shot iterator under Python 3.)
            highlight_lines = [x - start_line + 1
                               for x in self.matching_line_numbers]
            snippets.append(("\n".join(snippet), start_line, highlight_lines))
        html_snippets = [
            highlight(snippet[0], JavaLexer(),
                      LatexFormatter(linenos=True, linenostart=snippet[1]))
            for snippet in snippets
        ]
        self.code_snippets = [
            GitSearchItemSnippet(self.hl_snippet(snippet[0], snippet[1]),
                                 snippet[1])
            for snippet in snippets
        ]
    # Lexical search stores no line numbers, so we cannot highlight the
    # exact location of the matched term: show the whole file instead.
    if not html_snippets:
        html_snippets.append(
            highlight(self.file_content, JavaLexer(),
                      HtmlFormatter(linenos=True, anchorlinenos=True)))
        self.code_snippets.append(
            GitSearchItemSnippet(self.hl_snippet(self.file_content, 0), 0))
    return "".join(html_snippets)
class JavaTest(unittest.TestCase):
    """Regression tests for the Pygments ``JavaLexer``."""

    def setUp(self):
        # A fresh lexer per test; maxDiff=None shows full diffs on failure.
        self.lexer = JavaLexer()
        self.maxDiff = None

    def testEnhancedFor(self):
        """A labelled enhanced-for header must tokenize exactly as listed."""
        fragment = u'label:\nfor(String var2: var1) {}\n'
        # Expected (token type, value) pairs, in lexing order.
        tokens = [
            (Name.Label, u'label:'),
            (Text, u'\n'),
            (Keyword, u'for'),
            (Operator, u'('),
            (Name, u'String'),
            (Text, u' '),
            (Name, u'var2'),
            (Operator, u':'),
            (Text, u' '),
            (Name, u'var1'),
            (Operator, u')'),
            (Text, u' '),
            (Operator, u'{'),
            (Operator, u'}'),
            (Text, u'\n'),
        ]
        self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))
class PygmentsPreviewer(Previewer):
    """Attachment previewer that renders source code with Pygments."""

    # All supported MIME types.
    # Fixed: 'text/x-c' was listed twice; the duplicate has been removed.
    MIMETYPES = ('text/css', 'text/x-python', 'text/x-ruby-script',
                 'text/x-java-source', 'text/x-c', 'application/javascript',
                 'text/x-fortran', 'text/x-csharp', 'text/php', 'text/x-php')

    # Python's mimetypes lib and Pygments do not quite agree on some MIME
    # types, so these get explicit lexer instances.
    CUSTOM_LEXERS = {
        'text/x-c': CppLexer(),
        'text/x-java-source': JavaLexer(),
        'text/x-ruby-script': RubyLexer(),
        'text/php': PhpLexer()
    }

    @classmethod
    def can_preview(cls, attachment_file):
        """Return True when the attachment's MIME type is supported."""
        return attachment_file.content_type in cls.MIMETYPES

    @classmethod
    def generate_content(cls, attachment):
        """Render the attachment as highlighted HTML plus its stylesheet."""
        mime_type = attachment.file.content_type
        lexer = cls.CUSTOM_LEXERS.get(mime_type)
        if lexer is None:
            # Fall back to Pygments' own MIME-type registry.
            lexer = get_lexer_for_mimetype(mime_type)
        with attachment.file.open() as f:
            html_formatter = HtmlFormatter(style='tango', linenos='inline',
                                           prestyles='mono')
            html_code = highlight(f.read(), lexer, html_formatter)
            css_code = html_formatter.get_style_defs('.highlight')
            return render_template('previewer_code:pygments_preview.html',
                                   attachment=attachment,
                                   html_code=html_code,
                                   css_code=css_code)
def highlightString(src):
    """Return *src* highlighted as HTML according to ``self.currentExt``.

    NOTE(review): ``self`` is read from the enclosing scope, so this
    function only works where ``self`` is visible — confirm placement.
    Unknown extensions fall back to the JavaScript lexer.
    Lexer imports stay lazy so pygments modules load only when needed.
    """
    try:
        if self.currentExt == 'php':
            from pygments.lexers import PhpLexer
            return highlight(src, PhpLexer(), HtmlFormatter())
        elif self.currentExt == 'py':
            from pygments.lexers import PythonLexer
            return highlight(src, PythonLexer(), HtmlFormatter())
        elif self.currentExt == 'rb':
            from pygments.lexers import RubyLexer
            return highlight(src, RubyLexer(), HtmlFormatter())
        elif self.currentExt == 'pl':
            from pygments.lexers import PerlLexer
            return highlight(src, PerlLexer(), HtmlFormatter())
        elif self.currentExt == 'java':
            from pygments.lexers import JavaLexer
            return highlight(src, JavaLexer(), HtmlFormatter())
        elif self.currentExt == 'cs':
            from pygments.lexers import CSharpLexer
            return highlight(src, CSharpLexer(), HtmlFormatter())
        else:
            from pygments.lexers import JavascriptLexer
            return highlight(src, JavascriptLexer(), HtmlFormatter())
    except Exception:
        # Fixed: narrowed from a bare ``except:`` so KeyboardInterrupt and
        # SystemExit are no longer swallowed; highlighting failures still
        # degrade gracefully to this message.
        return "File could not be highlighted"
def reload_java_sources(self):
    """Reload completely the sources by asking Androguard to decompile
    it again. Useful when:
    - an element has been renamed to propagate the info
    - the current tab is changed because we do not know what user did
      since then, so we need to propagate previous changes as well
    """
    androconf.debug("Getting sources for %s" % self.current_class)

    # Leading comment block: file name and digest of the current class.
    lines = [("COMMENTS", [("COMMENT",
                            "// filename:%s\n// digest:%s\n\n" %
                            (self.current_filename, self.current_digest))])]

    # One "//" comment line per method signature, emitted as one block.
    method_info_buff = ""
    for method in self.current_class.get_methods():
        method_info_buff += "// " + str(method) + "\n"
    lines.append(("COMMENTS", [("COMMENT", method_info_buff + "\n\n")]))
    lines.extend(self.current_class.get_source_ext())

    # TODO: delete doc when tab is closed? not deleted by "self" :(
    if hasattr(self, "doc"):
        del self.doc
    self.doc = SourceDocument(parent=self, lines=lines)
    self.setDocument(self.doc)

    # No need to save the highlighter: highlightBlock will automatically be
    # called because we passed the QTextDocument to the QSyntaxHighlighter
    # constructor.
    MyHighlighter(self.doc, lexer=JavaLexer())
def reload_java_sources(self):
    '''Reload completely the sources by asking Androguard to decompile
    it again. Useful when:
    - an element has been renamed to propagate the info
    - the current tab is changed because we do not know what user did
      since then, so we need to propagate previous changes as well
    '''
    androconf.debug("Getting sources for %s" % self.current_class)

    # Leading multi-line comment block with file name and digest.
    lines = []
    lines.append(("COMMENTS", [("COMMENT",
                                "/*\n * filename:%s\n * digest:%s\n */\n" %
                                (self.current_filename,
                                 self.current_digest))]))
    lines.extend(self.current_class.get_source_ext())

    # TODO: delete doc when tab is closed? not deleted by "self" :(
    if hasattr(self, "doc"):
        del self.doc
    self.doc = SourceDocument(parent=self, lines=lines)
    self.setDocument(self.doc)

    # No need to save the highlighter. highlightBlock will automatically be
    # called because we passed the QTextDocument to the QSyntaxHighlighter
    # constructor.
    if PYGMENTS:
        PygmentsHighlighter(self.doc, lexer=JavaLexer())
    else:
        androconf.debug("Pygments is not present !")
def _process():
    """Render every file in ``self.file_paths`` as line-numbered HTML
    and accumulate the results in ``self.html``."""
    self.html.extend(
        highlight(read_file(path), JavaLexer(), HtmlFormatter(linenos=True))
        for path in self.file_paths
    )
def choose_lexer(filename):
    """Pick a Pygments lexer for *filename* and install it on the editor.

    Unknown extensions keep whatever lexer was previously configured.
    """
    ext = FileSystem().getFileType(filename)
    lexer_classes = {'py': PythonLexer, 'java': JavaLexer, 'cpp': CppLexer}
    chosen = lexer_classes.get(ext)
    if chosen is not None:
        TextEditor.lex = chosen()
    TextEditor.codeinput.lexer = TextEditor.lex
def process_codeimport_cmd(ctx, tex, cmd, mode):
    """Expand a codeimport command into a highlighted HTML block.

    The command argument has the form ``Class.member[.member...]``; each
    member's source is fetched from the class and rendered as Java.
    """
    class_name, member_spec = cmd.args[0].split('.', 1)
    source_lines = catlist()
    for member_name in member_spec.split('.'):
        source_lines.extend(get_member(member_name, class_name))
    rendered = highlight("\n".join(source_lines), JavaLexer(), HtmlFormatter())
    out = catlist(['<div class="codeimport">'])
    out.append(rendered)
    out.append("</div><!-- codeimport -->")
    return out
def highlight_files(file_paths):
    """Return line-numbered HTML renderings of the given source files,
    one HTML string per path, in input order."""
    return [
        highlight(read_file(path), JavaLexer(), HtmlFormatter(linenos=True))
        for path in file_paths
    ]
class JavaTest(unittest.TestCase):
    """Regression tests for the Pygments ``JavaLexer``."""

    def setUp(self):
        # Fresh lexer per test; full assertion diffs.
        self.lexer = JavaLexer()
        self.maxDiff = None

    def testEnhancedFor(self):
        """A labelled enhanced-for header must tokenize exactly as listed."""
        fragment = u'label:\nfor(String var2: var1) {}\n'
        tokens = [
            (Name.Label, u'label:'),
            (Text, u'\n'),
            (Keyword, u'for'),
            (Operator, u'('),
            (Name, u'String'),
            (Text, u' '),
            (Name, u'var2'),
            (Operator, u':'),
            (Text, u' '),
            (Name, u'var1'),
            (Operator, u')'),
            (Text, u' '),
            (Operator, u'{'),
            (Operator, u'}'),
            (Text, u'\n'),
        ]
        self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))

    def testNumericLiterals(self):
        """Java numeric literals (underscores, hex, octal, binary, float
        suffixes, hex floats) must map to the expected Number subtokens."""
        fragment = '0 5L 9__542_72l 0xbEEf 0X9_A 0_35 01 0b0___101_0'
        fragment += ' 0. .7_17F 3e-1_3d 1f 6_01.9e+3 0x.1Fp3 0XEP8D\n'
        tokens = [(Number.Integer, '0'),
                  (Text, ' '),
                  (Number.Integer, '5L'),
                  (Text, ' '),
                  (Number.Integer, '9__542_72l'),
                  (Text, ' '),
                  (Number.Hex, '0xbEEf'),
                  (Text, ' '),
                  (Number.Hex, '0X9_A'),
                  (Text, ' '),
                  (Number.Oct, '0_35'),
                  (Text, ' '),
                  (Number.Oct, '01'),
                  (Text, ' '),
                  (Number.Bin, '0b0___101_0'),
                  (Text, ' '),
                  (Number.Float, '0.'),
                  (Text, ' '),
                  (Number.Float, '.7_17F'),
                  (Text, ' '),
                  (Number.Float, '3e-1_3d'),
                  (Text, ' '),
                  (Number.Float, '1f'),
                  (Text, ' '),
                  (Number.Float, '6_01.9e+3'),
                  (Text, ' '),
                  (Number.Float, '0x.1Fp3'),
                  (Text, ' '),
                  (Number.Float, '0XEP8D'),
                  (Text, '\n')]
        self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))
def choose_lexer(self):
    """Set ``TextEditor.lex`` from the current file's extension.

    Does nothing when no file path is set; Python is both the ``py``
    mapping and the fallback for unrecognized extensions.
    """
    if TextEditor.filepath is None:
        return
    ext = FileSystem().getFileType(TextEditor.filepath)
    by_ext = {'java': JavaLexer, 'cpp': CppLexer}
    # 'py' and any unknown/missing extension resolve to PythonLexer.
    TextEditor.lex = by_ext.get(ext, PythonLexer)()
def process_hash_env(ctx, b, env, mode):
    """Render a hash (verbatim-code) environment for HTML output.

    Bare ``&`` characters in the content are escaped for TeX.  In math
    mode the text is emitted as ``\\texttt``; in screen-reader mode as a
    plain styled span; otherwise it is syntax-highlighted as Java.

    Fixed: removed a leftover debug ``print(mode, env)``, the duplicated
    escaping expression, and dead commented-out code.
    """
    # Escape unescaped '&' so it survives later TeX processing.
    inner = re.sub(r'(^|[^\\])&', r'\1\&', env.content)
    if mode & tex2htm.MATH:
        # Both the screen-reader and normal paths emitted \texttt here.
        return catlist([r'\texttt{{{}}}'.format(inner)])
    if ctx.screenreader_mode:
        return catlist([r'<span class="texttt">{}</span>'.format(inner)])
    return catlist([highlight(inner, JavaLexer(), CodeHtmlFormatter())])
def create(type):
    """Build and return a formatting handler for files of extension *type*.

    The handler, ``fmt_handler(data, regions)``, highlights *data* with a
    lexer chosen from *type* and returns the formatter's output, or None
    on any highlighting error.

    NOTE(review): the parameter shadows the ``type`` builtin, and
    ``valid`` / ``postproc`` are assigned but never used — confirm they
    are vestigial before removing.
    """
    valid = True
    postproc = None
    if type == 'py':
        lexer = PythonLexer()
        formatter = PyeFormatter(style=PyeDefaultStyle)
    elif type == 'c' or type == 'h':
        #lexer = CLexer()
        lexer = PyeCLexer()
        #postproc = CPostProc
        formatter = PyeFormatter(style=PyeDefaultStyle)
    elif type == 'java':
        #lexer = CLexer()
        lexer = JavaLexer()
        formatter = PyeFormatter(style=PyeDefaultStyle)
    else:
        # Unknown extensions get plain-text lexing with the text style.
        debug("Cannot create a syntax highligher for '{}'... using TextLexer".
              format(type))
        lexer = TextLexer()
        formatter = PyeFormatter(style=PyeTextStyle)

    def fmt_handler(data, regions):
        # Highlight *data*, marking *regions*; returns None on failure.
        #debug("regions: {}".format(regions))
        try:
            formatter.set_highlight_regions(regions)
            # highlight() drives the formatter for its side effects; the
            # actual output is then fetched via get_formatted().
            highlight(data, lexer, formatter)
            ret = formatter.get_formatted(data)
            #debug("fmt handler result: {}".format(ret))
        except Exception as e:
            # Log the full traceback but never propagate to the caller.
            exc_type, exc_value, exc_traceback = sys.exc_info()
            lines = traceback.format_exception(exc_type, exc_value,
                                               exc_traceback)
            debug(''.join(line for line in lines))
            return None
        return ret

    return fmt_handler
def extract_class_and_method(java_dir, base_path):
    """Collect class/function/attribute/plain names from each .java file.

    For every ``.java`` file directly under *java_dir*, the names are
    grouped by token kind and keyed by the file's basename (its hash).
    The result is dumped to ``<repo>.names.json`` under *base_path*.

    Returns the collected dict, or None when *java_dir* is not a directory.
    """
    if not os.path.isdir(java_dir):
        return None
    files = os.listdir(java_dir)
    names_dict = {}
    for f in files:
        h, i = os.path.splitext(f)
        if i == '.java':
            classnames = set()
            methodnames = set()
            attributenames = set()
            names = set()
            with open(os.path.join(java_dir, f)) as fl:
                cont = fl.read()
            # RawTokenFormatter emits "<token type>\t<repr of value>"
            # lines; eval() only ever sees those repr'd string literals
            # produced by Pygments itself, not external input.
            x = highlight(cont, JavaLexer(), RawTokenFormatter())
            for y in str(x, encoding='utf-8').splitlines():
                ys = y.split('\t')
                if ys[0] == 'Token.Name.Class':
                    classnames.add(eval(ys[1]))
                elif ys[0] == 'Token.Name.Function':
                    methodnames.add(eval(ys[1]))
                elif ys[0] == 'Token.Name.Attribute':
                    attributenames.add(eval(ys[1]))
                elif ys[0] == 'Token.Name':
                    names.add(eval(ys[1]))
            names_dict[h] = {
                'NC': list(classnames),
                'NF': list(methodnames),
                'NA': list(attributenames),
                'N': list(names)
            }
    repo = java_dir.strip('/').split('/')[-1]
    jf = os.path.join(base_path, '{}.names.json'.format(repo))
    # Fixed: the output handle from open(jf, 'w') was never closed; a
    # context manager now guarantees the JSON is flushed and closed.
    with open(jf, 'w') as out:
        json.dump(names_dict, out)
    return names_dict
def main():
    """Compile and run each test program, showing its highlighted source.

    Program selection: no CLI argument runs everything in
    ``test_programs/good``; ``um`` / ``vt`` run the module-level
    ``undotted_method_call`` / ``vtable`` lists.  Pauses for Enter
    between programs; any other input stops the loop.
    """
    if len(sys.argv) == 1:
        # test all programs except those we know don't work yet
        files = sorted([f for f in os.listdir("test_programs/good")])
    elif sys.argv[1] == "um":
        files = undotted_method_call
    elif sys.argv[1] == "vt":
        files = vtable
    for file in files:
        fileName = f"test_programs/good/{file}"
        with open(fileName) as f:
            # Banner sized to the info line, then the highlighted source.
            infoStr = f"File = {file}"
            asterisks = len(infoStr) * "*"
            print(f"{asterisks}\n{infoStr}\n{asterisks}")
            print(highlight(f.read(), JavaLexer(), T256F(style="monokai")))
        # Compile with dj2ll, then execute the produced binary
        # (file name minus its 3-character extension).
        os.system(f"./dj2ll {fileName}")
        print(asterisks)
        os.system(f"./{file[0:-3]}")
        print(asterisks)
        reply = str(input("(press [enter] to continue):")).strip()
        if reply != "":
            break
    # Clean up the compiled binaries.
    os.system("rm good*")
def hl_snippet(self, source, start):
    """Render *source* as line-numbered, anchor-linked HTML whose line
    numbering begins at *start*."""
    formatter = HtmlFormatter(linenos=True,
                              anchorlinenos=True,
                              linenostart=start)
    return highlight(source, JavaLexer(), formatter)
#!/usr/bin/python import sys import subprocess import re import time from termcolor import colored import math import pygments from pygments.lexers import CppLexer, PythonLexer, JavaLexer from pygments.formatters import TerminalFormatter cpp_lexer = CppLexer() python_lexer = PythonLexer() java_lexer = JavaLexer() terminal_formatter = TerminalFormatter() def colorize_cpp(line): return pygments.highlight(code, cpp_lexer, terminal_formatter).rstrip("\r\n") def colorize_python(line): return pygments.highlight(code, python_lexer, terminal_formatter).rstrip("\r\n") def colorize_java(line): return pygments.highlight(code, java_lexer,
def setUp(self):
    """Prepare each test: full assertion diffs and a fresh Java lexer."""
    self.maxDiff = None
    self.lexer = JavaLexer()
        # (continuation of a line-scanning loop whose header lies outside
        # this chunk: advance and re-read until the loop condition fails)
        ln += 1
        currentline = self.file_content_lines[ln].strip()
    return ln

def highlight_matched_terms(self, gitsearch_item_html):
    """Wrap every occurrence of each matched term in a highlight span."""
    html_template = '<span class="hll">%s</span>'
    html = gitsearch_item_html
    for term in self.matched_terms:
        # Whole-word, case-insensitive match of the term.
        pattern = re.compile(r'\b%s\b' % term, re.IGNORECASE)
        html = pattern.sub(html_template % term, html)
        # Check if matched term is qualified and if it has already been
        # something replaced.
        # BUG(review): this inner loop re-uses ``pattern``, which was
        # compiled for the FULL qualified term, so each token substitution
        # replaces full-term matches with the token's text — presumably a
        # per-token pattern was intended; confirm before fixing.
        if "." in term:
            for token in term.split("."):
                html = pattern.sub(html_template % token, html)
    return html

if __name__ == '__main__':
    # Ad-hoc manual test against a local checkout (Python 2 print below).
    path = "/Users/Raphael/Downloads/GitArchive/linkedin_indextank-engine/indextank-engine/lucene-experimental/com/flaptor/org/apache/lucene/util/automaton/UTF32ToUTF8.java"
    matched_terms = [u'Integer.toBinaryString', u'Integer']
    #i = GitSearchItem(path, matched_terms)
    file_content = read_file(path)
    print unescape_html(
        highlight(file_content, JavaLexer(), MyHtmlFormatter(linenos=True)))
def do_api(self, args): """ List and select methods from a given loaded API module := api list := api select := api analyzed list := api analyzed select """ # Locals class_selection = None method_selection = None surgical_lib = None try: # List the available API methods from the target module if args.split()[0] == "list": if self.target_module: print("\n") for k, v in self.target_module.model.values.items(): print("\n") for m in v: print( self.t.cyan("\t--> {} : {} : {}".format( self.target_module.name, k.split(".")[-1], m))) print("\n") else: self.logger.surgical_log( "info", "Target module has not been loaded (!)") # Select an API method from the target module elif args.split()[0] == "select": if self.target_module: # TODO Consider building a wrapper around raw_input() class_selection = raw_input( self.t.yellow("[{}] ".format(datetime.now())) + "Select class : ") method_selection = raw_input( self.t.yellow("[{}] ".format(datetime.now())) + "Select method : ") for k, v in self.target_module.model.values.items(): # This is so we can support classes with identical # method names --> Ex: java.util.zip.ZipFile if class_selection == k.split(".")[-1]: for m in v: if m == method_selection: self.logger.surgical_log( "info", "Analyzing ...") from core.brains.surgical.lib.libsurgical import SurgicalLib # Begin processing and return the results # from the selected api surgical_lib = SurgicalLib( self.target_module, self.vmx, self.vm, k, method_selection, self.methods) # methods_api_usage will contain a list of # tuples self.methods_api_usage = surgical_lib.search( ) else: self.logger.surgical_log( "warn", "Method not found (!)") # Analyze the processed method list elif args.split()[0] == "analyzed": # List the methods that have been processed if args.split()[1] == "list": if self.methods_api_usage: print("\n") for m in self.methods_api_usage: print( self.t.cyan("\t--> {} -> {} ".format( m[0].class_name, m[0].name))) print("\n") else: SurgicalError("API usage not found (!)") 
# Select from the processed method list elif args.split()[1] == "select": if self.methods_api_usage: selection = raw_input( self.t.yellow("[{}] ".format(datetime.now())) + "Select method : ") for m in self.methods_api_usage: if selection == m[0].name: print("\n") print( self.t.cyan("\t--> Class : {}".format( m[0].class_name))) print( self.t.cyan("\t\t--> Method : {}".format( m[0].name))) print( self.t.cyan( "\t\t\t --> XREFS ###########")) self.u.print_xref("T", m[1].method.XREFto.items) self.u.print_xref("F", m[1].method.XREFfrom.items) print("\n") print( highlight(m[2], JavaLexer(), TerminalFormatter())) else: SurgicalError("API usage not found (!)") except Exception as e: SurgicalError(e.message)
def pygment_mul_line(java_lines):
    """Tokenize Java source lines into a normalized token stream.

    Literals are replaced by placeholder tokens (FLOAT0, NUMBER0,
    STRING0, ...), comments/namespaces collapse to sentinel words, and
    newlines become ``<nl>`` markers.  Identifier-kind names are kept
    verbatim in the stream, but a rename map is built for them.

    Returns:
        (tokenList, variableDict) where variableDict maps each seen
        Name/Attribute/Class/Function identifier to a short synthetic
        name (n0, a0, c0, f0, ...).
    """
    string = '\n'.join(java_lines)
    if string == '':
        return list(), dict()
    # RawTokenFormatter output: one "<token type>\t<repr(value)>" per line.
    x = highlight(string, JavaLexer(), RawTokenFormatter())
    x = str(x, encoding='utf-8')
    tokenList = list()
    variableDict = dict()
    nameNum, attNum, clsNum, fucNum = 0, 0, 0, 0
    # otherDict maps literal values to their placeholder tokens.
    otherDict = dict()
    floatNum, numberNum, strNum = 0, 0, 0
    for y in x.splitlines():
        ys = y.split('\t')
        # print(ys)
        # eval() only sees repr'd strings produced by Pygments itself.
        s = eval(ys[1])
        if s == '\n':
            tokenList.append('<nl>')
        elif s == 'NewBlock':
            tokenList.append('<nb>')
        elif s.isspace():
            # Whitespace runs contribute one <nl> per contained newline.
            lines = s.count('\n')
            for _ in range(lines):
                tokenList.append('<nl>')
        elif "Token.Literal.Number.Float" == ys[0]:
            if s not in otherDict:
                sT = 'FLOAT{}'.format(floatNum)
                otherDict[s] = sT
                floatNum += 1
            tokenList.append(otherDict[s])
        elif ys[0].startswith('Token.Literal.Number'):
            if s not in otherDict:
                sT = 'NUMBER{}'.format(numberNum)
                otherDict[s] = sT
                numberNum += 1
            tokenList.append(otherDict[s])
        elif ys[0].startswith('Token.Literal.String'):
            if s not in otherDict:
                sT = 'STRING{}'.format(strNum)
                otherDict[s] = sT
                strNum += 1
            tokenList.append(otherDict[s])
        elif "Token.Name.Namespace" == ys[0]:
            tokenList.append('NAMESPACE')
        elif "Token.Comment.Single" == ys[0]:
            tokenList.append('SINGLE')
            tokenList.append('<nl>')
        elif "Token.Comment.Multiline" == ys[0]:
            # One COMMENT marker per physical comment line.
            lines = s.count('\n')
            for _ in range(lines):
                tokenList.append('COMMENT')
                tokenList.append('<nl>')
            tokenList.append('COMMENT')
        elif 'Token.Name.Decorator' == ys[0]:
            # Split "@Annotation" into '@' plus the lower-cased name.
            tokenList.append('@')
            tokenList.append(s[1:].lower())
        elif 'Token.Name' == ys[0]:
            # NOTE(review): for the four Name kinds below the ORIGINAL
            # identifier is appended to the stream; variableDict only
            # records the rename mapping for later use — confirm intended.
            if s not in variableDict:
                sT = 'n{}'.format(nameNum)
                variableDict[s] = sT
                nameNum += 1
            tokenList.append(s)
        elif 'Token.Name.Attribute' == ys[0]:
            if s not in variableDict:
                sT = 'a{}'.format(attNum)
                variableDict[s] = sT
                attNum += 1
            tokenList.append(s)
        elif 'Token.Name.Class' == ys[0]:
            if s not in variableDict:
                sT = 'c{}'.format(clsNum)
                variableDict[s] = sT
                clsNum += 1
            tokenList.append(s)
        elif 'Token.Name.Function' == ys[0]:
            if s not in variableDict:
                sT = 'f{}'.format(fucNum)
                variableDict[s] = sT
                fucNum += 1
            tokenList.append(s)
        else:
            # Any other token: keep non-blank physical lines, separated
            # by <nl>; the trailing extra <nl> is popped afterwards.
            a = s.splitlines()
            for i in a:
                if i != '' and not i.isspace():
                    tokenList.append(i)
                tokenList.append('<nl>')
            tokenList.pop()
    return tokenList, variableDict
def lexer():
    # Presumably a pytest fixture: yields a fresh JavaLexer per use —
    # confirm it is registered with @pytest.fixture at the call site.
    yield JavaLexer()
# end of class/interface/method declaration # BUG: this breaks decorators which have curly braces inside them, like # @Target({ElementType.METHOD, ElementType.TYPE}) - only @Target( is shown if (ttype is token.Operator) and (value == '{') and def_started: def_started = False yield token.Text, "\n" if def_started: yield ttype, value if __name__ == "__main__": import os from pygments import highlight from pygments.lexers import JavaLexer from pygments.formatters import NullFormatter lex = JavaLexer() lex.add_filter(JavaAPIFilter()) for (path, dirs, files) in os.walk('~/repos/git/junit:junit/src/main/java/org/junit'): for fname in files: f = os.path.join(path, fname) if f.endswith("src/main/java/org/junit/Ignore.java"): code = open(f, 'r').read() print "---------- start %s ----------" % f print highlight(code, lex, NullFormatter()) print "---------- end %s ----------" % f
# This program is free software. It comes without any warranty, to # the extent permitted by applicable law. You can redistribute it # and/or modify it under the terms of the Do What The F**k You Want # To Public License, Version 2, as published by Sam Hocevar. See # http://sam.zoy.org/wtfpl/COPYING for more details. from .topping import Topping try: from pygments import highlight from pygments.lexers import JavaLexer from pygments.formatters import HtmlFormatter SYNTAX_HIGHLIGHT = True FORMATTER = HtmlFormatter(classprefix="hl_", nowrap=True) LEXER = JavaLexer() except: SYNTAX_HIGHLIGHT = False class PacketsTopping(Topping): KEY = "packets.packet" NAME = "Packets" ITEMS = ("Direction", ("id", "ID"), ("size", "Size"), ("code", None)) SORTING = Topping.NUMERIC_SORT NO_ESCAPE = ("code") ESCAPE_TITLE = False DIRECTIONS = { (True, True): "Both", (True, False): "Client to server", (False, True): "Server to client",
def highlight_file(path):
    """Read the file at *path* and return it rendered as anchor-linked,
    line-numbered HTML (anchors prefixed with "foo")."""
    formatter = HtmlFormatter(linenos=True,
                              anchorlinenos=True,
                              lineanchors="foo")
    return highlight(read_file(path), JavaLexer(), formatter)
def process(name):
    """Translate one tutorial source file into html/<name>.html.

    Reads tag-prefixed lines (ptitle/phead/plink/scode/pnav/synx),
    highlights embedded Rust/Java code blocks, and returns the list of
    (heading-anchor, level) fragments found, for building a TOC.
    """
    # Format is: ptitle <title>
    TITLE_TAG = "ptitle"
    # Format is: phead <level> <heading>
    HEADING_TAG = "phead"
    # Format is: plink <url> <optional text>
    LINK_TAG = "plink"
    # Format is: scode <tabs?> <dict of arguments>
    CODE_TAG = "scode"
    # Format is: pnav <prev file> <next file>
    NAV_TAG = "pnav"
    # Format is: syntax
    # wanted something that wouldn't be casually typed
    SYNTAX_TAG = "synx"
    header = '''<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<link href="css/style.css" rel="stylesheet" type="text/css">
<link href="css/tabs.css" rel="stylesheet" type="text/css">
<link href="css/code.css" rel="stylesheet" type="text/css">
<script src="http://ajax.googleapis.com/ajax/libs/jquery/1.8.3/jquery.min.js"></script>
<script src="js/tabs.js"></script>
</head>
<body>\n'''
    footer = '''
</body>
</html>'''
    print("File is ", name)
    fin = open(name, 'r')
    # Output path: strip the 9-character source prefix and the extension.
    fout = open("html/" + name.split('.')[0][9:] + ".html", 'w')
    print("Saving to html/", name.split('.')[0][9:], ".html")
    block_formatter = HtmlFormatter(linenos = True, cssclass = "src")
    snip_formatter = HtmlFormatter(linenos = False, cssclass = "src")
    lexers = {'rust': RustLexer(), 'java': JavaLexer()}
    in_para = False
    tab_num = 1
    fragments = []
    fout.write(header)
    # Note: You should lead with a heading.
    while True:
        line = fin.readline()
        if not line:
            break;
        tokens = line.replace('\n','').split(' ')
        #Cases for code snippets
        if tokens[0] == CODE_TAG:
            args = literal_eval(' '.join(tokens[2:]))
            keys = args.keys()
            if in_para:
                fout.write("</p>\n")
                in_para = False
            if 'loc' in keys:
                # Import a (possibly line-ranged) slice of a source file.
                code = StringIO()
                f = open(args['loc'], 'r')
                if args['range']:
                    start = int(args['start']) if 'start' in keys else 0
                    stop = int(args['stop']) if 'stop' in keys else float("inf")
                else:
                    start = 0
                    stop = 0
                i = 1
                for l in f:
                    if start <= i and (not args['range'] or i <= stop):
                        code.write(l)
                    i += 1
                fout.write('<div class="notab">')
                fout.write(highlight(code.getvalue(), lexers[args['lang']],
                                     block_formatter) + '\n')
                fout.write('</div>')
                f.close()
                code.close()
            # No tabs and source files simultaneously for now. Would be easy to fix; I'm lazy.
            # Also doesn't support dynamic number of tabs. Again, easy fix.
            elif literal_eval(tokens[1]):
                # Tabbed Rust/Java side-by-side block delimited by
                # rcode/edocr and jcode/edocj sentinels.
                rust = StringIO()
                java = StringIO()
                if fin.readline() != "rcode\n":
                    raise Exception("Bad code format!")
                l = fin.readline()
                while l != "edocr\n":
                    rust.write(l)
                    l = fin.readline()
                if fin.readline() != "jcode\n":
                    raise Exception("Bad code format!")
                l = fin.readline()
                while l != "edocj\n":
                    java.write(l)
                    l = fin.readline()
                fout.write('''
<ul class="tabs">
<li><a href="#tab{0}-1">Rust</a></li>
<li><a href="#tab{0}-2">Java</a></li>
</ul>
<div id="tab{0}-1" class="tabcode">
'''.format(tab_num))
                fout.write('<div class="tabbed">');
                fout.write(highlight(rust.getvalue(), lexers['rust'],
                                     block_formatter) + '\n')
                fout.write('</div>');
                fout.write('''\t\t\t\t\t</div>
<div id="tab{0}-2" class="tabcode">
'''.format(tab_num))
                fout.write('<div class="tabbed">');
                fout.write(highlight(java.getvalue(), lexers['java'],
                                     block_formatter) + '\n')
                fout.write('</div>');
                fout.write('''\t\t\t\t\t</div>''')
                tab_num += 1
            else:
                # Inline (code/edoc) or block (codeb/bedoc) snippet.
                code = StringIO()
                l = fin.readline()
                if l == "codeb\n":
                    l = fin.readline()
                    while l != "bedoc\n":
                        code.write(l)
                        l = fin.readline()
                    fout.write('<div class="notab">');
                    fout.write(highlight(code.getvalue(), lexers[args['lang']],
                                         block_formatter) +'\n')
                    fout.write("</div>");
                elif l == "code\n":
                    in_para = True
                    l = fin.readline()
                    while l != "edoc\n":
                        code.write(l)
                        l = fin.readline()
                    high = highlight(code.getvalue(), lexers[args['lang']],
                                     snip_formatter)
                    # Rewind over the "</p>\n" just written, then splice
                    # the snippet inline; [22:-14] strips Pygments' own
                    # <div>/<pre> wrapper markup.
                    fout.seek(fout.tell()-5)
                    fout.write('<span class="src"><code>' +
                               high[22:-14].rstrip() + "</code></span>" + '\n')
                else:
                    raise Exception("Bad codeblock format!")
                code.close()
        #Case for syntax
        elif tokens[0] == SYNTAX_TAG:
            # Backtick-delimited spans are marked optional.
            fout.write('<div class="src"><pre>')
            l = fin.readline()
            while l != "xnys\n":
                tokens = l.split("`")
                for index, token in enumerate(tokens):
                    if index % 2 == 1:
                        fout.write('<span class="optional">')
                    fout.write(highlight(token, lexers['rust'],
                                         snip_formatter)[22:-14])
                    if index % 2 == 1:
                        fout.write('</span>')
                fout.write("\n")
                l = fin.readline()
            fout.write('</pre></div>')
        #Case for page title
        elif tokens[0] == TITLE_TAG:
            fout.write("<title>" + ' '.join(tokens[1:]) + "</title>\n")
        #Case for a heading
        elif tokens[0] == HEADING_TAG:
            link = '_'.join(tokens[2:])
            fout.write("<h" + tokens[1] + ' id="' + link + '" >' +
                       ' '.join(tokens[2:]) + "</h" + tokens[1] + ">\n")
            fragments.append((link,int(tokens[1])))
        #Case for a link
        elif tokens[0] == LINK_TAG:
            if not in_para:
                fout.write("<p>\n")
            fout.write('<a href="' + tokens[1] + '">' +
                       (tokens[1] if len(tokens) < 3
                        else ' '.join(tokens[2:])) + "</a>\n")
            if not in_para:
                fout.write("</p>\n")
        #Case for a paragraph break
        elif tokens == ['']:
            if in_para:
                fout.write("</p>\n")
                in_para = False
        #Case for the bottom-of-page navigation stuff
        elif tokens[0] == NAV_TAG:
            if in_para:
                fout.write("</p>\n")
                in_para = False
            fout.write('<br/><table style="border-top: 1px solid #ccc; border-bottom: 1px solid #ccc; width:100%"><tr><td style="width:20%"><a href="http://aml3.github.io/RustTutorial/html/' + tokens[1]+ '" style="float:left"> Previous </a></td>')
            fout.write('<td style="text-align: center;"><a href="http://aml3.github.io/RustTutorial/html/toc.html"> Table of Contents </a></td>')
            fout.write('<td style="width:20%"><a href="http://aml3.github.io/RustTutorial/html/' + tokens[2]+ '" style="float: right"> Next </a></td></tr></table><br/>')
        #The default case
        else:
            if not in_para:
                fout.write("<p>\n")
                in_para = True
            fout.write(line)
    fout.write(footer)
    fin.close()
    fout.close()
    return fragments
def src_parser(self):
    """Parse source code directory of a program and collect its java files.

    Returns an OrderedDict mapping a source-file id (relative path for
    aspectj, otherwise package-qualified basename) to a SourceFile with
    its comments, class/method/attribute/variable names and package.
    """
    # Getting the list of source files recursively from the source directory
    src_addresses = glob.glob(str(self.src) + '/**/*.java', recursive=True)
    # Creating a java lexer instance for pygments.lex() method
    java_lexer = JavaLexer()
    src_files = OrderedDict()
    # Looping to parse each source file
    for src_file in src_addresses:
        with open(src_file, encoding='cp1256') as file:
            src = file.read()
        # Placeholder for different parts of a source file
        comments = ''
        class_names = []
        attributes = []
        method_names = []
        variables = []
        # Source parsing: attributes come from field declarations,
        # variables from local declarations.
        parse_tree = None
        try:
            parse_tree = javalang.parse.parse(src)
            for path, node in parse_tree.filter(
                    javalang.tree.VariableDeclarator):
                if isinstance(path[-2], javalang.tree.FieldDeclaration):
                    attributes.append(node.name)
                elif isinstance(path[-2], javalang.tree.VariableDeclaration):
                    variables.append(node.name)
        except:
            # Unparseable files still get lexical analysis below.
            pass
        # Lexically tokenize the source file
        lexed_src = pygments.lex(src, java_lexer)
        for i, token in enumerate(lexed_src):
            if token[0] in Token.Comment:
                # Removing the license comment
                if i == 0 and token[0] is Token.Comment.Multiline:
                    src = src[src.index(token[1]) + len(token[1]):]
                    continue
                comments += token[1]
            elif token[0] is Token.Name.Class:
                class_names.append(token[1])
            elif token[0] is Token.Name.Function:
                method_names.append(token[1])
        # Get the package declaration if exists
        if parse_tree and parse_tree.package:
            package_name = parse_tree.package.name
        else:
            package_name = None
        if self.name == 'aspectj':
            # aspectj uses the relative path as the file id.
            src_files[os.path.relpath(
                src_file, start=self.src)] = SourceFile(
                    src, comments, class_names, attributes,
                    method_names, variables,
                    [os.path.basename(src_file).split('.')[0]],
                    package_name)
        else:
            # If source file has package declaration
            if package_name:
                src_id = (package_name + '.'
                          + os.path.basename(src_file))
            else:
                src_id = os.path.basename(src_file)
            src_files[src_id] = SourceFile(
                src, comments, class_names, attributes, method_names,
                variables,
                [os.path.basename(src_file).split('.')[0]],
                package_name)
    return src_files
def src_parser(self):
    """Parse source code directory of a program and collect its java files.

    Returns an OrderedDict mapping a source-file id to a SourceFile
    carrying its comments, class/method/attribute/variable names,
    extracted method bodies and their javadoc, and package name.

    Fixed: the project-name check ``self.name == 'aspectj' or 'tomcat'
    or ...`` was always true ('tomcat' is a truthy string); it is now a
    proper membership test.
    """
    # Getting the list of source files recursively from the source directory
    src_addresses = glob.glob(str(self.src) + '/**/*.java', recursive=True)
    # Creating a java lexer instance for pygments.lex() method
    java_lexer = JavaLexer()
    src_files = OrderedDict()
    # Looping to parse each source file
    for src_file in src_addresses:
        with open(src_file, encoding='latin-1') as file:
            src = file.read()
        # Placeholder for different parts of a source file
        comments = ''
        class_names = []
        attributes = []
        method_names = []
        variables = []
        methods = []
        methods_api = []
        # Source parsing: attributes from field declarations, variables
        # from local declarations.
        parse_tree = None
        try:
            parse_tree = javalang.parse.parse(src)
            for path, node in parse_tree.filter(
                    javalang.tree.VariableDeclarator):
                if isinstance(path[-2], javalang.tree.FieldDeclaration):
                    attributes.append(node.name)
                elif isinstance(path[-2], javalang.tree.VariableDeclaration):
                    variables.append(node.name)
        except:
            # Unparseable files still get lexical analysis below.
            pass
        # Extract methods (body without comments) plus their javadoc.
        if parse_tree:
            for _, node_to_find in parse_tree.filter(
                    javalang.tree.MethodDeclaration):
                start, end = get_start_end_for_node(node_to_find, parse_tree)
                method = removeComments(get_string(start, end, src))
                methods.append(method)
                methods_api.append(node_to_find.documentation)
        # Trimming the source file: drop everything up to the last import
        # (or the package declaration).  ``ind`` marks files where a
        # leading license comment may still need stripping.
        ind = False
        if parse_tree:
            if parse_tree.imports:
                last_imp_path = parse_tree.imports[-1].path
                src = src[src.index(last_imp_path) + len(last_imp_path)
                          + 1:]
            elif parse_tree.package:
                package_name = parse_tree.package.name
                src = src[src.index(package_name) + len(package_name) + 1:]
            else:
                # There is no import and no package declaration
                ind = True
        # javalang can't parse the source file
        else:
            ind = True
        # Lexically tokenize the source file
        lexed_src = pygments.lex(src, java_lexer)
        for i, token in enumerate(lexed_src):
            if token[0] in Token.Comment:
                # Removing the license comment
                if ind and i == 0 and token[0] is Token.Comment.Multiline:
                    src = src[src.index(token[1]) + len(token[1]):]
                    continue
                comments += token[1]
            elif token[0] is Token.Name.Class:
                class_names.append(token[1])
            elif token[0] is Token.Name.Function:
                method_names.append(token[1])
        # Get the package declaration if exists
        if parse_tree and parse_tree.package:
            package_name = parse_tree.package.name
        else:
            package_name = None
        # These projects use the relative path as the file id.
        if self.name in ('aspectj', 'tomcat', 'eclipse', 'swt', 'birt'):
            src_files[os.path.relpath(
                src_file, start=self.src)] = SourceFile(
                    src, comments, class_names, attributes, method_names,
                    variables,
                    [os.path.basename(src_file).split('.')[0]],
                    package_name, methods, methods_api)
        else:
            # If source file has package declaration
            if package_name:
                src_id = (package_name + '.'
                          + os.path.basename(src_file))
            else:
                src_id = os.path.basename(src_file)
            src_files[src_id] = SourceFile(
                src, comments, class_names, attributes, method_names,
                variables,
                [os.path.basename(src_file).split('.')[0]],
                package_name, methods, methods_api)
    return src_files
def do_api(self, args):
    """List and select methods from a given loaded API module.

    Sub-commands (dispatched on the whitespace-split args):
        api list              -- list API methods of the target module
        api select            -- pick a method and run SurgicalLib search
        api analyzed list     -- list previously processed methods
        api analyzed select   -- inspect one processed method (xrefs + source)
    """
    try:
        # List the available API methods from the target module
        if args.split()[0] == "list":
            if self.target_module:
                print("\n")
                # model.values maps a key to a collection of method names
                # -- presumably {class/category: [methods]}; verify in model.
                for k, v in self.target_module.model.values.items():
                    for m in v:
                        print(
                            self.t.cyan("\t--> {} : {}".format(
                                self.target_module.name, m)))
                print("\n")
            else:
                self.logger.surgical_log(
                    "info", "Target module has not been loaded (!)")
        # Select an API method from the target module
        elif args.split()[0] == "select":
            if self.target_module:
                # NOTE(review): raw_input is Python 2 only; under Python 3
                # this raises NameError (caught by the blanket handler below).
                selection = raw_input(
                    self.t.yellow("[{}] ".format(datetime.now())) +
                    "Select method : ")
                for k, v in self.target_module.model.values.items():
                    for m in v:
                        if m == selection:
                            self.logger.surgical_log(
                                "info", "Searching ...")
                            # Deferred import to avoid a circular/startup cost.
                            from core.brains.surgical.lib.libsurgical import SurgicalLib
                            # Begin processing and return the results fomr the selected method
                            surgical_lib = SurgicalLib(
                                self.target_module,
                                self.vmx,
                                self.vm,
                                k,
                                selection,
                                self.methods)
                            # methods_api_usage will contain a list of tuples
                            self.methods_api_usage = surgical_lib.search()
                        else:
                            # NOTE(review): this fires once per non-matching
                            # entry, not once overall -- confirm intent.
                            self.logger.surgical_log(
                                "warn", "Method not found (!)")
        # Analyze the processed method list
        elif args.split()[0] == "analyzed":
            # List the methods that have been processed
            if args.split()[1] == "list":
                if self.methods_api_usage:
                    print("\n")
                    # Each m is a tuple; m[0] looks like a method object
                    # with class_name/name attributes.
                    for m in self.methods_api_usage:
                        print(
                            self.t.cyan("\t--> {} -> {} ".format(
                                m[0].class_name, m[0].name)))
                    print("\n")
                else:
                    # NOTE(review): SurgicalError is instantiated but never
                    # raised -- likely relies on a side effect in __init__.
                    SurgicalError("API usage not found (!)")
                    SurgicalError("Try running --> 'api select' again (!)")
            # Select from the processed method list
            elif args.split()[1] == "select":
                if self.methods_api_usage:
                    selection = raw_input(
                        self.t.yellow("[{}] ".format(datetime.now())) +
                        "Select method : ")
                    for m in self.methods_api_usage:
                        if selection == m[0].name:
                            print("\n")
                            print(
                                self.t.cyan("\t--> Class : {}".format(
                                    m[0].class_name)))
                            print(
                                self.t.cyan("\t\t--> Method : {}".format(
                                    m[0].name)))
                            print(
                                self.t.cyan(
                                    "\t\t\t --> XREFS ###########"))
                            # Print cross-references to and from the method.
                            self.u.print_xref("T", m[1].method.XREFto.items)
                            self.u.print_xref("F", m[1].method.XREFfrom.items)
                            print("\n")
                            # m[2] holds the decompiled Java source text.
                            print(
                                highlight(m[2], JavaLexer(),
                                          TerminalFormatter()))
                else:
                    SurgicalError("API usage not found (!)")
                    SurgicalError("Try running --> 'api select' again (!)")
    except Exception as e:
        # NOTE(review): e.message was removed in Python 3 -- use str(e).
        SurgicalError(e.message)
def load_java_syntax(self):
    """Switch the owning editor to Java highlighting.

    Installs a fresh ``JavaLexer`` on ``self.master`` and triggers a
    full re-highlight of the current buffer.
    """
    editor = self.master
    editor.lexer = JavaLexer()
    editor.initial_highlight()
class JavaTest(unittest.TestCase):
    """Unit tests for pygments' JavaLexer tokenization."""

    def setUp(self):
        # Fresh lexer per test; unlimited diff output for readable failures.
        self.lexer = JavaLexer()
        self.maxDiff = None

    def testEnhancedFor(self):
        # A labelled enhanced-for header must lex the label, keyword,
        # type/variable names and punctuation as distinct tokens.
        fragment = u'label:\nfor(String var2: var1) {}\n'
        tokens = [
            (Name.Label, u'label:'),
            (Text, u'\n'),
            (Keyword, u'for'),
            (Operator, u'('),
            (Name, u'String'),
            (Text, u' '),
            (Name, u'var2'),
            (Operator, u':'),
            (Text, u' '),
            (Name, u'var1'),
            (Operator, u')'),
            (Text, u' '),
            (Operator, u'{'),
            (Operator, u'}'),
            (Text, u'\n'),
        ]
        self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))

    def testNumericLiterals(self):
        # Exercises Java numeric literal forms: underscores in digits,
        # long/float suffixes, hex/octal/binary, and hex floats.
        fragment = '0 5L 9__542_72l 0xbEEf 0X9_A 0_35 01 0b0___101_0'
        fragment += ' 0. .7_17F 3e-1_3d 1f 6_01.9e+3 0x.1Fp3 0XEP8D\n'
        tokens = [
            (Number.Integer, '0'),
            (Text, ' '),
            (Number.Integer, '5L'),
            (Text, ' '),
            (Number.Integer, '9__542_72l'),
            (Text, ' '),
            (Number.Hex, '0xbEEf'),
            (Text, ' '),
            (Number.Hex, '0X9_A'),
            (Text, ' '),
            (Number.Oct, '0_35'),
            (Text, ' '),
            (Number.Oct, '01'),
            (Text, ' '),
            (Number.Bin, '0b0___101_0'),
            (Text, ' '),
            (Number.Float, '0.'),
            (Text, ' '),
            (Number.Float, '.7_17F'),
            (Text, ' '),
            (Number.Float, '3e-1_3d'),
            (Text, ' '),
            (Number.Float, '1f'),
            (Text, ' '),
            (Number.Float, '6_01.9e+3'),
            (Text, ' '),
            (Number.Float, '0x.1Fp3'),
            (Text, ' '),
            (Number.Float, '0XEP8D'),
            (Text, '\n')
        ]
        self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))