Пример #1
0
    def highlight_code(self):
        """Render this item's source as highlighted markup.

        When ``self.matched_line_number()`` is truthy, every line cluster
        returned by ``self.compute_lines_to_highlight(
        self.adjacent_line_numbers())`` becomes one snippet: its text is
        rendered with ``LatexFormatter`` for the return value, and
        ``self.code_snippets`` is replaced by one ``GitSearchItemSnippet`` per
        cluster (built through ``self.hl_snippet``).

        When nothing was rendered that way, the whole ``self.file_content`` is
        rendered with ``HtmlFormatter`` and a snippet starting at line 0 is
        appended to the existing ``self.code_snippets``.

        NOTE(review): the cluster branch uses ``LatexFormatter`` while the
        fallback uses ``HtmlFormatter`` -- confirm that this is intended.

        Returns:
            All rendered snippets concatenated into one string.
        """
        rendered = []

        if self.matched_line_number():
            clusters = self.compute_lines_to_highlight(
                self.adjacent_line_numbers())

            snippets = []
            for cluster in clusters:
                cluster_lines = [self.file_content_lines[n] for n in cluster]
                first_line = min(cluster)
                # linenostart is independent from hl_lines, so the matching
                # line numbers are shifted to be relative to the snippet.
                relative_hits = map(lambda ln: ln - first_line + 1,
                                    self.matching_line_numbers)
                snippets.append(
                    ("\n".join(cluster_lines), first_line, relative_hits))

            for text, first_line, _hits in snippets:
                rendered.append(
                    highlight(text, JavaLexer(),
                              LatexFormatter(linenos=True,
                                             linenostart=first_line)))

            self.code_snippets = [
                GitSearchItemSnippet(self.hl_snippet(text, first_line),
                                     first_line)
                for text, first_line, _hits in snippets
            ]

        # Lexical search does not store line numbers, so without clusters the
        # complete file is rendered instead of the matched location.
        if not rendered:
            whole_file = highlight(
                self.file_content, JavaLexer(),
                HtmlFormatter(linenos=True, anchorlinenos=True))
            rendered.append(whole_file)
            self.code_snippets.append(
                GitSearchItemSnippet(self.hl_snippet(self.file_content, 0), 0))

        return "".join(rendered)
Пример #2
0
class JavaTest(unittest.TestCase):
    """Token-level checks for the Pygments ``JavaLexer``."""

    def setUp(self):
        """Create the lexer under test and lift the assertion diff limit."""
        self.maxDiff = None
        self.lexer = JavaLexer()

    def testEnhancedFor(self):
        """A labelled enhanced ``for`` statement yields the expected tokens."""
        source = u'label:\nfor(String var2: var1) {}\n'
        space = (Text, u' ')
        newline = (Text, u'\n')
        expected = [
            (Name.Label, u'label:'), newline,
            (Keyword, u'for'), (Operator, u'('),
            (Name, u'String'), space,
            (Name, u'var2'), (Operator, u':'), space,
            (Name, u'var1'), (Operator, u')'), space,
            (Operator, u'{'), (Operator, u'}'), newline,
        ]
        actual = list(self.lexer.get_tokens(source))
        self.assertEqual(expected, actual)
Пример #3
0
class PygmentsPreviewer(Previewer):
    """Previewer that renders source-code attachments through Pygments."""

    # All supported MIME types
    MIMETYPES = (
        'text/css',
        'text/x-python',
        'text/x-ruby-script',
        'text/x-java-source',
        'text/x-c',
        'application/javascript',
        'text/x-c',
        'text/x-fortran',
        'text/x-csharp',
        'text/php',
        'text/x-php',
    )

    # Python's mimetypes lib and Pygments do not quite agree on some MIME
    # types, so these are mapped to a lexer explicitly.
    CUSTOM_LEXERS = {
        'text/x-c': CppLexer(),
        'text/x-java-source': JavaLexer(),
        'text/x-ruby-script': RubyLexer(),
        'text/php': PhpLexer()
    }

    @classmethod
    def can_preview(cls, attachment_file):
        """Return whether the file's content type is listed in ``MIMETYPES``."""
        content_type = attachment_file.content_type
        return content_type in cls.MIMETYPES

    @classmethod
    def generate_content(cls, attachment):
        """Render the attachment as highlighted HTML inside the preview template.

        The lexer comes from ``CUSTOM_LEXERS`` when the MIME type is listed
        there and from ``get_lexer_for_mimetype`` otherwise.
        """
        mime_type = attachment.file.content_type

        lexer = cls.CUSTOM_LEXERS.get(mime_type)
        if lexer is None:
            lexer = get_lexer_for_mimetype(mime_type)

        with attachment.file.open() as source_file:
            html_formatter = HtmlFormatter(
                style='tango', linenos='inline', prestyles='mono')
            source = source_file.read()
            html_code = highlight(source, lexer, html_formatter)

        return render_template(
            'previewer_code:pygments_preview.html',
            attachment=attachment,
            html_code=html_code,
            css_code=html_formatter.get_style_defs('.highlight'),
        )
Пример #4
0
 def highlightString(src):
     """Return *src* rendered as HTML by the lexer matching ``self.currentExt``.

     ``self`` is taken from the enclosing scope.  Extensions not listed below
     are highlighted as JavaScript.  Any error while importing the lexer or
     highlighting yields the literal message ``"File could not be
     highlighted"`` instead of an exception.
     """
     # File extension -> name of the Pygments lexer class to use.
     lexer_names = {
         'php': 'PhpLexer',
         'py': 'PythonLexer',
         'rb': 'RubyLexer',
         'pl': 'PerlLexer',
         'java': 'JavaLexer',
         'cs': 'CSharpLexer',
     }
     try:
         import pygments.lexers
         lexer_name = lexer_names.get(self.currentExt, 'JavascriptLexer')
         lexer_cls = getattr(pygments.lexers, lexer_name)
         return highlight(src, lexer_cls(), HtmlFormatter())
     except Exception:
         # Deliberately best-effort, but no longer swallows KeyboardInterrupt
         # or SystemExit the way a bare ``except:`` did.
         return "File could not be highlighted"
Пример #5
0
    def reload_java_sources(self):
        """Rebuild the displayed document from freshly decompiled sources.

        Asks Androguard to decompile the class again. Useful when:
         - an element has been renamed, to propagate the info
         - the current tab is changed: we do not know what the user did
           since then, so previous changes must be propagated as well

        The document starts with a comment block naming the file and digest,
        followed by one ``// <method>`` comment line per method of the
        current class, followed by the source returned by
        ``self.current_class.get_source_ext()``.
        """
        androconf.debug("Getting sources for %s" % self.current_class)

        file_banner = "// filename:%s\n// digest:%s\n\n" % (
            self.current_filename, self.current_digest)
        method_banner = "".join(
            "// " + str(method) + "\n"
            for method in self.current_class.get_methods())

        lines = [
            ("COMMENTS", [("COMMENT", file_banner)]),
            ("COMMENTS", [("COMMENT", method_banner + "\n\n")]),
        ]
        lines.extend(self.current_class.get_source_ext())

        # TODO: delete doc when tab is closed? not deleted by "self" :(
        if hasattr(self, "doc"):
            del self.doc
        self.doc = SourceDocument(parent=self, lines=lines)
        self.setDocument(self.doc)

        # The highlighter does not need to be stored: highlightBlock is called
        # automatically because the QTextDocument is handed to the
        # QSyntaxHighlighter constructor.
        MyHighlighter(self.doc, lexer=JavaLexer())
Пример #6
0
class JavaTest(unittest.TestCase):
    """Regression test for how ``JavaLexer`` tokenizes a labelled for-each."""

    def setUp(self):
        """Build the lexer and show complete diffs on assertion failure."""
        self.maxDiff = None
        self.lexer = JavaLexer()

    def testEnhancedFor(self):
        """Compare the lexer output for a labelled for-each with the expected stream."""
        java_source = u'label:\nfor(String var2: var1) {}\n'
        expected_stream = [
            (Name.Label, u'label:'), (Text, u'\n'),
            (Keyword, u'for'), (Operator, u'('), (Name, u'String'),
            (Text, u' '), (Name, u'var2'), (Operator, u':'),
            (Text, u' '), (Name, u'var1'), (Operator, u')'),
            (Text, u' '), (Operator, u'{'), (Operator, u'}'),
            (Text, u'\n'),
        ]
        produced_stream = list(self.lexer.get_tokens(java_source))
        self.assertEqual(expected_stream, produced_stream)
Пример #7
0
    def reload_java_sources(self):
        '''Rebuild the displayed document from freshly decompiled sources.

           Asks Androguard to decompile the class again. Useful when:
            - an element has been renamed, to propagate the info
            - the current tab is changed: we do not know what the user did
              since then, so previous changes must be propagated as well

           The document starts with a block comment naming the file and its
           digest, followed by ``self.current_class.get_source_ext()``.
        '''
        androconf.debug("Getting sources for %s" % self.current_class)

        banner = "/*\n * filename:%s\n * digest:%s\n */\n" % (
            self.current_filename, self.current_digest)
        lines = [("COMMENTS", [("COMMENT", banner)])]
        lines.extend(self.current_class.get_source_ext())

        #TODO: delete doc when tab is closed? not deleted by "self" :(
        if hasattr(self, "doc"):
            del self.doc
        self.doc = SourceDocument(parent=self, lines=lines)
        self.setDocument(self.doc)

        if not PYGMENTS:
            androconf.debug("Pygments is not present !")
            return

        # The highlighter does not need to be stored: highlightBlock is called
        # automatically because the QTextDocument is handed to the
        # QSyntaxHighlighter constructor.
        PygmentsHighlighter(self.doc, lexer=JavaLexer())
Пример #8
0
    def _process():
        """Append the line-numbered HTML of every file in ``self.file_paths`` to ``self.html``.

        ``self`` comes from the enclosing scope.
        """
        for source_path in self.file_paths:
            java_source = read_file(source_path)
            rendered = highlight(java_source, JavaLexer(),
                                 HtmlFormatter(linenos=True))
            self.html.append(rendered)
Пример #9
0
 def choose_lexer(filename):
     """Pick the editor lexer from the file type of *filename*.

     ``py``, ``java`` and ``cpp`` install a new lexer on ``TextEditor.lex``;
     any other file type leaves it as it was.  The code input widget is then
     pointed at ``TextEditor.lex`` in every case.
     """
     filetype = FileSystem().getFileType(filename)
     if filetype == 'py':
         lexer_cls = PythonLexer
     elif filetype == 'java':
         lexer_cls = JavaLexer
     elif filetype == 'cpp':
         lexer_cls = CppLexer
     else:
         lexer_cls = None

     if lexer_cls is not None:
         TextEditor.lex = lexer_cls()
     TextEditor.codeinput.lexer = TextEditor.lex
Пример #10
0
def process_codeimport_cmd(ctx, tex, cmd, mode):
    """Expand a code-import command into a highlighted ``<div class="codeimport">``.

    The first command argument has the form ``Class.member[.member...]``.
    The source of every listed member is fetched with ``get_member``, joined
    with newlines, and highlighted as Java.
    """
    blocks = catlist(['<div class="codeimport">'])
    # blocks.extend(tex2htm.process_recursively(cmd.args[0], mode))
    clz, member_path = cmd.args[0].split('.', 1)

    source_lines = catlist()
    for member_name in member_path.split('.'):
        source_lines.extend(get_member(member_name, clz))

    highlighted = highlight("\n".join(source_lines), JavaLexer(),
                            HtmlFormatter())
    blocks.append(highlighted)
    blocks.append("</div><!-- codeimport -->")
    return blocks
Пример #11
0
def highlight_files(file_paths):
    """Return one line-numbered HTML rendering per path, in input order.

    Each file is read with ``read_file`` and highlighted as Java.
    """
    return [
        highlight(read_file(source_path), JavaLexer(),
                  HtmlFormatter(linenos=True))
        for source_path in file_paths
    ]
Пример #12
0
class JavaTest(unittest.TestCase):
    """Token-level regression tests for the Pygments ``JavaLexer``."""

    def setUp(self):
        """Create the lexer under test and lift the assertion diff limit."""
        self.maxDiff = None
        self.lexer = JavaLexer()

    def _lex(self, source):
        """Return the complete token list the lexer produces for *source*."""
        return list(self.lexer.get_tokens(source))

    def testEnhancedFor(self):
        """A labelled enhanced ``for`` statement yields the expected tokens."""
        fragment = u'label:\nfor(String var2: var1) {}\n'
        space = (Text, u' ')
        expected = [
            (Name.Label, u'label:'), (Text, u'\n'),
            (Keyword, u'for'), (Operator, u'('),
            (Name, u'String'), space,
            (Name, u'var2'), (Operator, u':'), space,
            (Name, u'var1'), (Operator, u')'), space,
            (Operator, u'{'), (Operator, u'}'),
            (Text, u'\n'),
        ]
        self.assertEqual(expected, self._lex(fragment))

    def testNumericLiterals(self):
        """Integer, hex, octal, binary and float literals get their own token types."""
        fragment = ('0 5L 9__542_72l 0xbEEf 0X9_A 0_35 01 0b0___101_0'
                    ' 0. .7_17F 3e-1_3d 1f 6_01.9e+3 0x.1Fp3 0XEP8D\n')
        literals = [
            (Number.Integer, '0'),
            (Number.Integer, '5L'),
            (Number.Integer, '9__542_72l'),
            (Number.Hex, '0xbEEf'),
            (Number.Hex, '0X9_A'),
            (Number.Oct, '0_35'),
            (Number.Oct, '01'),
            (Number.Bin, '0b0___101_0'),
            (Number.Float, '0.'),
            (Number.Float, '.7_17F'),
            (Number.Float, '3e-1_3d'),
            (Number.Float, '1f'),
            (Number.Float, '6_01.9e+3'),
            (Number.Float, '0x.1Fp3'),
            (Number.Float, '0XEP8D'),
        ]
        # Every literal is followed by one space, except the last one, which
        # is followed by the closing newline.
        expected = []
        for literal in literals:
            expected.append(literal)
            expected.append((Text, ' '))
        expected[-1] = (Text, '\n')
        self.assertEqual(expected, self._lex(fragment))
Пример #13
0
 def choose_lexer(self):
     """Set ``TextEditor.lex`` from the file type of ``TextEditor.filepath``.

     With no file path the Python lexer is used.  With a path, ``py``,
     ``java`` and ``cpp`` select the matching lexer; any other file type
     leaves ``TextEditor.lex`` unchanged.
     """
     fs = FileSystem()
     # Identity test: ``!= None`` would go through a custom ``__ne__``.
     if TextEditor.filepath is None:
         # No file yet, so there is no extension to inspect.
         TextEditor.lex = PythonLexer()
         return

     ext = fs.getFileType(TextEditor.filepath)
     if ext == 'py':
         TextEditor.lex = PythonLexer()
     elif ext == 'java':
         TextEditor.lex = JavaLexer()
     elif ext == 'cpp':
         TextEditor.lex = CppLexer()
Пример #14
0
def process_hash_env(ctx, b, env, mode):
    """Render a hash environment as inline code.

    Unescaped ``&`` characters in ``env.content`` are escaped first.  In math
    mode the result is always wrapped in ``\\texttt{...}``.  Outside math mode
    it becomes a plain ``<span class="texttt">`` in screen-reader mode and
    Java-highlighted markup (``CodeHtmlFormatter``) otherwise.  Outside
    screen-reader mode ``mode`` and ``env`` are also printed.
    """
    screenreader = ctx.screenreader_mode
    if not screenreader:
        print(mode, env)

    escaped = re.sub(r'(^|[^\\])&', r'\1\&', env.content)

    if mode & tex2htm.MATH:
        return catlist([r'\texttt{{{}}}'.format(escaped)])
    if screenreader:
        return catlist([r'<span class="texttt">{}</span>'.format(escaped)])
    # return catlist([highlight(inner, JavaLexer(), CodeHtmlFormatter())])
    return catlist([highlight(escaped, JavaLexer(), CodeHtmlFormatter())])
Пример #15
0
def create(type):
    """Build a formatting callback for the given file-type tag.

    Args:
        type: File-type tag.  ``'py'``, ``'c'``/``'h'`` and ``'java'`` select
            a dedicated lexer with ``PyeDefaultStyle``; anything else is
            reported through ``debug`` and falls back to ``TextLexer`` with
            ``PyeTextStyle``.

    Returns:
        A function ``fmt_handler(data, regions)`` that highlights *data* with
        the chosen lexer and formatter and returns
        ``formatter.get_formatted(data)``, or ``None`` (after logging the
        traceback through ``debug``) if highlighting raised.
    """
    if type == 'py':
        lexer = PythonLexer()
        style = PyeDefaultStyle
    elif type in ('c', 'h'):
        lexer = PyeCLexer()
        style = PyeDefaultStyle
    elif type == 'java':
        lexer = JavaLexer()
        style = PyeDefaultStyle
    else:
        debug("Cannot create a syntax highligher for '{}'... using TextLexer".
              format(type))
        lexer = TextLexer()
        style = PyeTextStyle
    formatter = PyeFormatter(style=style)

    def fmt_handler(data, regions):
        """Highlight *data*, marking *regions*; return ``None`` on failure."""
        try:
            formatter.set_highlight_regions(regions)
            highlight(data, lexer, formatter)
            return formatter.get_formatted(data)
        except Exception:
            # Best effort: the caller gets None and the traceback is logged.
            debug(traceback.format_exc())
            return None

    return fmt_handler
Пример #16
0
def extract_class_and_method(java_dir, base_path):
    """Extract class, function, attribute and plain names from each Java file.

    Every ``*.java`` file directly inside *java_dir* is tokenized with
    Pygments.  The result maps the file name without its extension (the
    original author describes it as the file's hash) to a dict with the keys
    ``'NC'`` (class names), ``'NF'`` (function names), ``'NA'`` (attribute
    names) and ``'N'`` (other names), each a list of unique strings.  The
    mapping is also written as JSON to ``<base_path>/<repo>.names.json``,
    where ``<repo>`` is the last path component of *java_dir*.

    Args:
        java_dir: Directory containing the Java files.
        base_path: Directory that receives the JSON file.

    Returns:
        The mapping described above, or ``None`` if *java_dir* is not a
        directory.
    """
    import ast

    if not os.path.isdir(java_dir):
        return None

    # Raw Pygments token type -> key in the per-file result.
    keys_by_token_type = {
        'Token.Name.Class': 'NC',
        'Token.Name.Function': 'NF',
        'Token.Name.Attribute': 'NA',
        'Token.Name': 'N',
    }

    names_dict = {}
    for f in os.listdir(java_dir):
        stem, ext = os.path.splitext(f)
        if ext != '.java':
            continue

        found = {key: set() for key in keys_by_token_type.values()}
        with open(os.path.join(java_dir, f)) as fl:
            cont = fl.read()

        raw = highlight(cont, JavaLexer(), RawTokenFormatter())
        for record in str(raw, encoding='utf-8').splitlines():
            token_type, _, literal = record.partition('\t')
            key = keys_by_token_type.get(token_type)
            if key is not None:
                # The value column is a Python string literal; literal_eval
                # decodes it without executing anything from the input file.
                found[key].add(ast.literal_eval(literal))

        names_dict[stem] = {key: list(values) for key, values in found.items()}

    repo = java_dir.strip('/').split('/')[-1]
    jf = os.path.join(base_path, '{}.names.json'.format(repo))
    # Close the output deterministically so the JSON is flushed on return.
    with open(jf, 'w') as out:
        json.dump(names_dict, out)
    return names_dict
Пример #17
0
def main():
    """Interactively compile and run the sample programs.

    Without arguments every entry of ``test_programs/good`` is processed in
    sorted order; ``um`` and ``vt`` select the ``undotted_method_call`` and
    ``vtable`` lists instead.  For each program the source is printed with
    syntax highlighting, ``./dj2ll`` is run on it, and the executable named
    after the file (last three characters stripped) is run.  Pressing
    anything but a bare Enter at the prompt stops the loop.  Finally
    ``rm good*`` is executed.
    """
    if len(sys.argv) == 1:
        # NOTE(review): the original comment said known-broken programs are
        # excluded, but every directory entry is taken -- confirm.
        files = sorted(os.listdir("test_programs/good"))
    elif sys.argv[1] == "um":
        files = undotted_method_call
    elif sys.argv[1] == "vt":
        files = vtable
    else:
        # Previously this fell through and crashed with an unbound `files`.
        sys.exit(f"unknown test group {sys.argv[1]!r}: "
                 "expected no argument, 'um' or 'vt'")

    for file in files:
        fileName = f"test_programs/good/{file}"
        with open(fileName) as f:
            source = f.read()

        infoStr = f"File = {file}"
        asterisks = len(infoStr) * "*"
        print(f"{asterisks}\n{infoStr}\n{asterisks}")
        print(highlight(source, JavaLexer(), T256F(style="monokai")))
        os.system(f"./dj2ll {fileName}")
        print(asterisks)
        os.system(f"./{file[:-3]}")
        print(asterisks)
        reply = input("(press [enter] to continue):").strip()
        if reply != "":
            break
    os.system("rm good*")
Пример #18
0
    def hl_snippet(self, source, start):
        """Return *source* as Java-highlighted HTML numbered from line *start*.

        Line numbers are rendered as anchors (``anchorlinenos=True``).
        """
        lexer = JavaLexer()
        formatter = HtmlFormatter(
            linenos=True, anchorlinenos=True, linenostart=start)
        return highlight(source, lexer, formatter)
Пример #19
0
#!/usr/bin/python

import sys
import subprocess
import re
import time
from termcolor import colored
import math
import pygments
from pygments.lexers import CppLexer, PythonLexer, JavaLexer
from pygments.formatters import TerminalFormatter

# Lexers and the formatter are created once at import time and shared by the
# colorize_* helpers below.
cpp_lexer = CppLexer()
python_lexer = PythonLexer()
java_lexer = JavaLexer()

# Formatter used for all terminal output.
terminal_formatter = TerminalFormatter()


def colorize_cpp(line):
    """Return *line* highlighted as C++ for the terminal.

    Trailing carriage returns and newlines are stripped from the result.
    """
    # The body used to read an undefined name `code` instead of the parameter.
    return pygments.highlight(line, cpp_lexer,
                              terminal_formatter).rstrip("\r\n")


def colorize_python(line):
    """Return *line* highlighted as Python for the terminal.

    Trailing carriage returns and newlines are stripped from the result.
    """
    # The body used to read an undefined name `code` instead of the parameter.
    return pygments.highlight(line, python_lexer,
                              terminal_formatter).rstrip("\r\n")


def colorize_java(line):
    return pygments.highlight(code, java_lexer,
Пример #20
0
 def setUp(self):
     """Create a fresh ``JavaLexer`` and lift the assertion diff size limit."""
     self.maxDiff = None
     self.lexer = JavaLexer()
Пример #21
0
                    ln += 1
                    currentline = self.file_content_lines[ln].strip()

        return ln

    def highlight_matched_terms(self, gitsearch_item_html):
        """Wrap every occurrence of the matched terms in a highlight span.

        Each term in ``self.matched_terms`` is matched case-insensitively as
        a whole word.  For a qualified term such as ``a.b`` the individual
        parts are highlighted as well, because the parts may sit in separate
        markup elements.  The text found in the document is kept exactly as
        it is; only the surrounding ``<span class="hll">`` is added.

        Args:
            gitsearch_item_html: Markup to decorate.

        Returns:
            The markup with highlight spans inserted.
        """
        html_template = '<span class="hll">%s</span>'

        def wrap(match):
            # Re-emit what was matched so case and text stay untouched.
            return html_template % match.group(0)

        html = gitsearch_item_html
        for term in self.matched_terms:
            candidates = [term]
            if "." in term:
                candidates.extend(term.split("."))
            # Drop empty pieces (e.g. from "a.") and duplicates, and try the
            # longest alternative first so a full qualified name wins over
            # its parts.
            candidates = sorted(
                dict.fromkeys(c for c in candidates if c),
                key=len, reverse=True)
            if not candidates:
                continue

            # Escape the terms: "." must match a literal dot, not any char.
            alternatives = "|".join(re.escape(c) for c in candidates)
            pattern = re.compile(r'\b(?:%s)\b' % alternatives, re.IGNORECASE)
            html = pattern.sub(wrap, html)

        return html


if __name__ == '__main__':
    # Manual smoke test: highlight one hard-coded Java file and print it.
    path = "/Users/Raphael/Downloads/GitArchive/linkedin_indextank-engine/indextank-engine/lucene-experimental/com/flaptor/org/apache/lucene/util/automaton/UTF32ToUTF8.java"
    matched_terms = [u'Integer.toBinaryString', u'Integer']
    #i = GitSearchItem(path, matched_terms)
    file_content = read_file(path)

    # A parenthesised single-argument print behaves the same on Python 2
    # and Python 3, unlike the print statement used before.
    print(unescape_html(
        highlight(file_content, JavaLexer(), MyHtmlFormatter(linenos=True))))
Пример #22
0
    def do_api(self, args):
        """
        List and select methods from a given loaded API module

        := api list
        := api select
        := api analyzed list
        := api analyzed select

        ``list`` prints every method of ``self.target_module``; ``select``
        prompts for a class and a method and stores the result of
        ``SurgicalLib.search()`` in ``self.methods_api_usage``;
        ``analyzed list`` / ``analyzed select`` print the stored results.
        Any exception raised while handling the command is passed to
        ``SurgicalError``.
        """

        # Locals
        class_selection = None
        method_selection = None
        surgical_lib = None

        try:
            # List the available API methods from the target module
            if args.split()[0] == "list":
                if self.target_module:
                    print("\n")
                    # k is a dotted class name, v the methods listed for it
                    for k, v in self.target_module.model.values.items():
                        print("\n")
                        for m in v:
                            print(
                                self.t.cyan("\t--> {} : {} : {}".format(
                                    self.target_module.name,
                                    k.split(".")[-1], m)))
                    print("\n")
                else:
                    self.logger.surgical_log(
                        "info", "Target module has not been loaded (!)")
            # Select an API method from the target module
            elif args.split()[0] == "select":
                if self.target_module:
                    # TODO Consider building a wrapper around raw_input()
                    # NOTE(review): raw_input exists only on Python 2
                    class_selection = raw_input(
                        self.t.yellow("[{}] ".format(datetime.now())) +
                        "Select class : ")
                    method_selection = raw_input(
                        self.t.yellow("[{}] ".format(datetime.now())) +
                        "Select method : ")
                    for k, v in self.target_module.model.values.items():
                        # This is so we can support classes with identical
                        # method names --> Ex: java.util.zip.ZipFile
                        if class_selection == k.split(".")[-1]:
                            for m in v:
                                if m == method_selection:
                                    self.logger.surgical_log(
                                        "info", "Analyzing ...")
                                    from core.brains.surgical.lib.libsurgical import SurgicalLib
                                    # Begin processing and return the results
                                    # from the selected api
                                    surgical_lib = SurgicalLib(
                                        self.target_module, self.vmx, self.vm,
                                        k, method_selection, self.methods)
                                    # methods_api_usage will contain a list of
                                    # tuples
                                    self.methods_api_usage = surgical_lib.search(
                                    )
                                else:
                                    # NOTE(review): this warning is logged for
                                    # every method of the class that is not
                                    # the selected one, even when a later
                                    # method matches -- confirm intended
                                    self.logger.surgical_log(
                                        "warn", "Method not found (!)")
            # Analyze the processed method list
            elif args.split()[0] == "analyzed":
                # List the methods that have been processed
                if args.split()[1] == "list":
                    if self.methods_api_usage:
                        print("\n")
                        for m in self.methods_api_usage:
                            print(
                                self.t.cyan("\t--> {} -> {} ".format(
                                    m[0].class_name, m[0].name)))
                        print("\n")
                    else:
                        # NOTE(review): the SurgicalError instance is created
                        # but not raised -- presumably the constructor
                        # reports the error itself; confirm
                        SurgicalError("API usage not found (!)")
                # Select from the processed method list
                elif args.split()[1] == "select":
                    if self.methods_api_usage:
                        selection = raw_input(
                            self.t.yellow("[{}] ".format(datetime.now())) +
                            "Select method : ")
                        for m in self.methods_api_usage:
                            # m[0] supplies the class and method names, m[1]
                            # the cross references, m[2] the text that is
                            # highlighted as Java
                            if selection == m[0].name:
                                print("\n")
                                print(
                                    self.t.cyan("\t--> Class : {}".format(
                                        m[0].class_name)))
                                print(
                                    self.t.cyan("\t\t--> Method : {}".format(
                                        m[0].name)))
                                print(
                                    self.t.cyan(
                                        "\t\t\t --> XREFS ###########"))
                                self.u.print_xref("T",
                                                  m[1].method.XREFto.items)
                                self.u.print_xref("F",
                                                  m[1].method.XREFfrom.items)
                                print("\n")
                                print(
                                    highlight(m[2], JavaLexer(),
                                              TerminalFormatter()))
                    else:
                        SurgicalError("API usage not found (!)")
        except Exception as e:
            # NOTE(review): e.message is a Python 2 attribute; on Python 3
            # this line itself raises AttributeError
            SurgicalError(e.message)
Пример #23
0
def pygment_mul_line(java_lines):
    """Tokenize Java source lines into a flat, normalized token list.

    The lines are joined with newlines and lexed with Pygments.  Tokens are
    then normalized as follows:

    * newlines become ``'<nl>'`` and the literal token ``NewBlock`` becomes
      ``'<nb>'``; other whitespace only contributes its newlines;
    * float, other numeric and string literals are replaced by ``FLOAT<n>``,
      ``NUMBER<n>`` and ``STRING<n>``, where equal literals share one alias;
    * namespaces become ``'NAMESPACE'``, single-line comments ``'SINGLE'``
      plus ``'<nl>'``, and multi-line comments one ``'COMMENT'`` per line;
    * decorators become ``'@'`` followed by the lower-cased name;
    * plain names, attributes, classes and functions are kept verbatim and
      recorded in the returned dictionary under ``n<i>``, ``a<i>``, ``c<i>``
      and ``f<i>`` aliases;
    * every other token is emitted line by line, separated by ``'<nl>'``.

    Args:
        java_lines: Iterable of source lines without trailing newlines.

    Returns:
        A tuple ``(tokens, aliases)``: the normalized token list and the
        mapping from identifier text to its alias.  Both are empty when the
        joined source is empty.
    """
    import ast

    string = '\n'.join(java_lines)
    if string == '':
        return list(), dict()

    raw = highlight(string, JavaLexer(), RawTokenFormatter())
    raw = str(raw, encoding='utf-8')

    tokenList = []
    variableDict = {}
    literal_aliases = {}
    next_index = {}

    def alias_for(table, text, prefix):
        """Return the alias of *text* in *table*, creating ``<prefix><n>`` if new."""
        if text not in table:
            index = next_index.get(prefix, 0)
            table[text] = '{}{}'.format(prefix, index)
            next_index[prefix] = index + 1
        return table[text]

    # Token types whose text is kept verbatim but registered under an alias.
    identifier_prefixes = {
        'Token.Name': 'n',
        'Token.Name.Attribute': 'a',
        'Token.Name.Class': 'c',
        'Token.Name.Function': 'f',
    }

    for record in raw.splitlines():
        fields = record.split('\t')
        token_type = fields[0]
        # The value column is a Python string literal; literal_eval decodes
        # it without executing anything that came from the input source.
        s = ast.literal_eval(fields[1])

        if s == '\n':
            tokenList.append('<nl>')
        elif s == 'NewBlock':
            tokenList.append('<nb>')
        elif s.isspace():
            tokenList.extend(['<nl>'] * s.count('\n'))
        elif token_type == 'Token.Literal.Number.Float':
            tokenList.append(alias_for(literal_aliases, s, 'FLOAT'))
        elif token_type.startswith('Token.Literal.Number'):
            tokenList.append(alias_for(literal_aliases, s, 'NUMBER'))
        elif token_type.startswith('Token.Literal.String'):
            tokenList.append(alias_for(literal_aliases, s, 'STRING'))
        elif token_type == 'Token.Name.Namespace':
            tokenList.append('NAMESPACE')
        elif token_type == 'Token.Comment.Single':
            tokenList.extend(['SINGLE', '<nl>'])
        elif token_type == 'Token.Comment.Multiline':
            for _ in range(s.count('\n')):
                tokenList.extend(['COMMENT', '<nl>'])
            tokenList.append('COMMENT')
        elif token_type == 'Token.Name.Decorator':
            tokenList.extend(['@', s[1:].lower()])
        elif token_type in identifier_prefixes:
            alias_for(variableDict, s, identifier_prefixes[token_type])
            tokenList.append(s)
        else:
            pieces = s.splitlines()
            for piece in pieces:
                if piece != '' and not piece.isspace():
                    tokenList.append(piece)
                tokenList.append('<nl>')
            # Remove the separator added after the last piece.  An empty
            # token adds nothing, so there is nothing to remove then (the
            # old code popped an unrelated earlier token in that case).
            if pieces:
                tokenList.pop()
    return tokenList, variableDict
Пример #24
0
def lexer():
    """Yield a newly created ``JavaLexer`` instance."""
    java_lexer = JavaLexer()
    yield java_lexer
Пример #25
0
            # end of class/interface/method declaration
# BUG: this breaks decorators which have curly braces inside them, like
# @Target({ElementType.METHOD, ElementType.TYPE}) - only @Target( is shown
            if (ttype is token.Operator) and (value == '{') and def_started:
                def_started = False
                yield token.Text, "\n"


            if def_started:
                yield ttype, value


if __name__ == "__main__":
    # Manual check: run the filter over JUnit's Ignore.java and print it.
    import os
    from pygments import highlight
    from pygments.lexers import JavaLexer
    from pygments.formatters import NullFormatter

    lex = JavaLexer()
    lex.add_filter(JavaAPIFilter())

    # os.walk() does not expand "~", so the home directory has to be resolved
    # explicitly; otherwise the walk silently visits nothing.
    source_root = os.path.expanduser(
        '~/repos/git/junit:junit/src/main/java/org/junit')
    for (path, dirs, files) in os.walk(source_root):
        for fname in files:
            f = os.path.join(path, fname)
            if f.endswith("src/main/java/org/junit/Ignore.java"):
                with open(f, 'r') as source_file:
                    code = source_file.read()
                # Parenthesised single-argument prints behave the same on
                # Python 2 and Python 3.
                print("---------- start %s ----------" % f)
                print(highlight(code, lex, NullFormatter()))
                print("---------- end %s ----------" % f)
Пример #26
0
# This program is free software. It comes without any warranty, to
# the extent permitted by applicable law. You can redistribute it
# and/or modify it under the terms of the Do What The F**k You Want
# To Public License, Version 2, as published by Sam Hocevar. See
# http://sam.zoy.org/wtfpl/COPYING for more details.

from .topping import Topping

# Pygments is an optional dependency.  SYNTAX_HIGHLIGHT records whether it
# could be imported; FORMATTER and LEXER exist only when it is True.
try:
    from pygments import highlight
    from pygments.lexers import JavaLexer
    from pygments.formatters import HtmlFormatter
except ImportError:
    # Only a missing package disables highlighting; other errors are no
    # longer hidden the way a bare ``except:`` hid them.
    SYNTAX_HIGHLIGHT = False
else:
    SYNTAX_HIGHLIGHT = True
    FORMATTER = HtmlFormatter(classprefix="hl_", nowrap=True)
    LEXER = JavaLexer()


class PacketsTopping(Topping):
    KEY = "packets.packet"
    NAME = "Packets"
    ITEMS = ("Direction", ("id", "ID"), ("size", "Size"), ("code", None))
    SORTING = Topping.NUMERIC_SORT
    NO_ESCAPE = ("code")
    ESCAPE_TITLE = False

    DIRECTIONS = {
        (True, True): "Both",
        (True, False): "Client to server",
        (False, True): "Server to client",
Пример #27
0
def highlight_file(path):
    """Return the file at *path* as Java-highlighted HTML.

    Line numbers are rendered as anchors, and line anchors use the ``foo``
    prefix.
    """
    java_source = read_file(path)
    lexer = JavaLexer()
    formatter = HtmlFormatter(
        linenos=True, anchorlinenos=True, lineanchors="foo")
    return highlight(java_source, lexer, formatter)
Пример #28
0
def _read_until(stream, terminator):
    """Return the text read from ``stream`` up to the ``terminator`` line.

    The terminator line itself is consumed but not included in the result.

    Raises:
        Exception: if the stream ends before ``terminator`` is seen.
    """
    collected = []
    while True:
        l = stream.readline()
        if not l:
            # readline() returns '' at EOF; without this check a missing
            # terminator would make the caller loop forever.
            raise Exception("Bad code format!")
        if l == terminator:
            return ''.join(collected)
        collected.append(l)


def _end_paragraph(fout, in_para):
    """Write ``</p>`` when a paragraph is open and return the new state (False)."""
    if in_para:
        fout.write("</p>\n")
    return False


def process(name):
    """Translate the tutorial markup file ``name`` into an HTML page.

    The input is read line by line. The first space-separated word of a line
    selects the handling (see the ``*_TAG`` constants below); any other
    non-empty line is copied into the current ``<p>`` paragraph, and an empty
    line closes the paragraph. The page is written to
    ``html/<stem>.html`` where ``<stem>`` is the part of ``name`` before the
    first ``.`` with its first nine characters removed
    (NOTE(review): presumably a fixed directory prefix - confirm with callers).

    Args:
        name: path of the markup file to convert.

    Returns:
        A list of ``(anchor_id, heading_level)`` tuples, one per ``phead``
        line, in document order.

    Raises:
        Exception: on a malformed or unterminated code/syntax section.
    """
    # Format is: ptitle <title>
    TITLE_TAG = "ptitle"

    # Format is: phead <level> <heading>
    HEADING_TAG = "phead"

    # Format is: plink <url> <optional text>
    LINK_TAG = "plink"

    # Format is: scode <tabs?> <dict of arguments>
    CODE_TAG = "scode"

    # Format is: pnav <prev file> <next file>
    NAV_TAG = "pnav"

    # Format is: syntax
    SYNTAX_TAG = "synx" # wanted something that wouldn't be casually typed

    header = '''<!DOCTYPE html>
<html>
    <head>
        <meta charset="utf-8">
        <link href="css/style.css" rel="stylesheet" type="text/css">
        <link href="css/tabs.css" rel="stylesheet" type="text/css">
        <link href="css/code.css" rel="stylesheet" type="text/css">
        <script src="http://ajax.googleapis.com/ajax/libs/jquery/1.8.3/jquery.min.js"></script>
        <script src="js/tabs.js"></script>
    </head>
    <body>\n'''

    footer = '''    </body>
                </html>'''

    print("File is ", name)
    page = name.split('.')[0][9:]

    # Context managers guarantee both files are closed even when a malformed
    # input raises part-way through.
    with open(name, 'r') as fin, open("html/" + page + ".html", 'w') as fout:
        print("Saving to html/", page, ".html")

        block_formatter = HtmlFormatter(linenos = True, cssclass = "src")
        snip_formatter = HtmlFormatter(linenos = False, cssclass = "src")
        lexers = {'rust': RustLexer(), 'java': JavaLexer()}

        in_para = False
        tab_num = 1
        fragments = []

        fout.write(header)
        # Note: You should lead with a heading.
        while True:
            line = fin.readline()
            if not line:
                break
            tokens = line.replace('\n','').split(' ')

            #Cases for code snippets
            if tokens[0] == CODE_TAG:
                args = literal_eval(' '.join(tokens[2:]))

                if 'loc' in args:
                    # Code pulled from an external source file, optionally
                    # restricted to the 1-based inclusive range start..stop.
                    in_para = _end_paragraph(fout, in_para)
                    if args.get('range'):
                        start = int(args['start']) if 'start' in args else 0
                        stop = int(args['stop']) if 'stop' in args else float("inf")
                    else:
                        # No (or false) 'range': take the whole file.
                        start = 0
                        stop = float("inf")
                    with open(args['loc'], 'r') as f:
                        code = ''.join(
                            l for i, l in enumerate(f, 1) if start <= i <= stop)
                    fout.write('<div class="notab">')
                    fout.write(highlight(code, lexers[args['lang']], block_formatter) + '\n')
                    fout.write('</div>')

                # No tabs and source files simultaneously for now. Would be easy to fix; I'm lazy.
                # Also doesn't support dynamic number of tabs. Again, easy fix.
                elif literal_eval(tokens[1]):
                    in_para = _end_paragraph(fout, in_para)

                    if fin.readline() != "rcode\n":
                        raise Exception("Bad code format!")
                    rust = _read_until(fin, "edocr\n")

                    if fin.readline() != "jcode\n":
                        raise Exception("Bad code format!")
                    java = _read_until(fin, "edocj\n")

                    fout.write('''  <ul class="tabs">
        <li><a href="#tab{0}-1">Rust</a></li>
        <li><a href="#tab{0}-2">Java</a></li>
    </ul>
    <div id="tab{0}-1" class="tabcode">
                    '''.format(tab_num))

                    fout.write('<div class="tabbed">')
                    fout.write(highlight(rust, lexers['rust'], block_formatter) + '\n')
                    fout.write('</div>')
                    fout.write('''\t\t\t\t\t</div>
                            
    <div id="tab{0}-2" class="tabcode">
                    '''.format(tab_num))

                    fout.write('<div class="tabbed">')
                    fout.write(highlight(java, lexers['java'], block_formatter) + '\n')
                    fout.write('</div>')
                    fout.write('''\t\t\t\t\t</div>''')
                    tab_num += 1

                else:
                    opener = fin.readline()
                    if opener == "codeb\n":
                        # Stand-alone code block: ends the current paragraph.
                        in_para = _end_paragraph(fout, in_para)
                        code = _read_until(fin, "bedoc\n")
                        fout.write('<div class="notab">')
                        fout.write(highlight(code, lexers[args['lang']], block_formatter) +'\n')
                        fout.write("</div>")

                    elif opener == "code\n":
                        # Inline snippet: it belongs to the running paragraph,
                        # so the paragraph is kept open (or opened) rather than
                        # closed and then "un-closed" by seeking backwards in a
                        # text-mode file, which is undefined behaviour and
                        # clobbered output when no paragraph was open.
                        code = _read_until(fin, "edoc\n")
                        high = highlight(code, lexers[args['lang']], snip_formatter)
                        if not in_para:
                            fout.write("<p>\n")
                            in_para = True
                        # NOTE(review): [22:-14] presumably strips the
                        # '<div class="src"><pre>' wrapper and its closing tags
                        # emitted by the formatter - depends on pygments version.
                        fout.write('<span class="src"><code>' + high[22:-14].rstrip() + "</code></span>" + '\n')

                    else:
                        raise Exception("Bad codeblock format!")

            #Case for syntax
            elif tokens[0] == SYNTAX_TAG:
                fout.write('<div class="src"><pre>')
                while True:
                    l = fin.readline()
                    if not l:
                        # Unterminated section: stop instead of writing
                        # newlines to the output forever.
                        raise Exception("Bad code format!")
                    if l == "xnys\n":
                        break
                    # Backticks delimit optional parts: odd-indexed pieces are
                    # wrapped in a span with class "optional".
                    for index, piece in enumerate(l.split("`")):
                        if index % 2 == 1:
                            fout.write('<span class="optional">')
                        fout.write(highlight(piece, lexers['rust'], snip_formatter)[22:-14])
                        if index % 2 == 1:
                            fout.write('</span>')
                    fout.write("\n")
                fout.write('</pre></div>')

            #Case for page title
            elif tokens[0] == TITLE_TAG:
                fout.write("<title>" + ' '.join(tokens[1:]) + "</title>\n")

            #Case for a heading
            elif tokens[0] == HEADING_TAG:
                link = '_'.join(tokens[2:])
                fout.write("<h" + tokens[1] + ' id="' + link + '" >' + ' '.join(tokens[2:]) + "</h" + tokens[1] + ">\n")
                fragments.append((link,int(tokens[1])))

            #Case for a link
            elif tokens[0] == LINK_TAG:
                # A link outside a paragraph gets its own <p>...</p> wrapper.
                if not in_para:
                    fout.write("<p>\n")
                fout.write('<a href="' + tokens[1] + '">' + (tokens[1] if len(tokens) < 3 else ' '.join(tokens[2:])) + "</a>\n")
                if not in_para:
                    fout.write("</p>\n")

            #Case for a paragraph break
            elif tokens == ['']:
                in_para = _end_paragraph(fout, in_para)

            #Case for the bottom-of-page navigation stuff
            elif tokens[0] == NAV_TAG:
                in_para = _end_paragraph(fout, in_para)
                fout.write('<br/><table style="border-top: 1px solid #ccc; border-bottom: 1px solid #ccc; width:100%"><tr><td style="width:20%"><a href="http://aml3.github.io/RustTutorial/html/' + tokens[1]+ '" style="float:left"> Previous </a></td>')
                fout.write('<td style="text-align: center;"><a href="http://aml3.github.io/RustTutorial/html/toc.html"> Table of Contents </a></td>')
                fout.write('<td style="width:20%"><a href="http://aml3.github.io/RustTutorial/html/' + tokens[2]+ '" style="float: right"> Next </a></td></tr></table><br/>')

            #The default case
            else:
                if not in_para:
                    fout.write("<p>\n")
                    in_para = True
                fout.write(line)
        fout.write(footer)
    return fragments
Пример #29
0
    def src_parser(self):
        """Parse source code directory of a program and collect
        its java files.

        Every ``*.java`` file found recursively under ``self.src`` is read
        (decoded as cp1256), parsed with javalang on a best-effort basis and
        tokenized with pygments.

        Returns:
            An ``OrderedDict`` of ``SourceFile`` objects. For the ``aspectj``
            project the key is the file path relative to ``self.src``;
            otherwise it is ``<package>.<file name>`` or, when no package
            declaration was parsed, just the file name.
        """

        # Getting the list of source files recursively from the source directory
        src_addresses = glob.glob(str(self.src) + '/**/*.java', recursive=True)

        # Creating a java lexer instance for pygments.lex() method
        java_lexer = JavaLexer()

        src_files = OrderedDict()

        # Looping to parse each source file
        for src_file in src_addresses:
            with open(src_file, encoding='cp1256') as file:
                src = file.read()

            # Placeholder for different parts of a source file
            comment_parts = []
            class_names = []
            attributes = []
            method_names = []
            variables = []

            # Source parsing
            parse_tree = None
            try:
                parse_tree = javalang.parse.parse(src)
                for path, node in parse_tree.filter(
                        javalang.tree.VariableDeclarator):
                    parent = path[-2]
                    if isinstance(parent, javalang.tree.FieldDeclaration):
                        attributes.append(node.name)
                    elif isinstance(parent,
                                    javalang.tree.VariableDeclaration):
                        variables.append(node.name)
            except Exception:
                # Best effort: a file javalang cannot handle is still indexed
                # from its lexical tokens below. Catching Exception rather
                # than everything keeps Ctrl-C and SystemExit working.
                pass

            # Lexically tokenize the source file
            lexed_src = pygments.lex(src, java_lexer)

            for i, (ttype, value) in enumerate(lexed_src):
                if ttype in Token.Comment:
                    # Removing the license comment
                    if i == 0 and ttype is Token.Comment.Multiline:
                        src = src[src.index(value) + len(value):]
                        continue
                    comment_parts.append(value)
                elif ttype is Token.Name.Class:
                    class_names.append(value)
                elif ttype is Token.Name.Function:
                    method_names.append(value)
            comments = ''.join(comment_parts)

            # Get the package declaration if exists
            if parse_tree and parse_tree.package:
                package_name = parse_tree.package.name
            else:
                package_name = None

            base_name = os.path.basename(src_file)

            if self.name == 'aspectj':
                src_id = os.path.relpath(src_file, start=self.src)
            elif package_name:
                # If source file has package declaration
                src_id = package_name + '.' + base_name
            else:
                src_id = base_name

            src_files[src_id] = SourceFile(
                src, comments, class_names, attributes, method_names,
                variables, [base_name.split('.')[0]],
                package_name)

        return src_files
Пример #30
0
    def src_parser(self):
        """Parse source code directory of a program and collect
        its java files.

        Every ``*.java`` file found recursively under ``self.src`` is read
        (decoded as latin-1), parsed with javalang on a best-effort basis,
        trimmed of its leading package/import section (or leading block
        comment) and tokenized with pygments.

        Returns:
            An ``OrderedDict`` of ``SourceFile`` objects. For the projects
            aspectj, tomcat, eclipse, swt and birt the key is the file path
            relative to ``self.src``; otherwise it is
            ``<package>.<file name>`` or, when no package declaration was
            parsed, just the file name.
        """

        # Getting the list of source files recursively from the source directory
        src_addresses = glob.glob(str(self.src) + '/**/*.java', recursive=True)

        # Creating a java lexer instance for pygments.lex() method
        java_lexer = JavaLexer()

        src_files = OrderedDict()

        # Projects whose source files are keyed by their relative path.
        path_keyed_projects = ('aspectj', 'tomcat', 'eclipse', 'swt', 'birt')

        # Looping to parse each source file
        for src_file in src_addresses:
            with open(src_file, encoding='latin-1') as file:
                src = file.read()

            # Placeholder for different parts of a source file
            comment_parts = []
            class_names = []
            attributes = []
            method_names = []
            variables = []
            methods = []
            methods_api = []

            # Source parsing
            parse_tree = None
            try:
                parse_tree = javalang.parse.parse(src)
                for path, node in parse_tree.filter(
                        javalang.tree.VariableDeclarator):
                    parent = path[-2]
                    if isinstance(parent, javalang.tree.FieldDeclaration):
                        attributes.append(node.name)
                    elif isinstance(parent,
                                    javalang.tree.VariableDeclaration):
                        variables.append(node.name)
            except Exception:
                # Best effort: a file javalang cannot handle is still indexed
                # from its lexical tokens below. Catching Exception rather
                # than everything keeps Ctrl-C and SystemExit working.
                pass

            # Extract methods
            if parse_tree:
                for _, node_to_find in parse_tree.filter(
                        javalang.tree.MethodDeclaration):
                    start, end = get_start_end_for_node(
                        node_to_find, parse_tree)
                    method = removeComments(get_string(start, end, src))
                    methods.append(method)
                    methods_api.append(node_to_find.documentation)

            # Trimming the source file: drop everything up to the last import
            # (or the package declaration). When neither is available, the
            # leading block comment is dropped during tokenization instead.
            strip_leading_comment = False
            if parse_tree:
                if parse_tree.imports:
                    last_imp_path = parse_tree.imports[-1].path
                    src = src[src.index(last_imp_path) + len(last_imp_path) +
                              1:]
                elif parse_tree.package:
                    package_name = parse_tree.package.name
                    src = src[src.index(package_name) + len(package_name) + 1:]
                else:  # There is no import and no package declaration
                    strip_leading_comment = True
            # javalang can't parse the source file
            else:
                strip_leading_comment = True

            # Lexically tokenize the source file
            lexed_src = pygments.lex(src, java_lexer)

            for i, (ttype, value) in enumerate(lexed_src):
                if ttype in Token.Comment:
                    if (strip_leading_comment and i == 0
                            and ttype is Token.Comment.Multiline):
                        src = src[src.index(value) + len(value):]
                        continue
                    comment_parts.append(value)
                elif ttype is Token.Name.Class:
                    class_names.append(value)
                elif ttype is Token.Name.Function:
                    method_names.append(value)
            comments = ''.join(comment_parts)

            # Get the package declaration if exists
            if parse_tree and parse_tree.package:
                package_name = parse_tree.package.name
            else:
                package_name = None

            base_name = os.path.basename(src_file)

            # Membership test: the previous chain
            # "self.name == 'aspectj' or 'tomcat' or ..." was always truthy
            # because a non-empty string literal is truthy on its own, which
            # made the package-based branch unreachable.
            if self.name in path_keyed_projects:
                src_id = os.path.relpath(src_file, start=self.src)
            elif package_name:
                # If source file has package declaration
                src_id = package_name + '.' + base_name
            else:
                src_id = base_name

            src_files[src_id] = SourceFile(
                src, comments, class_names, attributes, method_names,
                variables, [base_name.split('.')[0]],
                package_name, methods, methods_api)

        return src_files
Пример #31
0
    def do_api(self, args):
        """
        List and select methods from a given loaded API module

        := api list
        := api select
        := api analyzed list
        := api analyzed select
        """
        # The docstring above is kept verbatim: do_* docstrings are normally
        # shown to the user as help text (NOTE(review): confirm this class is
        # a cmd.Cmd-style shell).
        #
        # Any exception, including the IndexError raised when a sub-command
        # word is missing, is handed to SurgicalError at the bottom.
        try:
            words = args.split()
            command = words[0]

            # List the available API methods from the target module
            if command == "list":
                if self.target_module:
                    print("\n")
                    for k, v in self.target_module.model.values.items():
                        for m in v:
                            print(
                                self.t.cyan("\t--> {} : {}".format(
                                    self.target_module.name, m)))
                    print("\n")
                else:
                    self.logger.surgical_log(
                        "info", "Target module has not been loaded (!)")

            # Select an API method from the target module
            elif command == "select":
                if self.target_module:
                    selection = raw_input(
                        self.t.yellow("[{}] ".format(datetime.now())) +
                        "Select method : ")
                    found = False
                    for k, v in self.target_module.model.values.items():
                        for m in v:
                            if m != selection:
                                continue
                            found = True
                            self.logger.surgical_log(
                                "info", "Searching ...")
                            from core.brains.surgical.lib.libsurgical import SurgicalLib
                            # Begin processing and return the results from the selected method
                            surgical_lib = SurgicalLib(
                                self.target_module, self.vmx, self.vm, k,
                                selection, self.methods)
                            # methods_api_usage will contain a list of tuples
                            self.methods_api_usage = surgical_lib.search()
                    # Warn once, and only when nothing matched; previously the
                    # warning fired for every non-matching method, even when
                    # the selection had been found.
                    if not found:
                        self.logger.surgical_log(
                            "warn", "Method not found (!)")
                else:
                    # Same feedback as "api list" instead of silently doing
                    # nothing.
                    self.logger.surgical_log(
                        "info", "Target module has not been loaded (!)")

            # Analyze the processed method list
            elif command == "analyzed":
                subcommand = words[1]

                # List the methods that have been processed
                if subcommand == "list":
                    if self.methods_api_usage:
                        print("\n")
                        for m in self.methods_api_usage:
                            print(
                                self.t.cyan("\t--> {} -> {} ".format(
                                    m[0].class_name, m[0].name)))
                        print("\n")
                    else:
                        # NOTE(review): SurgicalError is instantiated, not
                        # raised - presumably it reports on construction.
                        SurgicalError("API usage not found (!)")
                        SurgicalError("Try running --> 'api select' again (!)")

                # Select from the processed method list
                elif subcommand == "select":
                    if self.methods_api_usage:
                        selection = raw_input(
                            self.t.yellow("[{}] ".format(datetime.now())) +
                            "Select method : ")
                        for m in self.methods_api_usage:
                            if selection == m[0].name:
                                print("\n")
                                print(
                                    self.t.cyan("\t--> Class : {}".format(
                                        m[0].class_name)))
                                print(
                                    self.t.cyan("\t\t--> Method : {}".format(
                                        m[0].name)))
                                print(
                                    self.t.cyan(
                                        "\t\t\t --> XREFS ###########"))
                                self.u.print_xref("T",
                                                  m[1].method.XREFto.items)
                                self.u.print_xref("F",
                                                  m[1].method.XREFfrom.items)
                                print("\n")
                                # m[2] is passed to highlight() as Java source
                                print(
                                    highlight(m[2], JavaLexer(),
                                              TerminalFormatter()))
                    else:
                        SurgicalError("API usage not found (!)")
                        SurgicalError("Try running --> 'api select' again (!)")
        except Exception as e:
            SurgicalError(e.message)
Пример #32
0
 def setUp(self):
     """Create a ``JavaLexer`` as ``self.lexer`` and set ``self.maxDiff`` to ``None``."""
     java_lexer = JavaLexer()
     self.lexer, self.maxDiff = java_lexer, None
Пример #33
0
 def load_java_syntax(self):
     """Install a ``JavaLexer`` on ``self.master`` and run its initial highlight."""
     java_lexer = JavaLexer()
     self.master.lexer = java_lexer
     # The lexer has to be in place before the highlight pass is started.
     self.master.initial_highlight()
Пример #34
0
class JavaTest(unittest.TestCase):
    """Token-level checks of ``JavaLexer`` output for small Java fragments."""

    def setUp(self):
        """Give each test its own lexer; ``maxDiff = None`` shows full diffs."""
        self.lexer = JavaLexer()
        self.maxDiff = None

    def testEnhancedFor(self):
        """A labelled enhanced ``for`` loop yields the expected token stream."""
        fragment = u'label:\nfor(String var2: var1) {}\n'
        space = (Text, u' ')
        newline = (Text, u'\n')
        expected = [
            (Name.Label, u'label:'), newline,
            (Keyword, u'for'),
            (Operator, u'('),
            (Name, u'String'), space,
            (Name, u'var2'),
            (Operator, u':'), space,
            (Name, u'var1'),
            (Operator, u')'), space,
            (Operator, u'{'),
            (Operator, u'}'), newline,
        ]
        actual = list(self.lexer.get_tokens(fragment))
        self.assertEqual(expected, actual)

    def testNumericLiterals(self):
        """Each numeric literal form is classified with the right Number type."""
        fragment = ('0 5L 9__542_72l 0xbEEf 0X9_A 0_35 01 0b0___101_0'
                    ' 0. .7_17F 3e-1_3d 1f 6_01.9e+3 0x.1Fp3 0XEP8D\n')
        # The literals in the order they appear in the fragment.
        literals = [
            (Number.Integer, '0'),
            (Number.Integer, '5L'),
            (Number.Integer, '9__542_72l'),
            (Number.Hex, '0xbEEf'),
            (Number.Hex, '0X9_A'),
            (Number.Oct, '0_35'),
            (Number.Oct, '01'),
            (Number.Bin, '0b0___101_0'),
            (Number.Float, '0.'),
            (Number.Float, '.7_17F'),
            (Number.Float, '3e-1_3d'),
            (Number.Float, '1f'),
            (Number.Float, '6_01.9e+3'),
            (Number.Float, '0x.1Fp3'),
            (Number.Float, '0XEP8D'),
        ]
        # Every literal is followed by a single space, except the last one,
        # which is followed by the closing newline.
        expected = []
        for literal in literals:
            expected.append(literal)
            expected.append((Text, ' '))
        expected[-1] = (Text, '\n')
        actual = list(self.lexer.get_tokens(fragment))
        self.assertEqual(expected, actual)