Example #1
def generate_parse_tree(source):
    """Generates an ANTLR parse tree from a piece of source code."""

    input_stream = FileStream(source)
    lexer = JavaLexer(input_stream)
    stream = CommonTokenStream(lexer)
    parser = JavaParser(stream)
    tree = parser.compilationUnit()
    return tree
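A minimal usage sketch for this function, assuming the imports usually paired with these examples (the antlr4 runtime plus the generated JavaLexer/JavaParser modules) and that source is a file path accepted by FileStream:

# Usage sketch; sys.argv[1] is assumed to be a path to a .java file.
import sys

if __name__ == '__main__':
    tree = generate_parse_tree(sys.argv[1])
    print(tree.getText())   # concatenated text of every token in the compilation unit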
Example #2
    def parse(self, file_content):
        inputS = InputStream.InputStream(file_content)
        lexer = JavaLexer(inputS)
        stream = CommonTokenStream(lexer)
        parser = JavaParser(stream)
        tree = parser.compilationUnit()
        self.lookupTable = dict()
        # tree.getText()
        return tree
Example #3
def parse_file(path):
    with open(path, 'r') as f:
        source = f.read()

    codeStream = InputStream(source)
    lexer = JavaLexer(codeStream)

    # Build the token stream and a parser configured to produce parse trees
    tokens = CommonTokenStream(lexer)
    parser = JavaParser(tokens)
    parser.buildParseTrees = True

    return parser
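Note that parse_file returns a configured parser rather than a tree, so the caller still has to invoke a start rule. A minimal sketch of that step, assuming compilationUnit is the intended start rule (as in the other examples) and using a placeholder path:

parser = parse_file('Example.java')   # 'Example.java' is a placeholder path
tree = parser.compilationUnit()       # buildParseTrees is True, so this yields the parse tree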
Example #4
def main(argv):
    if len(argv)>1:
        input = FileStream(argv[1])
    else:
        input = InputStream(sample)
    lexer = JavaLexer(input)
    stream = CommonTokenStream(lexer)
    parser = JavaParser(stream)
    tree = parser.compilationUnit()
    # print(Trees.toStringTree(tree, None, parser))
    print "Grammar %s has %d rules, %d tokens" % (parser.grammarFileName, len(parser.ruleNames), len(lexer.ruleNames))
    print ', '.join(CollectTokenFeatures.feature_names)
    token_features = CollectTokenFeatures(stream)
    walker = ParseTreeWalker()
    walker.walk(token_features, tree)
    print(token_features.feature_names)
Example #5
def extract_data(code):
    """
    Parse a code string and collect features with a CollectTokenFeatures.
    Returns (tokens:list, inject_newlines:boolean[], indent:int[],
    whitespace:int[], features:list<object[]>)
    """
    input = InputStream(code)
    lexer = JavaLexer(input)
    stream = CommonTokenStream(lexer)
    parser = JavaParser(stream)
    tree = parser.compilationUnit()
    collector = CollectTokenFeatures(stream)
    walker = ParseTreeWalker()
    walker.walk(collector, tree)
    return (stream.tokens, collector.inject_newlines,
            collector.indent, collector.whitespace, collector.features)
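A usage sketch for extract_data; the Java snippet below is a placeholder, and the per-token lists are assumed to run parallel to stream.tokens as the docstring suggests:

code = "class A { int one() { return 1; } }"   # placeholder Java snippet
tokens, newlines, indent, whitespace, features = extract_data(code)
for tok, nl in zip(tokens, newlines):
    print(tok.text, nl)   # each token's text and its collected inject-newline flag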
Example #6
def main(argv):
    if len(argv) > 1:
        input = FileStream(argv[1])
    else:
        input = InputStream(sample)
    lexer = JavaLexer(input)
    stream = CommonTokenStream(lexer)
    parser = JavaParser(stream)
    tree = parser.compilationUnit()
    # print(Trees.toStringTree(tree, None, parser))
    print "Grammar %s has %d rules, %d tokens" % (
        parser.grammarFileName, len(parser.ruleNames), len(lexer.ruleNames))
    print ', '.join(CollectTokenFeatures.feature_names)
    token_features = CollectTokenFeatures(stream)
    walker = ParseTreeWalker()
    walker.walk(token_features, tree)
    print(token_features.feature_names)
Example #7
def parse_for_methods(repo_path):
    """This is the main method where the actual antlr4 parsing happens.

    Arguments:
        repo_path {str} -- path to the source file being parsed (the name is also used when logging errors)
    """
    try:
        istream = FileStream(repo_path, encoding='utf-8')
        lexer = JavaLexer(istream)
        stream = CommonTokenStream(lexer)
        parser = JavaParser(stream)
        tree = parser.compilationUnit()

        # Use PatternListener to walk only class and method declarations
        walker = ParseTreeWalker()
        walker.walk(PatternListener(), tree)

    except Exception as e:
        print("Unexpected error:  " + repo_path + "   " + str(e))
Example #8
def main(argv):
    input = FileStream(argv[1])
    lexer = JavaLexer(input)
    stream = CommonTokenStream(lexer)
    parser = JavaParser(stream)
    tree = parser.compilationUnit()
    walker = ParseTreeWalker()
    listener = CompilationUnitListener()
    walker.walk(listener, tree)

    Classes = listener.Classes
    Interfaces = listener.Interfaces
    symbolTable = {}

    error = InitalizeSymbolTable(symbolTable, Classes, Interfaces)

    if (error < 0):
        return False

    graph = UMLGraph()

    for Interface in Interfaces:
        graph.addInterface(Interface)

    for Class in Classes:
        graph.addClass(Class)
        if Class.Extends in symbolTable:
            graph.addExtendsRelation(Class.ClassName, Class.Extends)
        for interface in Class.ImplementList:
            if interface["Type"] in symbolTable:
                graph.addImplementsRelation(Class.ClassName, interface["Type"])

    print(str(graph))


#if __name__ == '__main__':
#main(sys.argv)
#MainClass = Java2UML()
#result = MainClass.JavaCode2UML(sys.argv[1])
#print(result)
Example #9
def standardizeJava(javaFiles: List[HookFile]) -> List[StandardizedFile]:
    standardizedFiles: List[StandardizedFile] = []

    for javaFile in javaFiles:
        lexer = JavaLexer(InputStream(javaFile.fileData))
        tokenStream = CommonTokenStream(lexer)
        parser = JavaParser(tokenStream)
        tree = parser.compilationUnit()

        visitor = JavaParserVisitor()
        visitor.visit(tree)

        var, funcs, classes, types = JavaSyntax.getLists()

        words = javaFile.fileData.split(' ')

        for i in range(len(words)):
            for v in var:
                words[i] = replaceIfContains(v, words[i], "V")

            for f in funcs:
                words[i] = replaceIfContains(f, words[i], "F")

            for c in classes:
                words[i] = replaceIfContains(c, words[i], "C")

            for t in types:
                words[i] = replaceIfContains(t, words[i], "T")

        stripped = ''.join(words).replace('\t', '').lower()

        standardizedFiles.append(StandardizedFile(javaFile, stripped))

        JavaSyntax.clearLists()

    return standardizedFiles
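standardizeJava relies on a replaceIfContains helper that is not part of this listing. A minimal sketch of what such a helper might look like, assuming it swaps any occurrence of a collected identifier inside a word for a one-letter placeholder:

def replaceIfContains(identifier, word, placeholder):
    # Hypothetical helper: replace the identifier with the placeholder letter
    # wherever it appears inside the word; otherwise return the word unchanged.
    return word.replace(identifier, placeholder) if identifier in word else word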
Example #10
    def JavaCode2UML(self, string):
        #input = FileStream(string)
        #lexer = JavaLexer(input)

        lexer = JavaLexer(InputStream(string))
        stream = CommonTokenStream(lexer)
        parser = JavaParser(stream)
        tree = parser.compilationUnit()
        walker = ParseTreeWalker()
        listener = CompilationUnitListener()
        walker.walk(listener, tree)

        Classes = listener.Classes
        Interfaces = listener.Interfaces
        symbolTable = {}

        error = InitalizeSymbolTable(symbolTable, Classes, Interfaces)

        if (error < 0):
            return False

        graph = UMLGraph()

        for Interface in Interfaces:
            graph.addInterface(Interface)

        for Class in Classes:
            graph.addClass(Class)
            if Class.Extends in symbolTable:
                graph.addExtendsRelation(Class.ClassName, Class.Extends)
            for interface in Class.ImplementList:
                if interface["Type"] in symbolTable:
                    graph.addImplementsRelation(Class.ClassName,
                                                interface["Type"])

        return str(graph)
Example #11
def format_code(newline_forest, indent_forest, whitespace_forest, vec, code):
    """
    Tokenize code and then, one token at a time, predict newline or not.
    Do prediction of newline on the fly, adjusting token line/column.
    """

    # tokenize and wipe location info in tokens
    input = InputStream(code)
    lexer = JavaLexer(input)
    stream = CommonTokenStream(lexer)
    stream.fill()
    # wipe out token location information in sample
    for t in stream.tokens:
        t.line = 0
        t.column = 0

    # parse to get parse tree
    parser = JavaParser(stream)
    tree = parser.compilationUnit()

    # compute feature vector for each token and adjust line/column as we walk tree
    collector = ProcessTokens(newline_forest, indent_forest, whitespace_forest, vec, stream)
    walker = ParseTreeWalker()
    walker.walk(collector, tree)
Example #12
__author__ = 'jszheng'

import sys
from antlr4 import *
from antlr4.InputStream import InputStream

from JavaLexer import JavaLexer
from JavaParser import JavaParser

from ExtractInterfaceListener import ExtractInterfaceListener

if __name__ == '__main__':
    if len(sys.argv) > 1:
        input_stream = FileStream(sys.argv[1])
    else:
        input_stream = InputStream(sys.stdin.readline())

    lexer = JavaLexer(input_stream)
    token_stream = CommonTokenStream(lexer)
    parser = JavaParser(token_stream)
    tree = parser.compilationUnit()

    listener = ExtractInterfaceListener(parser)
    walker = ParseTreeWalker()
    walker.walk(listener, tree)



Example #13
def parse_java_deep(file):
    Command = "ctags -f - --kinds-java=* --fields=neK " + file
    global delimiter
    delimiter = "\r\0?\r?\0\r"

    try:
        astString = subprocess.check_output(Command,
                                            stderr=subprocess.STDOUT,
                                            shell=True).decode()

    except subprocess.CalledProcessError as e:
        print("Parser Error:", e)
        astString = ""

    with open(file, 'r') as f:
        lines = f.readlines()
    methodList = astString.split('\n')
    method = re.compile(r'(method)')
    number = re.compile(r'(\d+)')
    funcBody = re.compile(r'{([\S\s]*)}')
    string = ""
    funcId = 1
    methodInstanceList = []

    for i in methodList:
        elemList = re.sub(r'[\t\s ]{2,}', '', i)
        elemList = elemList.split("\t")
        methodInstance = function(file)
        methodInstance.funcBody = ''

        if i != '' and method.match(elemList[3]) and len(elemList) >= 6:
            methodInstance.name = elemList[0]
            methodInstance.parentFile = elemList[1]
            methodInstance.lines = (int(number.search(elemList[4]).group(0)),
                                    int(number.search(elemList[5]).group(0)))
            methodInstance.parentNumLoc = len(lines)
            string = ""
            string = string.join(lines[methodInstance.lines[0] -
                                       1:methodInstance.lines[1]])
            if funcBody.search(string):
                methodInstance.funcBody = methodInstance.funcBody + funcBody.search(
                    string).group(0)
                lexer = JavaLexer(InputStream(string))
                tokens = CommonTokenStream(lexer)
                parser = JavaParser(tokens)
                tree = parser.memberDeclaration()
                walker = ParseTreeWalker()
                listener = JavaParserListener()
                listener.variables = []
                listener.parameters = []
                listener.dataTypes = []
                listener.methodCalls = []
                walker.walk(listener, tree)
                methodInstance.variableList = listener.variables
                methodInstance.dataTypeList = listener.dataTypes
                methodInstance.funcCalleeList = listener.methodCalls
                methodInstance.parameterList = listener.parameters
            else:
                methodInstance.funcBody = " "
            methodInstance.funcId = funcId
            funcId += 1
            methodInstanceList.append(methodInstance)
            print(methodInstance.funcBody)
#            print(tree.toStringTree(recog=parser))
#Not finished
    return methodInstanceList
Example #14
import sys
import antlr3
from JavaLexer import JavaLexer
from JavaParser import JavaParser

cStream = antlr3.StringStream(open(sys.argv[1]).read())
lexer = JavaLexer(cStream)
tStream = antlr3.CommonTokenStream(lexer)
parser = JavaParser(tStream)
parser.compilationUnit()
print "finished parsing OK"