def generate_parse_tree(source):
    """Build an ANTLR parse tree for a Java source file.

    ``source`` is passed to ``FileStream``, so it is treated as a path
    to a file on disk.

    Returns:
        The ``compilationUnit`` parse-tree root.
    """
    char_stream = FileStream(source)
    token_stream = CommonTokenStream(JavaLexer(char_stream))
    return JavaParser(token_stream).compilationUnit()
def parse(self, file_content):
    """Parse a Java source string and return its ANTLR parse tree.

    Side effect: resets ``self.lookupTable`` to an empty dict for the
    new parse.
    """
    char_stream = InputStream.InputStream(file_content)
    token_stream = CommonTokenStream(JavaLexer(char_stream))
    tree = JavaParser(token_stream).compilationUnit()
    # Start with a fresh symbol lookup table for this compilation unit.
    self.lookupTable = {}
    # tree.getText()
    return tree
def parse_file(path):
    """Read a Java source file and return a parser primed over its tokens.

    Args:
        path: Filesystem path to the Java source file.

    Returns:
        A ``JavaParser`` with parse-tree building enabled; the caller
        invokes a rule method (e.g. ``parser.compilationUnit()``) on it.
    """
    # Context manager closes the handle promptly — the original leaked
    # the file object returned by open(); also avoid shadowing the
    # builtin name ``file``.
    with open(path, 'r') as source_file:
        code = source_file.read()
    codeStream = InputStream(code)
    lexer = JavaLexer(codeStream)  # First lexing way
    tokens = CommonTokenStream(lexer)
    parser = JavaParser(tokens)
    parser.buildParseTrees = True
    return parser
def main(argv):
    """Parse Java source and print token-feature diagnostics.

    Uses the file named by ``argv[1]`` when given, otherwise the
    module-level ``sample`` string.
    """
    # Renamed from ``input`` to avoid shadowing the builtin.
    if len(argv) > 1:
        char_stream = FileStream(argv[1])
    else:
        char_stream = InputStream(sample)
    lexer = JavaLexer(char_stream)
    stream = CommonTokenStream(lexer)
    parser = JavaParser(stream)
    tree = parser.compilationUnit()
    # print(Trees.toStringTree(tree, None, parser))
    # Single-argument print() calls are valid in both Python 2 and 3;
    # the bare Python-2 print statements were a syntax error under 3.
    print("Grammar %s has %d rules, %d tokens" % (
        parser.grammarFileName, len(parser.ruleNames), len(lexer.ruleNames)))
    print(', '.join(CollectTokenFeatures.feature_names))
    token_features = CollectTokenFeatures(stream)
    walker = ParseTreeWalker()
    walker.walk(token_features, tree)
    print(token_features.feature_names)
def extract_data(code):
    """Parse a Java code string and collect per-token formatting features.

    Returns:
        A 5-tuple ``(tokens, inject_newlines, indent, whitespace,
        features)`` — the token list plus the parallel lists gathered by
        the ``CollectTokenFeatures`` listener during the tree walk.
    """
    token_stream = CommonTokenStream(JavaLexer(InputStream(code)))
    tree = JavaParser(token_stream).compilationUnit()
    listener = CollectTokenFeatures(token_stream)
    ParseTreeWalker().walk(listener, tree)
    return (token_stream.tokens,
            listener.inject_newlines,
            listener.indent,
            listener.whitespace,
            listener.features)
def main(argv):
    """Parse Java source (file from argv[1], else the module-level
    ``sample`` string) and print token-feature diagnostics.
    """
    # Renamed from ``input`` to avoid shadowing the builtin.
    if len(argv) > 1:
        char_stream = FileStream(argv[1])
    else:
        char_stream = InputStream(sample)
    lexer = JavaLexer(char_stream)
    stream = CommonTokenStream(lexer)
    parser = JavaParser(stream)
    tree = parser.compilationUnit()
    # print(Trees.toStringTree(tree, None, parser))
    # Single-argument print() calls work in both Python 2 and 3; the
    # original Python-2 print statements break under Python 3.
    print("Grammar %s has %d rules, %d tokens" % (
        parser.grammarFileName, len(parser.ruleNames), len(lexer.ruleNames)))
    print(', '.join(CollectTokenFeatures.feature_names))
    token_features = CollectTokenFeatures(stream)
    walker = ParseTreeWalker()
    walker.walk(token_features, tree)
    print(token_features.feature_names)
def parse_for_methods(repo_path):
    """Run the ANTLR4 parse over one Java file and walk it with PatternListener.

    Args:
        repo_path: Path to the Java source file; also used when logging
            parse failures.
    """
    try:
        char_stream = FileStream(repo_path, encoding='utf-8')
        token_stream = CommonTokenStream(JavaLexer(char_stream))
        tree = JavaParser(token_stream).compilationUnit()
        # PatternListener walks only class and method declarations.
        ParseTreeWalker().walk(PatternListener(), tree)
    except Exception as exc:
        # Best-effort: report the failing file and keep going.
        print("Unexpected error: " + repo_path + " " + str(exc))
def main(argv):
    """Parse the Java file named by ``argv[1]`` and print its UML graph.

    Returns ``False`` (after printing nothing) when the symbol table
    cannot be initialized.
    """
    lexer = JavaLexer(FileStream(argv[1]))
    parser = JavaParser(CommonTokenStream(lexer))
    tree = parser.compilationUnit()
    listener = CompilationUnitListener()
    ParseTreeWalker().walk(listener, tree)
    Classes = listener.Classes
    Interfaces = listener.Interfaces
    symbolTable = {}
    if InitalizeSymbolTable(symbolTable, Classes, Interfaces) < 0:
        return False
    graph = UMLGraph()
    for iface in Interfaces:
        graph.addInterface(iface)
    for cls in Classes:
        graph.addClass(cls)
        # Only draw relations whose target actually exists in this unit.
        if cls.Extends in symbolTable:
            graph.addExtendsRelation(cls.ClassName, cls.Extends)
        for impl in cls.ImplementList:
            if impl["Type"] in symbolTable:
                graph.addImplementsRelation(cls.ClassName, impl["Type"])
    print(str(graph))
def standardizeJava(javaFiles: List[HookFile]) -> List[StandardizedFile]:
    """Rewrite each Java file's identifiers into category codes.

    Visits the parse tree so ``JavaSyntax`` accumulates identifier
    lists, then replaces variables -> "V", functions -> "F",
    classes -> "C", types -> "T" (in that order), strips tabs, joins the
    space-split words back together, and lower-cases the result.

    Returns:
        One ``StandardizedFile`` per input file.
    """
    results: List[StandardizedFile] = []
    for source in javaFiles:
        token_stream = CommonTokenStream(JavaLexer(InputStream(source.fileData)))
        tree = JavaParser(token_stream).compilationUnit()
        JavaParserVisitor().visit(tree)
        variables, functions, classes, types = JavaSyntax.getLists()
        words = source.fileData.split(' ')
        # Replacement order matters and matches the original:
        # variables, then functions, then classes, then types.
        replacement_plan = ((variables, "V"), (functions, "F"),
                            (classes, "C"), (types, "T"))
        for index, word in enumerate(words):
            for names, code in replacement_plan:
                for name in names:
                    word = replaceIfContains(name, word, code)
            words[index] = word
        normalized = ''.join(words).replace('\t', '').lower()
        results.append(StandardizedFile(source, normalized))
    JavaSyntax.clearLists()
    return results
def JavaCode2UML(self, string):
    """Build a UML graph description from a Java source string.

    Returns:
        The graph rendered via ``str``, or ``False`` when the symbol
        table cannot be initialized.
    """
    token_stream = CommonTokenStream(JavaLexer(InputStream(string)))
    tree = JavaParser(token_stream).compilationUnit()
    listener = CompilationUnitListener()
    ParseTreeWalker().walk(listener, tree)
    Classes = listener.Classes
    Interfaces = listener.Interfaces
    symbolTable = {}
    if InitalizeSymbolTable(symbolTable, Classes, Interfaces) < 0:
        return False
    graph = UMLGraph()
    for iface in Interfaces:
        graph.addInterface(iface)
    for cls in Classes:
        graph.addClass(cls)
        # Only add relations whose target type is known in this unit.
        if cls.Extends in symbolTable:
            graph.addExtendsRelation(cls.ClassName, cls.Extends)
        for impl in cls.ImplementList:
            if impl["Type"] in symbolTable:
                graph.addImplementsRelation(cls.ClassName, impl["Type"])
    return str(graph)
def format_code(newline_forest, indent_forest, whitespace_forest, vec, code):
    """Tokenize ``code`` and predict newline/indent/whitespace per token.

    Token line/column information is zeroed first so the
    ``ProcessTokens`` listener can assign positions on the fly while
    walking the parse tree.
    """
    token_stream = CommonTokenStream(JavaLexer(InputStream(code)))
    token_stream.fill()
    # Wipe location info; the listener recomputes it during the walk.
    for token in token_stream.tokens:
        token.line = 0
        token.column = 0
    tree = JavaParser(token_stream).compilationUnit()
    processor = ProcessTokens(newline_forest, indent_forest,
                              whitespace_forest, vec, token_stream)
    ParseTreeWalker().walk(processor, tree)
__author__ = 'jszheng'

import sys

from antlr4 import *
from antlr4.InputStream import InputStream
from JavaLexer import JavaLexer
from JavaParser import JavaParser
from ExtractInterfaceListener import ExtractInterfaceListener


def _extract_interface(char_stream):
    """Parse the stream and walk it with ExtractInterfaceListener."""
    parser = JavaParser(CommonTokenStream(JavaLexer(char_stream)))
    tree = parser.compilationUnit()
    ParseTreeWalker().walk(ExtractInterfaceListener(parser), tree)


if __name__ == '__main__':
    # File argument when given, otherwise one line from stdin.
    if len(sys.argv) > 1:
        _extract_interface(FileStream(sys.argv[1]))
    else:
        _extract_interface(InputStream(sys.stdin.readline()))
def parse_java_deep(file):
    """Extract per-method information from a Java file via ctags + ANTLR.

    Runs ctags to locate method declarations, slices each method's text
    out of the file by line range, then parses the member declaration
    with the ANTLR JavaParser to collect variables, parameters, data
    types and method calls into ``function`` instances.

    Returns:
        list of populated ``function`` instances, one per method found.
    """
    # ctags: one tab-separated record per symbol; --fields=neK adds
    # kind plus start/end line-number fields.
    Command = "ctags -f - --kinds-java=* --fields=neK " + file
    global delimiter
    delimiter = "\r\0?\r?\0\r"
    try:
        # NOTE(review): shell=True with ``file`` interpolated into the
        # command string is unsafe if the path is untrusted — confirm
        # callers only pass trusted paths.
        astString = subprocess.check_output(Command, stderr=subprocess.STDOUT, shell=True).decode()
    except subprocess.CalledProcessError as e:
        # Best-effort: fall through with an empty ctags listing.
        print("Parser Error:", e)
        astString = ""
    # NOTE(review): file handle is never closed here.
    f = open(file, 'r')
    lines = f.readlines()
    methodList = astString.split('\n')
    method = re.compile(r'(method)')
    number = re.compile(r'(\d+)')
    # Greedy: captures from the first '{' to the last '}' in the slice.
    funcBody = re.compile(r'{([\S\s]*)}')
    string = ""
    funcId = 1
    methodInstanceList = []
    for i in methodList:
        # Collapse whitespace runs, then split the ctags record on tabs.
        elemList = re.sub(r'[\t\s ]{2,}', '', i)
        elemList = elemList.split("\t")
        methodInstance = function(file)
        methodInstance.funcBody = ''
        # Keep only records whose kind field is "method" and that carry
        # enough fields to hold both line numbers.
        if i != '' and method.match(elemList[3]) and len(elemList) >= 6:
            methodInstance.name = elemList[0]
            methodInstance.parentFile = elemList[1]
            # (start_line, end_line) pulled from the ctags n/e fields.
            methodInstance.lines = (int(number.search(elemList[4]).group(0)), int(number.search(elemList[5]).group(0)))
            methodInstance.parentNumLoc = len(lines)
            string = ""
            # Re-assemble the method's source text from its line range.
            string = string.join(lines[methodInstance.lines[0] - 1:methodInstance.lines[1]])
            if funcBody.search(string):
                methodInstance.funcBody = methodInstance.funcBody + funcBody.search(
                    string).group(0)
                # Parse just this member declaration with ANTLR and
                # collect identifiers via the listener walk.
                lexer = JavaLexer(InputStream(string))
                tokens = CommonTokenStream(lexer)
                parser = JavaParser(tokens)
                tree = parser.memberDeclaration()
                walker = ParseTreeWalker()
                listener = JavaParserListener()
                listener.variables = []
                listener.parameters = []
                listener.dataTypes = []
                listener.methodCalls = []
                walker.walk(listener, tree)
                methodInstance.variableList = listener.variables
                methodInstance.dataTypeList = listener.dataTypes
                methodInstance.funcCalleeList = listener.methodCalls
                methodInstance.parameterList = listener.parameters
            else:
                # No brace-delimited body found (e.g. abstract method).
                methodInstance.funcBody = " "
            methodInstance.funcId = funcId
            funcId += 1
            methodInstanceList.append(methodInstance)
            print(methodInstance.funcBody)
    # print(tree.toStringTree(recog=parser))
    # Not finished
    return methodInstanceList
"""Smoke-test script: lex and parse a Java file with the antlr3 runtime."""
import sys

import antlr3
from JavaLexer import JavaLexer
from JavaParser import JavaParser

# Read via a context manager so the handle is closed — the original
# leaked the file object returned by open().
with open(sys.argv[1]) as _source:
    cStream = antlr3.StringStream(_source.read())
lexer = JavaLexer(cStream)
tStream = antlr3.CommonTokenStream(lexer)
parser = JavaParser(tStream)
parser.compilationUnit()
# Single-argument print() is valid in both Python 2 and 3; the bare
# Python-2 print statement was a syntax error under Python 3.
print("finished parsing OK")