def flatten_interfaces(node):
    """Collect the InterfaceType nodes under *node* into an 'Interfaces' node.

    Every InterfaceType found contributes one 'Type' child, built around a
    'ReferenceType' node that gathers the Identifier nodes of that
    interface's name.
    """
    result = ASTNode('Interfaces')

    # Locate every InterfaceType, then reduce each one to its identifiers.
    found = flatten(node, 'Interfaces', 'InterfaceType')
    for interface_type in found.children:
        reference = flatten(interface_type, 'ReferenceType', 'Identifier')
        result.add(ASTNode('Type', None, [reference]))

    return result
def build_ast(parse_tree):
    """Build the AST for *parse_tree* and return its 'Root' node.

    The root receives a single child, produced by build_top_level() from the
    parse tree's 'CompilationUnit' child. If the parse tree has no such
    child, the root is returned without children.
    """
    ast = ASTNode('Root')

    # Start with CompilationUnit.
    unit = parse_tree.find_child('CompilationUnit')
    if unit is not None:
        ast.add(build_top_level(ASTNode('CompilationUnit'), unit))

    return ast
def flatten(node, root_name, leaf_name):
    """Gather the nodes named *leaf_name* found at or below *node*.

    Returns a new ASTNode named *root_name* whose children are the matching
    nodes. A matching node is taken as-is; its own children are not searched.
    """
    collected = ASTNode(root_name)
    pending = [node]
    while pending:
        current = pending.pop()
        if current.name != leaf_name:
            pending.extend(current.children)
            continue
        collected.add(current)

    # pop() takes the most recently pushed node, so siblings are visited
    # last-to-first and the matches come out reversed; flip them back.
    collected.children.reverse()
    return collected
def flatten_leaves(node, root_name):
    """Gather the childless descendants of *node* under a new node.

    Returns a new ASTNode named *root_name* whose children are the
    descendants of *node* that have no children of their own. *node* itself
    is never collected, so a childless *node* yields an empty result.

    The input node is left untouched.
    """
    root = ASTNode(root_name)

    # Work on a copy of the child list: using node.children itself as the
    # work stack would pop every child off the input node while flattening.
    stack = list(node.children)
    while stack:
        n = stack.pop()
        if n.children:
            stack.extend(n.children)
        else:
            root.add(n)

    # pop() visits siblings last-to-first, so the leaves were added in
    # reverse; restore their original order.
    root.children.reverse()
    return root
def build_statement_wts(node):
    """Build the AST node for the statement held in the first child of *node*.

    Dispatches on that child's name:
      * 'Block'               -> result of build_block()
      * 'EmptyStatement'      -> an empty 'EmptyStatement' node
      * 'ExpressionStatement' -> 'ExpressionStatement' wrapping its expression
      * anything else         -> treated as a return statement; the
                                 'ReturnStatement' node gets the expression
                                 as a child only when one is present
    """
    inner = node[0]
    kind = inner.name

    if kind == 'Block':
        return build_block(inner)

    if kind == 'EmptyStatement':
        return ASTNode('EmptyStatement')

    if kind == 'ExpressionStatement':
        # ExpressionStatement => StatementExpression.
        result = ASTNode('ExpressionStatement')
        result.add(build_expr(inner[0]))
        return result

    # ReturnStatement
    result = ASTNode('ReturnStatement')
    if inner[1].name == 'Expression':
        result.add(build_expr(inner[1]))
    return result
def build_block(node):
    """Build a 'Block' node from the block parse node *node*.

    The second child of *node* is inspected: unless it is named
    'BlockStatements' the block is returned empty. Otherwise each
    BlockStatement under it is converted, either as a local variable
    declaration or as a regular statement.
    """
    result = ASTNode('Block')

    body = node[1]
    if body.name == 'BlockStatements':
        for entry in flatten(body, 'Statements', 'BlockStatement').children:
            first = entry[0]
            if first.name == 'LocalVariableDeclarationStatement':
                built = build_local_variable_declaration(first[0])
            else:
                # Statement
                built = build_statement(first)
            result.add(built)

    return result
def build_local_variable_declaration(node):
    """Build a 'LocalVariableDeclaration' node from *node*.

    The result always has three children, in order: the type (from
    build_type), the name node, and an 'Initializer' node. The initializer
    node is empty when the declarator has a single child, and otherwise
    wraps the built initializer expression.
    """
    declaration = ASTNode('LocalVariableDeclaration')
    declaration.add(build_type(node[0]))

    declarator = node[1][0]
    declaration.add(declarator[0][0])  # Name

    if len(declarator.children) <= 1:
        # No initializer
        init_node = ASTNode('Initializer')
    else:
        init_node = ASTNode('Initializer', None,
                            [build_expr(declarator[2][0])])
    declaration.add(init_node)

    return declaration
def build_array_access(node):
    """Build an 'ArrayAccess' node from *node*.

    The result has two children: an 'ArrayReceiver' describing the array
    being indexed, followed by the built index expression.
    """
    access = ASTNode('ArrayAccess')
    receiver = ASTNode('ArrayReceiver')

    # The array is either a (possibly qualified) name or a primary.
    target = node[0]
    if target.name == 'Name':
        receiver_child = flatten(target, 'Name', 'Identifier')
    else:
        receiver_child = build_primary(target)
    receiver.add(receiver_child)
    access.add(receiver)

    # The index expression sits at position 2.
    access.add(build_expr(node[2]))
    return access
def build_field_access(node):
    """Build a 'FieldAccess' node from *node*.

    The result has a 'FieldName' child wrapping the node at position 2,
    followed by a 'FieldReceiver' child. The receiver wraps the built
    primary when the first child is named 'Primary'; otherwise the first
    child is wrapped unchanged.
    """
    result = ASTNode('FieldAccess')
    result.add(ASTNode('FieldName', None, [node[2]]))

    owner = node[0]
    if owner.name == 'Primary':
        receiver_child = build_primary(owner)
    else:
        # Super
        receiver_child = owner
    result.add(ASTNode('FieldReceiver', None, [receiver_child]))

    return result
def build_type(node):
    """Build a 'Type' node from the type parse node *node*.

    The single child of the result is one of:
      * a 'PrimitiveType' node (leaves of the primitive type),
      * a 'ReferenceType' node (identifiers of a class/interface name),
      * an 'ArrayType' node wrapping one of the two above.
    """
    result = ASTNode('Type')

    outer = node[0]
    if outer.name == 'PrimitiveType':
        result.add(flatten_leaves(outer, 'PrimitiveType'))
        return result

    middle = outer[0]
    if middle.name == 'ClassOrInterfaceType':
        result.add(flatten(middle, 'ReferenceType', 'Identifier'))
        return result

    # Anything else is handled as an array type; its element type is one
    # level further down.
    element = middle[0]
    if element.name == 'PrimitiveType':
        element_type = flatten_leaves(element, 'PrimitiveType')
    else:
        # ArrayType of ClassOrInterfaceType
        element_type = flatten(element, 'ReferenceType', 'Identifier')
    result.add(ASTNode('ArrayType', None, [element_type]))
    return result
def build_if_statement(node):
    """Build an 'IfStatement' node from *node*.

    Children of the result, in order: the condition expression (position 2
    of *node*), the statement at position 4, and - only when *node* has more
    than five children - the else statement at position 6.
    """
    if_node = ASTNode('IfStatement')

    condition = build_expr(node[2])
    if_node.add(condition)

    then_branch = build_statement(node[4])
    if_node.add(then_branch)

    has_else = len(node.children) > 5
    if has_else:
        if_node.add(build_statement(node[6]))

    return if_node
def build_parameters(node):
    """Build a 'Parameters' node from the FormalParameter nodes under *node*.

    Each FormalParameter becomes a 'Parameter' child holding its type (via
    build_type) followed by its name node.
    """
    result = ASTNode('Parameters')

    formals = flatten(node, 'Parameters', 'FormalParameter')
    for formal in formals.children:
        entry = ASTNode('Parameter')
        entry.add(build_type(formal[0]))
        entry.add(formal[1][0])  # Name
        result.add(entry)

    return result
def build_interface_structure(node):
    """Build an 'InterfaceDeclaration' node from *node*.

    Children of the result, in order: 'Modifiers', 'InterfaceName',
    'Interfaces' (the extended interfaces, empty if none) and 'Methods'.

    Logs an error and exits the process with status 1 when *node* has no
    'InterfaceBody' child.
    """
    interface_decl = ASTNode('InterfaceDeclaration')

    # Modifiers and name (same for classes and interfaces).
    interface_decl.add(flatten_leaves(node[0], 'Modifiers'))
    interface_decl.add(ASTNode('InterfaceName', None, [node[2]]))

    # Extended interfaces; stays an empty 'Interfaces' node if there are none.
    extended = ASTNode('Interfaces')
    extends_clause = node.find_child('ExtendsInterfaces')
    if extends_clause is not None:
        extended = flatten_interfaces(extends_clause)
    interface_decl.add(extended)

    interface_body = node.find_child('InterfaceBody')
    if interface_body is None:
        logging.error("AST: missing InterfaceBody")
        sys.exit(1)

    # Joos only allows methods to be in interfaces.
    member_decls = flatten(interface_body, 'Methods',
                           'InterfaceMemberDeclaration')
    interface_decl.add(build_methods(member_decls))

    return interface_decl
def build_assignment(node):
    """Build an 'Assignment' node from *node*.

    The result has two children: the built left-hand side (position 0 of
    *node*) and the built expression being assigned (position 2).
    """
    result = ASTNode('Assignment')
    target = build_left_hand_side(node[0])
    value = build_expr(node[2])
    result.add(target)
    result.add(value)
    return result
def build_arguments(node):
    """Build an 'Arguments' node from the Expression nodes under *node*.

    Every Expression found by flatten() is converted with build_expr() and
    added, in order, as a child of the result.
    """
    arguments = ASTNode('Arguments')

    args = flatten(node, 'Arguments', 'Expression')
    # Walk the child list explicitly, like every other builder in this
    # file, rather than relying on ASTNode itself being iterable.
    for arg in args.children:
        arguments.add(build_expr(arg))

    return arguments
def build_constructors(node):
    """Build a 'Constructors' node from the children of *node*.

    Each child becomes a 'ConstructorDeclaration' whose children are, in
    order: 'Modifiers', the name node, 'Parameters' (empty when there is no
    formal parameter list) and the body 'Block'. The declaration's
    `decl_order` attribute is copied over, defaulting to -1 when absent.
    """
    result = ASTNode('Constructors')

    for parsed in node.children:
        ctor = ASTNode('ConstructorDeclaration')
        ctor.decl_order = getattr(parsed, 'decl_order', -1)

        # Modifiers.
        ctor.add(flatten_leaves(parsed[0], 'Modifiers'))

        # Name.
        declarator = parsed[1]
        ctor.add(declarator[0][0])

        # Parameters.
        maybe_params = declarator[2]
        if maybe_params.name == 'FormalParameterList':
            params = build_parameters(maybe_params)
        else:
            params = ASTNode('Parameters')
        ctor.add(params)

        # Body.
        ctor.add(build_block(parsed[2]))

        result.add(ctor)

    return result
def build_cast_expression(node):
    """Build a 'CastExpression' node from *node*.

    The result has two children: a 'Type' node describing the target type,
    and the built expression being cast (the last child of *node*).
    """
    cast = ASTNode('CastExpression')
    type_node = ASTNode('Type')

    target = node[1]
    if node[2].name == 'Dims':
        # Array type: wrap the element type in an 'ArrayType' node.
        if target.name == 'PrimitiveType':
            element = flatten_leaves(target, 'PrimitiveType')
        else:
            # Name
            element = flatten(target, 'ReferenceType', 'Identifier')
        type_node.add(ASTNode('ArrayType', None, [element]))
    elif target.name == 'Expression':
        # Actually a name.
        type_node.add(flatten(target, 'ReferenceType', 'Identifier'))
    else:
        # PrimitiveType
        type_node.add(flatten_leaves(target, 'PrimitiveType'))
    cast.add(type_node)

    # Last child is the thing we're casting.
    cast.add(build_expr(node[-1]))
    return cast
def build_while_statement(node):
    """Build a 'WhileStatement' node from *node*.

    The result has two children: the condition expression (position 2 of
    *node*) and the loop body statement (position 4).
    """
    loop = ASTNode('WhileStatement')

    condition = build_expr(node[2])
    loop.add(condition)

    body = build_statement(node[4])
    loop.add(body)

    return loop
def build_for_statement(node):
    """Build a 'ForStatement' node from *node*.

    The result always has four children, in order: 'ForInit',
    'ForCondition', 'ForUpdate' and 'ForBody'. Each of them is left empty
    when the corresponding part is missing from *node*.
    """
    loop = ASTNode('ForStatement')

    # Look up every part first; any of them may be absent (None).
    init_part = node.find_child('ForInit')
    cond_part = node.find_child('Expression')
    update_part = node.find_child('ForUpdate')
    body_part = node.find_child('StatementNoShortIf')
    if body_part is None:
        body_part = node.find_child('Statement')

    # Initializer: a statement expression or a local variable declaration.
    init_node = ASTNode('ForInit')
    if init_part is not None:
        first = init_part[0]
        if first.name == 'StatementExpressionList':
            init_node.add(build_expr(first[0]))
        else:
            # LocalVariableDeclaration
            init_node.add(build_local_variable_declaration(first))
    loop.add(init_node)

    # Condition.
    cond_node = ASTNode('ForCondition')
    if cond_part is not None:
        cond_node.add(build_expr(cond_part))
    loop.add(cond_node)

    # Update.
    update_node = ASTNode('ForUpdate')
    if update_part is not None:
        update_node.add(build_expr(update_part[0][0]))
    loop.add(update_node)

    # Body.
    body_node = ASTNode('ForBody')
    if body_part is not None:
        body_node.add(build_statement(body_part))
    loop.add(body_node)

    return loop
def build_fields(node):
    """Build a 'Fields' node from the children of *node*.

    Each child becomes a 'FieldDeclaration' whose children are, in order:
    'Modifiers', the type, the name node and an 'Initializer' node (empty
    when the declarator has a single child). The declaration's `decl_order`
    attribute is copied over, defaulting to -1 when absent.
    """
    result = ASTNode('Fields')

    for parsed in node.children:
        field = ASTNode('FieldDeclaration')
        field.decl_order = getattr(parsed, 'decl_order', -1)

        # Modifiers and type.
        field.add(flatten_leaves(parsed[0], 'Modifiers'))
        field.add(build_type(parsed[1]))

        # Name.
        declarator = parsed[2][0]  # VariableDeclarator
        field.add(declarator[0][0])

        # Initializer.
        if len(declarator.children) <= 1:
            # No initializer
            init_node = ASTNode('Initializer')
        else:
            init_node = ASTNode('Initializer', None,
                                [build_expr(declarator[2][0])])
        field.add(init_node)

        result.add(field)

    return result
def build_creation_expression(node):
    """Build a 'CreationExpression' node from *node*.

    Handles nodes named 'ArrayCreationExpression' and, for any other name,
    class instance creation. The result has two children: a 'Type' node and
    an 'Arguments' node.

    For array creation the created type is nested inside an 'ArrayType'
    node, and the arguments hold the dimension expression when one is given.
    """
    creation = ASTNode('CreationExpression')

    # Shape the type so that it matches what a 'Type' node looks like
    # elsewhere. Both kinds of creation expression keep the created type at
    # index 1.
    type_node = ASTNode('Type')
    is_array = node.name == 'ArrayCreationExpression'
    created = node[1]
    if created.name == 'PrimitiveType':
        base_type = flatten_leaves(created, 'PrimitiveType')
    else:
        base_type = flatten(created, 'ReferenceType', 'Identifier')
    if is_array:
        # Type => ArrayType => PrimitiveType / ReferenceType
        type_node.add(ASTNode('ArrayType', None, [base_type]))
    else:
        type_node.add(base_type)
    creation.add(type_node)

    # Arguments.
    arguments = ASTNode('Arguments')
    if is_array:
        if node[2].name == 'DimExprs':
            arguments.add(build_expr(node[2][0][1]))
    elif node[3].name == 'ArgumentList':
        # ClassInstanceCreationExpression
        arguments = build_arguments(node[3])
    creation.add(arguments)

    return creation
def build_methods(node):
    """Build a 'Methods' node from the children of *node*.

    Each child becomes a 'MethodDeclaration' whose children are, in order:
    'Modifiers', the return 'Type', the name node, 'Parameters' (empty when
    there is no formal parameter list) and 'MethodBody'. The declaration's
    `decl_order` attribute is copied over, defaulting to -1 when absent.
    """
    result = ASTNode('Methods')

    for parsed in node.children:
        method = ASTNode('MethodDeclaration')
        method.decl_order = getattr(parsed, 'decl_order', -1)

        header = parsed[0]

        # Modifiers.
        method.add(flatten_leaves(header[0], 'Modifiers'))

        # Return type: a 'Void' node is wrapped directly, anything else
        # goes through build_type().
        return_type = header[1]
        if return_type.name == 'Void':
            method.add(ASTNode('Type', None, [return_type]))
        else:
            method.add(build_type(return_type))

        # Name.
        declarator = header[2]
        method.add(declarator[0])

        # Parameters.
        maybe_params = declarator[2]
        if maybe_params.name == 'FormalParameterList':
            method.add(build_parameters(maybe_params))
        else:
            method.add(ASTNode('Parameters'))

        # Body. The extra 'MethodBody' level lets later stages tell apart:
        #   1. no body at all (i.e., abstract)  -> 'MethodBody' has no child
        #   2. an empty body (i.e., {})         -> 'MethodBody' holds a Block
        #   3. a nonempty body                  -> 'MethodBody' holds a Block
        body = ASTNode('MethodBody')
        tail = parsed[1]
        lacks_body = tail.name == 'SemiColon' or tail[0].name == 'SemiColon'
        if not lacks_body:
            body.add(build_block(tail[0]))
        method.add(body)

        result.add(method)

    return result
def build_class_structure(node):
    """Build a 'ClassDeclaration' node from *node*.

    Children of the result, in order: 'Modifiers', 'ClassName',
    'Superclass' (empty when nothing is extended), 'Interfaces' (empty when
    nothing is implemented), 'Fields', 'Constructors' and 'Methods'.

    Logs an error and exits the process with status 1 when *node* has no
    'ClassBody' child.
    """
    class_decl = ASTNode('ClassDeclaration')

    # Modifiers and name (same for classes and interfaces).
    class_decl.add(flatten_leaves(node[0], 'Modifiers'))
    class_decl.add(ASTNode('ClassName', None, [node[2]]))

    # Superclass.
    extends_clause = node.find_child('SuperStuff')
    if extends_clause is not None:
        parent_type = ASTNode(
            'Type', None,
            [flatten(extends_clause, 'ReferenceType', 'Identifier')])
        class_decl.add(ASTNode('Superclass', None, [parent_type]))
    else:
        class_decl.add(ASTNode('Superclass'))

    # Implemented interfaces; stays an empty 'Interfaces' node if none.
    implemented = ASTNode('Interfaces')
    implements_clause = node.find_child('Interfaces')
    if implements_clause is not None:
        implemented = flatten_interfaces(implements_clause)
    class_decl.add(implemented)

    class_body = node.find_child('ClassBody')
    if class_body is None:
        logging.error("AST: missing ClassBody")
        sys.exit(1)

    # Fields, constructors and methods are each picked out of the member
    # declarations and handed to their builder, in that order.
    members = flatten(class_body, 'Members', 'ClassBodyDeclaration')
    for group_name, leaf_name, builder in (
            ('Fields', 'FieldDeclaration', build_fields),
            ('Constructors', 'ConstructorDeclaration', build_constructors),
            ('Methods', 'MethodDeclaration', build_methods)):
        class_decl.add(builder(flatten(members, group_name, leaf_name)))

    return class_decl
def build_method_invocation(node):
    """Build a 'MethodInvocation' node from *node*.

    Children of the result, in order: 'MethodName', 'MethodReceiver' (the
    thing the method is called on; empty when there is none) and
    'Arguments' (empty when *node* has no 'ArgumentList' child).
    """
    invocation = ASTNode('MethodInvocation')
    receiver = ASTNode('MethodReceiver')

    callee = node[0]
    if callee.name != 'Name':
        # Name is at position 2, receiver is the Primary at position 0.
        invocation.add(ASTNode('MethodName', None, [node[2]]))
        receiver.add(build_primary(callee))
    else:
        identifiers = flatten(callee, 'MethodName', 'Identifier')
        if len(identifiers.children) == 1:
            # Unqualified call: the lone identifier is the method name and
            # there is no receiver (Joos allows an implicit `this`).
            invocation.add(identifiers)
        else:
            # Qualified call: the last identifier is the method name and
            # whatever precedes it names the receiver.
            last_identifier = identifiers.children.pop()
            invocation.add(ASTNode('MethodName', None, [last_identifier]))
            identifiers.name = 'Name'
            receiver.add(identifiers)
    invocation.add(receiver)

    argument_list = node.find_child('ArgumentList')
    if argument_list is None:
        invocation.add(ASTNode('Arguments'))
    else:
        invocation.add(build_arguments(argument_list))

    return invocation