def handle_label_stmt(self, ast_item, parent_node):
    """Create the node for a C++ label statement and a NAME node holding its text.

    Returns the label-statement node so the caller can attach the labeled body.
    """
    res_tok = self.tokenizers['RES']
    label_stmt = Node(res_tok.get_token(ast_item.kind.name),
                      is_reserved=True,
                      parent=parent_node)
    # NOTE(review): the NAME node is attached to parent_node rather than to
    # label_stmt — preserved as-is; confirm this is the intended tree shape.
    name_node = Node(res_tok.get_token('NAME'), is_reserved=True, parent=parent_node)
    self.create_terminal_nodes(ast_item.spelling, ast_item, name_node)
    return label_stmt
def handle_type_ref(self, ast_item, parent_node):
    """Attach a TYPE_REF node with the referenced type name, skipping std types."""
    if 'std' in ast_item.type.spelling:
        return
    type_ref = Node(self.tokenizers['RES'].get_token(ast_item.kind.name),
                    is_reserved=True,
                    parent=parent_node)
    Node(self.tokenizers['NAME'].get_token(ast_item.type.spelling),
         is_reserved=False,
         parent=type_ref,
         decl_line=ast_item.referenced.location.line)
def handle_lambda_expr(self, ast_item, parent_node, parse_item, program):
    """Build the subtree for a C++ lambda: capture clauses, parameters, then body.

    parse_item is the parser's recursive entry point, used for parameter
    default values / children; returns the lambda node so the caller can
    attach the body under it.
    """
    lambda_expr = Node(self.tokenizers['RES'].get_token(
        ast_item.kind.name), is_reserved=True, parent=parent_node)
    # Drop the first token ('['), then collect everything up to the matching
    # ']' — that text is the capture clause list.
    tokens = [t.spelling for t in ast_item.get_tokens()][1:]
    capture_clause_ended = False
    capture_clause_tokens = []
    for token in tokens:
        if token == ']':
            capture_clause_ended = True
        if capture_clause_ended:
            break
        capture_clause_tokens.append(token)
    # One CAPTURE_CLAUSE node per comma-separated capture (e.g. '&x', '=').
    capture_clauses = ''.join(capture_clause_tokens).split(',')
    for capture_clause in capture_clauses:
        capt_clause_node = Node(
            self.tokenizers['RES'].get_token('CAPTURE_CLAUSE'),
            is_reserved=True, parent=lambda_expr)
        self.create_terminal_nodes(capture_clause, ast_item, capt_clause_node)
    children = ast_item.get_children()
    for child in children:
        # NOTE(review): this flag is reset on every iteration, so the
        # 'if not parm_declarations' guard below is always true and each
        # parameter gets its own PARM_DECL wrapper. If a single shared
        # PARM_DECL node was intended, the flag should be initialized
        # before the loop — confirm against downstream consumers.
        parm_declarations = False
        if child.kind == CursorKind.PARM_DECL:
            if not parm_declarations:
                parm_decl = Node(
                    self.tokenizers['RES'].get_token('PARM_DECL'),
                    is_reserved=True, parent=lambda_expr)
                parm_declarations = True
            self.handle_type(child, parm_decl)
            declarator = Node(
                self.tokenizers['RES'].get_token('DECLARATOR'),
                is_reserved=True, parent=parm_decl)
            reference = Node(self.tokenizers['RES'].get_token('NAME'),
                             is_reserved=True, parent=declarator)
            self.create_terminal_nodes(child.spelling, child, reference)
            # Recurse into the parameter's own children (e.g. defaults).
            for c in child.get_children():
                parse_item(c, declarator, program)
    return lambda_expr
def create_type_node(self, type_string, parent_node):
    """Attach a subtree describing a single type component under parent_node.

    Numbers become INTEGER_LITERAL, known builtins become TYPE, unresolved
    template parameters become TYPE/'T', anything else becomes TYPE_REF.
    """
    res_tok = self.tokenizers['RES']
    if type_string.isdigit():
        # Numeric component (e.g. a template argument such as bitset<32>).
        holder = Node(res_tok.get_token('INTEGER_LITERAL'),
                      is_reserved=True, parent=parent_node)
        Node(self.tokenizers['LITERAL'].get_token(type_string),
             is_reserved=False, parent=holder)
    elif type_string in self.builtin_types:
        holder = Node(res_tok.get_token('TYPE'), is_reserved=True, parent=parent_node)
        Node(self.tokenizers['TYPE'].get_token(type_string),
             is_reserved=False, parent=holder)
    elif 'type-parameter' in type_string:
        # Clang spells unresolved template params 'type-parameter-…'; collapse to 'T'.
        holder = Node(res_tok.get_token('TYPE'), is_reserved=True, parent=parent_node)
        Node(self.tokenizers['TYPE'].get_token('T'), is_reserved=False, parent=holder)
    else:
        # User-defined type: reference it by name.
        holder = Node(res_tok.get_token('TYPE_REF'), is_reserved=True, parent=parent_node)
        Node(self.tokenizers['NAME'].get_token(type_string),
             is_reserved=False, parent=holder)
def build_type_node(self, type_string, record_type, parent_node):
    """Create a type subtree; for record types, wrap it in a TYPE_RECORD node.

    Returns the TYPE_RECORD_ELEMENTS node for record types (so the caller can
    attach element types under it), otherwise None.
    """
    if not record_type:
        self.create_type_node(type_string, parent_node)
        return None
    type_record = Node(self.tokenizers['RES'].get_token('TYPE_RECORD'),
                       is_reserved=True, parent=parent_node)
    self.create_type_node(type_string, type_record)
    return Node(self.tokenizers['RES'].get_token('TYPE_RECORD_ELEMENTS'),
                is_reserved=True, parent=type_record)
def handle_func_cast_expr(self, ast_item, parent_node):
    """Create a node for a functional cast (e.g. ``int(x)``) and attach its type."""
    cast_node = Node(self.tokenizers['RES'].get_token(ast_item.kind.name),
                     is_reserved=True,
                     parent=parent_node)
    self.handle_type(ast_item, cast_node)
    return cast_node
def handle_static_cast_expr(self, ast_item, parent_node):
    """Create a node for a ``static_cast<T>(x)`` expression and attach its type."""
    cast_node = Node(self.tokenizers['RES'].get_token(ast_item.kind.name),
                     is_reserved=True,
                     parent=parent_node)
    self.handle_type(ast_item, cast_node)
    return cast_node
def parse_ast(self, program: str, imports: list = None, thread_nr: int = 0):
    """Parse a C++ program string into a custom AST of Node objects.

    Args:
        program: C++ source text.
        imports: either None (imports are extracted from ``program``) or the
            string repr of a list of include lines, which is re-split here.
        thread_nr: used to give each thread its own temp file name.

    Returns:
        The root Node of the generated AST.
    """
    os.makedirs('temp', exist_ok=True)
    # One temp file per thread so concurrent clang invocations don't clobber
    # each other's in-memory file contents.
    temp_file_path = os.path.join('temp', f'tmp{thread_nr}.cpp')

    # Compiler arguments, extended with the system include paths (ccsyspath).
    args = '-x c++ --std=c++20'.split()
    syspath = ccsyspath.system_include_paths('clang')
    incargs = [b'-I' + inc for inc in syspath]
    args = args + incargs

    if imports is None:
        program, imports = extract_imports(program)
    else:
        # ``imports`` arrives as the repr of a list; strip the brackets and
        # split on quotes to recover the individual entries.
        imports = [
            ele for ele in imports[1:-1].split("'")
            if ele != '' and ele != ', '
        ]

    try:
        # Preprocess the program (expand macros) before handing it to clang.
        preprocessed_program = self.preprocess_program(program, temp_file_path,
                                                       imports)
        tu = self.index.parse(temp_file_path,
                              unsaved_files=[(temp_file_path,
                                              preprocessed_program)],
                              args=args,
                              options=0)
        # Keep only cursors that belong to the program itself (no import code).
        cursor_items = self.get_cursor_items(tu.cursor, temp_file_path)
        root_node = Node(self.tokenizers['RES'].get_token('root'),
                         is_reserved=True)
        for cursor_item in cursor_items:
            self.parse_item(cursor_item, root_node, program)
    finally:
        # Fix: previously the temp directory leaked whenever preprocessing or
        # parsing raised. ignore_errors also guards against another thread
        # having removed the directory first.
        # NOTE(review): removing the shared 'temp' dir can still race with
        # other threads' temp files — consider per-thread directories.
        shutil.rmtree('temp', ignore_errors=True)

    return root_node
def handle_operator(self, ast_item, parent_node):
    """Create a node named ``<KIND>_<op>`` for a binary/unary/other operator.

    The operator spelling is not exposed directly by libclang, so it is
    recovered from the token stream; returns the created operator node.
    """
    if ast_item.kind == CursorKind.BINARY_OPERATOR or ast_item.kind == CursorKind.COMPOUND_ASSIGNMENT_OPERATOR:
        try:
            # The operator token sits immediately after all tokens of the
            # left-hand operand, so its index equals the LHS token count.
            operator_index = len(
                list(list(ast_item.get_children())[0].get_tokens()))
            op_name = [
                list(ast_item.get_tokens())[operator_index].spelling
            ]
        except IndexError:
            # Fallback (e.g. when macro expansion makes get_tokens() drop
            # tokens): locate the operator via a negative index derived from
            # the difference between LHS and RHS token counts.
            len_first_child = len([
                t.spelling
                for t in list(ast_item.get_children())[0].get_tokens()
            ])
            len_second_child = len([
                t.spelling
                for t in list(ast_item.get_children())[1].get_tokens()
            ])
            operator_index = len_first_child - len_second_child - 1
            try:
                op_name = [
                    list(ast_item.get_tokens())[operator_index].spelling
                ]
            except IndexError:
                # Last resort: use the first token of the RHS operand.
                op_name = [[
                    t.spelling
                    for t in list(ast_item.get_children())[1].get_tokens()
                ][0]]
    elif ast_item.kind == CursorKind.UNARY_OPERATOR:
        tokens = list(ast_item.get_tokens())
        # Prefix operator: first token is the operator symbol.
        if utils.is_operator_token(tokens[0].spelling):
            op_name = tokens[0].spelling
            if op_name in ['++', '--']:
                op_name = 'PRE_' + op_name
        # Postfix operator: last token is the operator symbol.
        elif utils.is_operator_token(tokens[-1].spelling):
            op_name = tokens[-1].spelling
            if op_name in ['++', '--']:
                op_name = 'POST_' + op_name
        else:
            # Neither end looks like an operator — log and fall back to ''.
            print(
                f'UNARY OPERATOR EXCEPTION: {[t.spelling for t in tokens]}'
            )
            op_name = ''
        op_name = [op_name]
    else:
        # Other operator kinds (e.g. conditional) resolved by the helper.
        op_name = utils.get_operator(ast_item)
    # Reserved token is e.g. 'BINARY_OPERATOR_+' or 'UNARY_OPERATOR_PRE_++'.
    operator = Node(
        self.tokenizers['RES'].get_token(ast_item.kind.name.strip() + '_' +
                                         '_'.join(op_name)),
        is_reserved=True,
        parent=parent_node)
    return operator
def handle_typedef(self, ast_item, parent_node):
    """Build a TYPEDEF_DECL subtree: the aliased type followed by the new name."""
    res_tok = self.tokenizers['RES']
    # Top node mirrors the cursor kind (TYPEDEF_DECL).
    typedef_decl = Node(res_tok.get_token(ast_item.kind.name),
                        is_reserved=True,
                        parent=parent_node)
    # First child: the type being aliased.
    aliased_type = Node(res_tok.get_token('TYPE_DEF'),
                        is_reserved=True,
                        parent=typedef_decl)
    self.handle_type(ast_item, aliased_type)
    # Second child: the identifier introduced by the typedef.
    identifier = Node(res_tok.get_token('IDENTIFIER'),
                      is_reserved=True,
                      parent=typedef_decl)
    self.create_terminal_nodes(ast_item.spelling, ast_item, identifier)
def handle_literal(self, ast_item, parent_node, program):
    """Attach a literal node (kind + value) unless it is a bare statement.

    Literals that appear directly under a COMPOUND_STMT are expression
    statements with no effect and are skipped.
    """
    if self.tokenizers['RES'].get_label(parent_node.token) == 'COMPOUND_STMT':
        return
    lit_type = Node(self.tokenizers['RES'].get_token(ast_item.kind.name),
                    is_reserved=True,
                    parent=parent_node)
    token = next(ast_item.get_tokens(), None)
    if token:
        spelling = token.spelling
    else:
        # No tokens available (e.g. macro expansion) — slice the literal's
        # text straight out of the original source via its extent.
        extent = ast_item.extent
        spelling = program[extent.start.offset:extent.end.offset]
    Node(self.tokenizers['LITERAL'].get_token(spelling),
         is_reserved=False,
         parent=lit_type)
def handle_for_range(self, ast_item, parent_node):
    """Create the node for ``for (T x : v)`` and its loop-variable declaration.

    Only the first child (the loop variable) is handled here; the caller is
    expected to process the remaining children under the returned node.
    """
    res_tok = self.tokenizers['RES']
    stmt = Node(res_tok.get_token(ast_item.kind.name),
                is_reserved=True,
                parent=parent_node)
    # The first child of a range-for is always the loop-variable declaration.
    loop_var = next(ast_item.get_children())
    var_decl = Node(res_tok.get_token(loop_var.kind.name),
                    is_reserved=True,
                    parent=stmt)
    self.handle_type(loop_var, var_decl)
    declarator = Node(res_tok.get_token('DECLARATOR'),
                      is_reserved=True,
                      parent=var_decl)
    name_node = Node(res_tok.get_token('NAME'),
                     is_reserved=True,
                     parent=declarator)
    self.create_terminal_nodes(loop_var.spelling, loop_var, name_node)
    return stmt
def create_terminal_nodes(self, label, ast_item, parent_node, tokens=None):
    """Attach terminal NAME node(s) for ``label`` under ``parent_node``.

    The declaration line is taken from the referenced cursor when available,
    otherwise from the item itself. When ``self.split_terminals`` is set the
    label is split on type punctuation and one node is created per piece.
    ``tokens`` is unused but kept for interface compatibility.
    """
    if ast_item.referenced is not None:
        decl_line = ast_item.referenced.location.line
    else:
        decl_line = ast_item.location.line
    if self.split_terminals:
        # Split label on '[', ']', '<', '>', ' ', '::' and ','.
        # Fix: raw string for the regex (non-raw '\[' is a deprecated escape
        # that becomes a SyntaxWarning on modern Python), and the loop
        # variable no longer shadows the ``label`` parameter.
        split_label = [
            part for part in re.split(r'(\[|\]|<|>| |::|,)', label)
            if len(part.strip()) > 0
        ]
        for part in split_label:
            Node(self.tokenizers['NAME'].get_token(part),
                 is_reserved=False,
                 parent=parent_node,
                 decl_line=decl_line)
    else:
        Node(self.tokenizers['NAME'].get_token(label),
             is_reserved=False,
             parent=parent_node,
             decl_line=decl_line)
def extract_builtin_type(self, type_string, parent_node):
    """Normalize a builtin type spelling and build its subtree.

    Each ``*`` in the spelling adds a POINTER wrapper; qualifiers and
    namespace decorations are then stripped before delegating to
    ``build_type_subtree``.
    """
    # One POINTER node per '*' in the spelling (str.count replaces the
    # original re.findall('\*', ...) — same count, no regex needed).
    for _ in range(type_string.count('*')):
        parent_node = Node(self.tokenizers['RES'].get_token('POINTER'),
                           is_reserved=True,
                           parent=parent_node)
    # Strip pointer/reference marks, qualifiers and std decorations.
    # Fix: the original chained '*' and '&' replacements twice; str.replace
    # removes all occurrences, so once is sufficient (behavior identical).
    type_string = (type_string.replace('*', '')
                   .replace('&', '')
                   .replace('const', '')
                   .replace('std::', '')
                   .replace('::value_type', '')
                   .replace('struct', '')
                   .strip())
    # Remove spaces around non-alphanumeric separators (e.g. '< int' -> '<int').
    # Fix: raw strings for the regex pattern and replacement.
    type_string = re.sub(r'\s*([^A-Za-z,])\s*', r'\1', type_string)
    self.build_type_subtree(type_string, parent_node)
def handle_reference(self, ast_item, parent_node):
    """Create a REF / REF_BUILTIN node for a reference-like cursor.

    Avoids duplicating a name that the enclosing CALL_EXPR already carries.
    References whose declaration lives in the temp file ('tmp' in the
    location) are user code (REF); everything else is builtin (REF_BUILTIN).
    Returns the created node, or None when nothing was created.
    """
    if parent_node:
        # Names already attached to a CALL_EXPR parent (via NAME/REF/
        # REF_BUILTIN children) — used below to skip duplicates.
        parent_func_name = [
            '' if n.children[0].res else self.tokenizers['NAME'].get_label(
                n.children[0].token.split('::')[-1])
            for n in parent_node.children
            if self.tokenizers['RES'].get_label(
                n.token) in ['NAME', 'REF', 'REF_BUILTIN'] and self.
            tokenizers['RES'].get_label(parent_node.token) == 'CALL_EXPR'
        ]
    else:
        parent_func_name = []
    if ast_item.spelling \
    and ast_item.spelling not in parent_func_name:
        # Declared outside the temp source file -> treat as builtin.
        if 'tmp' not in str(ast_item.referenced.location):
            reference = Node(
                self.tokenizers['RES'].get_token('REF_BUILTIN'),
                True,
                parent=parent_node)
            Node(self.tokenizers['NAME_BUILTIN'].get_token(
                ast_item.spelling),
                 False,
                 parent=reference)
        else:
            # User-declared reference; record the declaration line for
            # later variable linking.
            reference = Node(self.tokenizers['RES'].get_token('REF'),
                             True,
                             parent=parent_node)
            Node(self.tokenizers['NAME'].get_token(ast_item.spelling),
                 False,
                 parent=reference,
                 decl_line=ast_item.referenced.location.line)
        return reference
    elif not ast_item.spelling and ast_item.kind == CursorKind.MEMBER_REF_EXPR:
        # Anonymous member reference (e.g. 'a.b'): recover the member name
        # as the token following the '.' in the raw token stream.
        tokens = [t.spelling for t in ast_item.get_tokens()]
        member_ref = tokens[tokens.index('.') + 1]
        reference = Node(self.tokenizers['RES'].get_token('REF'),
                         is_reserved=True,
                         parent=parent_node)
        Node(self.tokenizers['NAME'].get_token(member_ref),
             is_reserved=False,
             parent=reference,
             decl_line=ast_item.referenced.location.line)
        return reference
    # Otherwise: nothing created, caller keeps its current parent (None).
def parse_item(self, ast_item, parent_node, program):
    """Recursively translate a clang cursor into nodes of the custom AST.

    Dispatches on the cursor kind to the appropriate ``self.nh`` handler,
    then recurses into children — with several special cases that insert a
    synthetic COMPOUND_STMT for brace-less if/while/for bodies.
    """
    # Useless AST primitives
    skip_kinds = [
        CursorKind.UNEXPOSED_EXPR, CursorKind.OVERLOADED_DECL_REF,
        CursorKind.TEMPLATE_REF
    ]
    # Skip useless AST primitives and exceptions -> continue straight with
    # their children
    if ast_item.kind in skip_kinds \
    or 'operatorbool' == ast_item.spelling \
    or 'operator bool' == ast_item.spelling \
    or (('std::string' == ast_item.type.spelling \
    or 'basic_string' == ast_item.spelling) \
    and ast_item.kind in [CursorKind.TYPE_REF, CursorKind.CALL_EXPR]):
        pass
    # Parse typedef
    elif utils.is_typedef(ast_item):
        self.nh.handle_typedef(ast_item, parent_node)
    # Parse declaration
    elif ast_item.kind.is_declaration():
        parent_node = self.nh.handle_declaration(ast_item, parent_node,
                                                 self.parse_item, program)
    # Parse operator
    elif utils.is_operator(ast_item):
        parent_node = self.nh.handle_operator(ast_item, parent_node)
    # Parse literal
    elif utils.is_literal(ast_item):
        self.nh.handle_literal(ast_item, parent_node, program)
    # Parse call expression
    elif utils.is_call_expr(ast_item):
        parent_node = self.nh.handle_call_expr(ast_item, parent_node,
                                               self.parse_item, program)
    # Parse reference (handler may return None, in which case the current
    # parent is kept)
    elif utils.is_reference(ast_item):
        p_node = self.nh.handle_reference(ast_item, parent_node)
        if p_node:
            parent_node = p_node
    # Parse type ref, except under parents where the type is already handled
    elif ast_item.kind == CursorKind.TYPE_REF \
    and parent_node\
    and self.tokenizers['RES'].get_label(parent_node.token) not in ['root', 'DECLARATOR', 'FUNCTION_DECL', 'FUNCTION_TEMPLATE', 'ARGUMENTS', 'CXX_FUNCTIONAL_CAST_EXPR']:
        self.nh.handle_type_ref(ast_item, parent_node)
    # Parse for range -> for(int a:v) {...}
    elif ast_item.kind == CursorKind.CXX_FOR_RANGE_STMT:
        parent_node = self.nh.handle_for_range(ast_item, parent_node)
    # Parse cast expressions -> (int) a
    elif ast_item.kind == CursorKind.CSTYLE_CAST_EXPR:
        parent_node = self.nh.handle_cast_expr(ast_item, parent_node)
    # Parse functional casts -> int(a)
    elif ast_item.kind == CursorKind.CXX_FUNCTIONAL_CAST_EXPR:
        parent_node = self.nh.handle_func_cast_expr(ast_item, parent_node)
    elif ast_item.kind == CursorKind.LAMBDA_EXPR:
        parent_node = self.nh.handle_lambda_expr(ast_item, parent_node,
                                                 self.parse_item, program)
    elif ast_item.kind == CursorKind.CXX_STATIC_CAST_EXPR:
        parent_node = self.nh.handle_static_cast_expr(
            ast_item, parent_node)
    elif ast_item.kind == CursorKind.LABEL_REF:
        self.nh.handle_reference(ast_item, parent_node)
    elif ast_item.kind == CursorKind.LABEL_STMT:
        parent_node = self.nh.handle_label_stmt(ast_item, parent_node)
    # If not one of the above -> create simple parent node of the kind of
    # the item
    elif ast_item.kind != CursorKind.TYPE_REF:
        parent_node = Node(self.tokenizers['RES'].get_token(
            ast_item.kind.name),
                           is_reserved=True,
                           parent=parent_node)
    # Do not iterate through children that we have already treated as
    # arguments
    arguments = []
    if utils.is_call_expr(ast_item):
        arguments = [c.spelling for c in ast_item.get_arguments()]
    # Already handled first child of for range statement, so start from
    # the second child
    if ast_item.kind == CursorKind.CXX_FOR_RANGE_STMT:
        for index, child in enumerate(list(ast_item.get_children())[1:]):
            # Add compound statement -> {...} if this is missing around the
            # last child (the loop body)
            if index == len(list(ast_item.get_children(
            ))[1:]) - 1 and child.kind != CursorKind.COMPOUND_STMT:
                compound_stmt = Node(
                    self.tokenizers['RES'].get_token('COMPOUND_STMT'),
                    is_reserved=True,
                    parent=parent_node)
                self.parse_item(child, compound_stmt, program)
            else:
                self.parse_item(child, parent_node, program)
    # Handle one liner if/while statements with no compound statement
    # (={..}) as children -> if (...) return x; ADD COMPOUND STATEMENT
    # ANYWAY
    elif (ast_item.kind == CursorKind.IF_STMT or ast_item.kind == CursorKind.WHILE_STMT)\
    and any(CursorKind.COMPOUND_STMT != child.kind for child in list(ast_item.get_children())[1:]):
        for index, child in enumerate(ast_item.get_children()):
            if (
                    index != 1
                    and index < len(list(ast_item.get_children())) - 1
            ) or child.kind == CursorKind.COMPOUND_STMT or child.kind == CursorKind.IF_STMT:
                self.parse_item(child, parent_node, program)
            else:
                compound_stmt = Node(
                    self.tokenizers['RES'].get_token('COMPOUND_STMT'),
                    is_reserved=True,
                    parent=parent_node)
                self.parse_item(child, compound_stmt, program)
    # Handle for statements with no compound statement. ADD COMPOUND
    # STATEMENT (range-for was already consumed by the first branch, so in
    # practice this fires for FOR_STMT)
    elif (ast_item.kind == CursorKind.FOR_STMT
          or ast_item.kind == CursorKind.CXX_FOR_RANGE_STMT) and list(
              ast_item.get_children(
              ))[-1].kind != CursorKind.COMPOUND_STMT:
        compound_stmt = None
        for index, child in enumerate(ast_item.get_children()):
            if index < len(list(ast_item.get_children())
                           ) - 1 or child.kind == CursorKind.COMPOUND_STMT:
                self.parse_item(child, parent_node, program)
            else:
                if compound_stmt is None:
                    compound_stmt = Node(
                        self.tokenizers['RES'].get_token('COMPOUND_STMT'),
                        is_reserved=True,
                        parent=parent_node)
                self.parse_item(child, compound_stmt, program)
    # For while statement, only take first child and compound statements as
    # children
    elif ast_item.kind == CursorKind.WHILE_STMT:
        for index, child in enumerate(ast_item.get_children()):
            if index == 0 or (index > 0
                              and child.kind == CursorKind.COMPOUND_STMT):
                self.parse_item(child, parent_node, program)
    # Standard case, process all the children of the node recursively
    else:
        for child in ast_item.get_children():
            # Param declarations and arguments are already handled.
            # Also skip structure declarations if parent is declarator,
            # skip non-compound children when the parent is a constructor,
            # and skip array sizes if the var decl's type is an array —
            # only consider the init list expressions.
            if not(child.kind == CursorKind.PARM_DECL or child.spelling in arguments \
            or (ast_item.kind == CursorKind.STRUCT_DECL and self.tokenizers['RES'].get_label(parent_node.token) == 'DECLARATOR') \
            or (parent_node and parent_node.token and self.tokenizers['RES'].get_label(parent_node.token) == 'CONSTRUCTOR' and child.kind != CursorKind.COMPOUND_STMT)\
            or (ast_item.kind in [CursorKind.VAR_DECL, CursorKind.FIELD_DECL] and 'TYPE_ARRAY' in [self.tokenizers['RES'].get_label(c.token) for c in parent_node.parent.children[0].children] + [self.tokenizers['RES'].get_label(c.token) for c in parent_node.parent.children[1].children] and child.kind != CursorKind.INIT_LIST_EXPR)):
                self.parse_item(child, parent_node, program)
def handle_call_expr(self, ast_item, parent_node, parse_item, program):
    """Build the subtree for a call expression (or constructor-style call).

    Special container constructors (vector, pair, set, ...) become
    TYPE_CALL_EXPR subtrees; ``pair`` is rewritten as a ``make_pair`` call.
    Returns the node that should receive the call's remaining children.
    """
    func_name = None
    if ast_item.referenced:
        if 'struct ' in ast_item.type.spelling:
            return parent_node
        func_name = ast_item.referenced.spelling
        decl_line = ast_item.referenced.location.line
    else:
        # Unresolved overload: take the name of the first overload candidate.
        for child in ast_item.get_children():
            if child.type.kind == TypeKind.OVERLOAD:
                func_node = list(child.get_children())[0]
                func_name = func_node.spelling
                break
        if not func_name:
            return parent_node
        decl_line = None
    if not func_name:
        func_name = "FUNCTION_CALL"

    # Fix: raw string for the regex pattern.
    func_name = re.sub(r'\s+|,+', '', func_name)

    # Distinguish pre- from post-increment/decrement by where the operator
    # token appears.
    if func_name in ['operator++', 'operator--']:
        tokens = [t.spelling for t in ast_item.get_tokens()]
        # Fix: the original compared tokens[0] against '-', which can never
        # match clang's single '--' token, so pre-decrement was always
        # classified as _POST.
        if '++' == tokens[0] or '--' == tokens[0]:
            func_name += '_PRE'
        else:
            func_name += '_POST'

    special_call_expr = [
        'vector', 'unordered_map', 'pair', 'map', 'queue', 'greater',
        'priority_queue', 'bitset', 'multiset', 'set', 'string'
    ]
    if func_name in special_call_expr \
    or (ast_item.referenced and ast_item.referenced.kind == CursorKind.CONSTRUCTOR and len(list(ast_item.get_children())) > 0):
        # Drop 'const' from the type spelling before building the type tree.
        item_type = ast_item.type.spelling.replace('const', '')
        if func_name == 'pair' and len(list(ast_item.get_children())) <= 1:
            # A pair "call" without both elements carries no information.
            return parent_node
        elif func_name == 'pair':
            # Represent pair construction as a builtin make_pair call.
            func_call = Node(self.tokenizers['RES'].get_token(
                ast_item.kind.name),
                             is_reserved=True,
                             parent=parent_node)
            ref = Node(self.tokenizers['RES'].get_token('REF_BUILTIN'),
                       is_reserved=True,
                       parent=func_call)
            Node(self.tokenizers['NAME_BUILTIN'].get_token('make_pair'),
                 is_reserved=False,
                 parent=ref)
        else:
            # Container construction: TYPE_CALL_EXPR with the element type.
            func_call = Node(
                self.tokenizers['RES'].get_token('TYPE_CALL_EXPR'),
                is_reserved=True,
                parent=parent_node)
            type_kind = Node(self.tokenizers['RES'].get_token('TYPE_KIND'),
                             is_reserved=True,
                             parent=func_call)
            if func_name == 'set':
                # Keep only the element type of the set (drop comparators).
                item_type = f"set<{item_type.split('<')[-1].split(',')[0].replace('>', '').strip()}>"
            self.extract_builtin_type(item_type, type_kind)
            return Node(self.tokenizers['RES'].get_token('ARGUMENTS'),
                        is_reserved=True,
                        parent=func_call)
    else:
        func_call = Node(self.tokenizers['RES'].get_token(
            ast_item.kind.name),
                         is_reserved=True,
                         parent=parent_node)
        # Declared outside the temp source file -> builtin reference.
        if ast_item.referenced is not None and "tmp" not in str(
                ast_item.referenced.location):
            if func_name == 'sync_with_stdio':
                func_name = 'ios::sync_with_stdio'
            ref = Node(self.tokenizers['RES'].get_token('REF_BUILTIN'),
                       is_reserved=True,
                       parent=func_call)
            Node(self.tokenizers['NAME_BUILTIN'].get_token(func_name),
                 is_reserved=False,
                 parent=ref)
        else:
            ref = Node(self.tokenizers['RES'].get_token('REF'),
                       is_reserved=True,
                       parent=func_call)
            Node(self.tokenizers['NAME'].get_token(func_name),
                 is_reserved=False,
                 parent=ref,
                 decl_line=decl_line)
    # Parse the call arguments under a dedicated ARGUMENTS node.
    if len(list(ast_item.get_arguments())) > 0:
        arg_node = Node(self.tokenizers['RES'].get_token('ARGUMENTS'),
                        is_reserved=True,
                        parent=func_call)
        for arg_item in ast_item.get_arguments():
            parse_item(arg_item, arg_node, program)
    return func_call
def create_subtree(self, tokens, operators, parent_node):
    """Recursively build a nested binary-operator subtree from token/operator lists.

    ``operators`` is a list of (operator_spelling, token_index) pairs;
    operands around each operator become REF (identifier) or
    INTEGER_LITERAL leaves.
    """
    # NOTE(review): operators.remove(...) below mutates the very list this
    # for-loop iterates, and the loop continues after the recursive call —
    # iteration over a list being modified can skip entries. Confirm the
    # intended traversal before restructuring.
    for op, idx in operators:
        if idx + 1 < len(tokens) - 1:
            # More tokens follow the right operand: emit the left operand
            # here and recurse for the rest of the expression.
            op_node = Node(
                self.tokenizers['RES'].get_token(f'BINARY_OPERATOR_{op}'),
                is_reserved=True,
                parent=parent_node)
            t = tokens[idx - 1]
            if t.kind == TokenKind.IDENTIFIER:
                ref = Node(self.tokenizers['RES'].get_token('REF'),
                           is_reserved=True,
                           parent=op_node)
                Node(self.tokenizers['NAME'].get_token(t.spelling),
                     is_reserved=False,
                     parent=ref)
            else:
                lit = Node(
                    self.tokenizers['RES'].get_token('INTEGER_LITERAL'),
                    is_reserved=True,
                    parent=op_node)
                Node(self.tokenizers['LITERAL'].get_token(t.spelling),
                     is_reserved=False,
                     parent=lit)
            operators.remove((op, idx))
            self.create_subtree(tokens, operators, op_node)
        else:
            # Last operator: both operands become leaves of this node.
            op_node = Node(
                self.tokenizers['RES'].get_token(f'BINARY_OPERATOR_{op}'),
                is_reserved=True,
                parent=parent_node)
            for t in [tokens[idx - 1], tokens[idx + 1]]:
                if t.kind == TokenKind.IDENTIFIER:
                    ref = Node(self.tokenizers['RES'].get_token('REF'),
                               is_reserved=True,
                               parent=op_node)
                    Node(self.tokenizers['NAME'].get_token(t.spelling),
                         is_reserved=False,
                         parent=ref)
                else:
                    lit = Node(self.tokenizers['RES'].get_token(
                        'INTEGER_LITERAL'),
                               is_reserved=True,
                               parent=op_node)
                    Node(self.tokenizers['LITERAL'].get_token(t.spelling),
                         is_reserved=False,
                         parent=lit)
    # Degenerate case: a single token with no operators at all.
    if len(tokens) == 1:
        t = tokens[0]
        if t.kind == TokenKind.IDENTIFIER:
            ref = Node(self.tokenizers['RES'].get_token('REF'),
                       is_reserved=True,
                       parent=parent_node)
            Node(self.tokenizers['NAME'].get_token(t.spelling),
                 is_reserved=False,
                 parent=ref)
        else:
            lit = Node(self.tokenizers['RES'].get_token('INTEGER_LITERAL'),
                       is_reserved=True,
                       parent=parent_node)
            Node(self.tokenizers['LITERAL'].get_token(t.spelling),
                 is_reserved=False,
                 parent=lit)
def handle_type(self, ast_item, parent_node, children=None, recursion_level=0): canonical_type = ast_item.type.get_canonical() node_type = ast_item.type.spelling if self.tokenizers['RES'].get_label(parent_node.token) == 'TYPE_DEF': node_type = ast_item.underlying_typedef_type.spelling elif children is None: parent_node = Node(self.tokenizers['RES'].get_token('TYPE_KIND'), is_reserved=True, parent=parent_node) while canonical_type.kind == TypeKind.POINTER: parent_node = Node(self.tokenizers['RES'].get_token('POINTER'), is_reserved=True, parent=parent_node) canonical_type = canonical_type.get_pointee() if utils.is_function( ast_item ) or canonical_type.kind == TypeKind.FUNCTIONPROTO: #self.tokenizers['RES'].get_label(parent_node.token) == 'FUNCTION_DECL': canonical_type = ast_item.type.get_result() node_type = ast_item.type.get_result().spelling while canonical_type.kind == TypeKind.POINTER: parent_node = Node(self.tokenizers['RES'].get_token('POINTER'), is_reserved=True, parent=parent_node) canonical_type = canonical_type.get_pointee() node_type = canonical_type.get_pointee() if canonical_type.is_const_qualified() or node_type.startswith( 'const'): parent_node = Node( self.tokenizers['RES'].get_token('CONST_QUALIFIED'), is_reserved=True, parent=parent_node) for token in ast_item.get_tokens(): if 'auto' == token.spelling: type_node = Node(self.tokenizers['RES'].get_token('TYPE'), is_reserved=True, parent=parent_node) Node(self.tokenizers['TYPE'].get_token('auto'), is_reserved=False, parent=type_node) return break if canonical_type.kind in [ TypeKind.CONSTANTARRAY, TypeKind.VARIABLEARRAY, TypeKind.INCOMPLETEARRAY, TypeKind.DEPENDENTSIZEDARRAY ]: parent_node = Node(self.tokenizers['RES'].get_token('TYPE_ARRAY'), is_reserved=True, parent=parent_node) array_sizes_node = Node( self.tokenizers['RES'].get_token('ARRAY_SIZES'), is_reserved=True, parent=parent_node) for array_size in re.findall('\[.*?\]', ast_item.type.spelling): # If it does not only consist of numbers then 
it is a reference to a variable if not all(str(s).isdigit() for s in array_size[1:-1]): # array_size_type = Node(self.tokenizers['RES'].get_token("REF"), is_reserved=True, parent=array_sizes_node) self.extract_variable_array_sizes_subtree( array_size[1:-1], array_sizes_node) # Node(self.tokenizers['NAME'].get_token(array_size[1:-1]), is_reserved=False, parent=array_size_type) else: array_size_type = Node( self.tokenizers['RES'].get_token("INTEGER_LITERAL"), is_reserved=True, parent=array_sizes_node) Node(self.tokenizers['LITERAL'].get_token( array_size[1:-1]), is_reserved=False, parent=array_size_type) node_type = re.sub('\[.*?\]', '', node_type) # For example: int& a = x (left value reference) or int&& b = 30 (right value reference) elif canonical_type.kind == TypeKind.LVALUEREFERENCE or canonical_type.kind == TypeKind.RVALUEREFERENCE: parent_node = Node(self.tokenizers['RES'].get_token( ast_item.type.kind.name), is_reserved=True, parent=parent_node) self.extract_builtin_type(node_type, parent_node)
def handle_declaration(self, ast_item, parent_node, parse_item, program):
    """Build the subtree for a declaration cursor and return the node under
    which the caller should continue parsing.

    Handles functions/methods (incl. templates and constructors), classes,
    variable/field declarations, and structs; falls through to the given
    ``parent_node`` for anything else.
    """
    if utils.is_function(ast_item):
        # Function templates get a TEMPLATE_DECL node listing their
        # template type parameters (as a sibling of the function node).
        if (ast_item.kind == CursorKind.FUNCTION_TEMPLATE):
            template_decl = Node(
                self.tokenizers['RES'].get_token('TEMPLATE_DECL'),
                is_reserved=True,
                parent=parent_node)
            for child in ast_item.get_children():
                if child.kind == CursorKind.TEMPLATE_TYPE_PARAMETER:
                    templ_param = Node(self.tokenizers['RES'].get_token(
                        child.kind.name),
                                       is_reserved=True,
                                       parent=template_decl)
                    self.create_terminal_nodes(child.spelling, child,
                                               templ_param)
        func_decl = Node(self.tokenizers['RES'].get_token(
            ast_item.kind.name),
                         is_reserved=True,
                         parent=parent_node)
        # Record access specifiers only for members (not free functions).
        if ast_item.access_specifier != AccessSpecifier.INVALID and self.tokenizers[
                'RES'].get_label(parent_node.token) != 'root':
            acc_spec = Node(
                self.tokenizers['RES'].get_token('ACCESS_SPECIFIER'),
                is_reserved=True,
                parent=func_decl)
            Node(self.tokenizers['RES'].get_token(
                ast_item.access_specifier.name),
                 is_reserved=True,
                 parent=acc_spec)
        name = Node(self.tokenizers['RES'].get_token('NAME'),
                    is_reserved=True,
                    parent=func_decl)
        self.create_terminal_nodes(ast_item.spelling, ast_item, name)
        # Constructors have no return type to record.
        if ast_item.kind != CursorKind.CONSTRUCTOR:
            self.handle_type(ast_item, func_decl)
        if ast_item.is_const_method():
            Node(self.tokenizers['RES'].get_token('CONST'),
                 is_reserved=True,
                 parent=func_decl)
        children = ast_item.get_children()
        for child in children:
            # NOTE(review): reset each iteration, so the guard below is
            # always true and every parameter gets its own PARM_DECL node
            # (same pattern as handle_lambda_expr). If one shared PARM_DECL
            # was intended, initialize the flag before the loop — confirm
            # against downstream consumers.
            parm_declarations = False
            if child.kind == CursorKind.PARM_DECL:
                if not parm_declarations:
                    parm_decl = Node(
                        self.tokenizers['RES'].get_token('PARM_DECL'),
                        is_reserved=True,
                        parent=func_decl)
                    parm_declarations = True
                self.handle_type(child, parm_decl)
                declarator = Node(
                    self.tokenizers['RES'].get_token('DECLARATOR'),
                    is_reserved=True,
                    parent=parm_decl)
                reference = Node(self.tokenizers['RES'].get_token('NAME'),
                                 is_reserved=True,
                                 parent=declarator)
                self.create_terminal_nodes(child.spelling, child, reference)
                # Recurse into parameter children (e.g. default values).
                for c in child.get_children():
                    parse_item(c, declarator, program)
        # Get children of constructor which are not parameters or compound
        # statements, thus are constructor initializers. They come in
        # (member_ref, init_value) pairs, hence the step of 2.
        if ast_item.kind == CursorKind.CONSTRUCTOR:
            constructor_inits = [
                child for child in ast_item.get_children()
                if child.kind != CursorKind.COMPOUND_STMT
                and child.kind != CursorKind.PARM_DECL
            ]
            for i in range(0, len(constructor_inits), 2):
                constr_init = Node(self.tokenizers['RES'].get_token(
                    'CONSTRUCTOR_INITIALIZER'),
                                   is_reserved=True,
                                   parent=func_decl)
                member_ref = Node(self.tokenizers['RES'].get_token(
                    constructor_inits[i].kind.name),
                                  is_reserved=True,
                                  parent=constr_init)
                self.create_terminal_nodes(constructor_inits[i].spelling,
                                           constructor_inits[i], member_ref)
                parse_item(constructor_inits[i + 1], constr_init, program)
        return func_decl
    elif utils.is_class(ast_item):
        # Class templates get a TEMPLATE_DECL sibling, like functions above.
        if ast_item.kind == CursorKind.CLASS_TEMPLATE:
            template_decl = Node(
                self.tokenizers['RES'].get_token('TEMPLATE_DECL'),
                is_reserved=True,
                parent=parent_node)
            for child in ast_item.get_children():
                if child.kind == CursorKind.TEMPLATE_TYPE_PARAMETER:
                    templ_param = Node(self.tokenizers['RES'].get_token(
                        child.kind.name),
                                       is_reserved=True,
                                       parent=template_decl)
                    self.create_terminal_nodes(child.spelling, child,
                                               templ_param)
        class_decl = Node(self.tokenizers['RES'].get_token('CLASS_DECL'),
                          is_reserved=True,
                          parent=parent_node)
        name = Node(self.tokenizers['RES'].get_token('NAME'),
                    is_reserved=True,
                    parent=class_decl)
        self.create_terminal_nodes(ast_item.spelling, ast_item, name)
        # Members are parsed under the class body's COMPOUND_STMT.
        cmpnd_stmt = Node(
            self.tokenizers['RES'].get_token('COMPOUND_STMT'),
            is_reserved=True,
            parent=class_decl)
        return cmpnd_stmt
    elif ast_item.kind == CursorKind.VAR_DECL or ast_item.kind == CursorKind.FIELD_DECL or ast_item.kind == CursorKind.UNEXPOSED_DECL:
        var_decl = Node(self.tokenizers['RES'].get_token(
            ast_item.kind.name),
                        is_reserved=True,
                        parent=parent_node)
        # Access specifier recorded only for class fields.
        if ast_item.access_specifier != AccessSpecifier.INVALID and ast_item.kind == CursorKind.FIELD_DECL:
            acc_spec = Node(
                self.tokenizers['RES'].get_token('ACCESS_SPECIFIER'),
                is_reserved=True,
                parent=var_decl)
            Node(self.tokenizers['RES'].get_token(
                ast_item.access_specifier.name),
                 is_reserved=True,
                 parent=acc_spec)
        self.handle_type(ast_item, var_decl)
        declarator = Node(self.tokenizers['RES'].get_token('DECLARATOR'),
                          is_reserved=True,
                          parent=var_decl)
        reference = Node(self.tokenizers['RES'].get_token('NAME'),
                         is_reserved=True,
                         parent=declarator)
        self.create_terminal_nodes(ast_item.spelling, ast_item, reference)
        # Initializers are parsed under the declarator by the caller.
        return declarator
    elif utils.is_struct(ast_item):
        # If parent is declarator we are declaring a structure variable so
        # we do not want to declare the entire structure again
        if self.tokenizers['RES'].get_label(
                parent_node.token) != 'DECLARATOR':
            class_decl = Node(
                self.tokenizers['RES'].get_token('STRUCT_DECL'),
                is_reserved=True,
                parent=parent_node)
            name = Node(self.tokenizers['RES'].get_token('NAME'),
                        is_reserved=True,
                        parent=class_decl)
            self.create_terminal_nodes(ast_item.spelling, ast_item, name)
            cmpnd_stmt = Node(
                self.tokenizers['RES'].get_token('COMPOUND_STMT'),
                is_reserved=True,
                parent=class_decl)
            return cmpnd_stmt
    # Anything else: keep parsing under the current parent.
    return parent_node