示例#1
0
 def handle_label_stmt(self, ast_item, parent_node):
     """Build the subtree for a C++ label statement (e.g. ``lbl:``).

     Creates a reserved node for the statement kind plus a reserved NAME
     node, fills the NAME node with the label's spelling, and returns the
     statement node so the caller can attach the labelled statement to it.
     """
     res_tokenizer = self.tokenizers['RES']
     stmt = Node(res_tokenizer.get_token(ast_item.kind.name),
                 is_reserved=True,
                 parent=parent_node)
     # NOTE(review): the NAME node is attached to parent_node rather than
     # stmt — presumably intentional, but worth confirming.
     label_name = Node(res_tokenizer.get_token('NAME'),
                       is_reserved=True,
                       parent=parent_node)
     self.create_terminal_nodes(ast_item.spelling, ast_item, label_name)
     return stmt
示例#2
0
 def handle_type_ref(self, ast_item, parent_node):
     """Attach a TYPE_REF subtree for a non-std type reference.

     References whose type spelling mentions the std namespace are
     skipped entirely; otherwise a reserved node for the cursor kind is
     created with a NAME leaf carrying the type spelling and the line of
     the referenced declaration.
     """
     if 'std' in ast_item.type.spelling:
         return
     type_ref = Node(self.tokenizers['RES'].get_token(ast_item.kind.name),
                     is_reserved=True,
                     parent=parent_node)
     Node(self.tokenizers['NAME'].get_token(ast_item.type.spelling),
          is_reserved=False,
          parent=type_ref,
          decl_line=ast_item.referenced.location.line)
示例#3
0
    def handle_lambda_expr(self, ast_item, parent_node, parse_item, program):
        """Build the subtree for a C++ lambda expression.

        Emits one CAPTURE_CLAUSE child per comma-separated capture and a
        single PARM_DECL node grouping all lambda parameters (each with
        type, declarator and name). The lambda body is parsed by the
        caller, which attaches it to the returned node.

        Args:
            ast_item: clang cursor of kind LAMBDA_EXPR.
            parent_node: node the lambda subtree is attached to.
            parse_item: recursive parse callback for nested children.
            program: original source text, forwarded to parse_item.

        Returns:
            The LAMBDA_EXPR node.
        """
        lambda_expr = Node(self.tokenizers['RES'].get_token(
            ast_item.kind.name),
                           is_reserved=True,
                           parent=parent_node)

        # Collect the capture-clause tokens: everything between the opening
        # '[' (dropped by the [1:] slice) and the closing ']'.
        capture_clause_tokens = []
        for token in [t.spelling for t in ast_item.get_tokens()][1:]:
            if token == ']':
                break
            capture_clause_tokens.append(token)

        capture_clauses = ''.join(capture_clause_tokens).split(',')

        for capture_clause in capture_clauses:
            capt_clause_node = Node(
                self.tokenizers['RES'].get_token('CAPTURE_CLAUSE'),
                is_reserved=True,
                parent=lambda_expr)
            self.create_terminal_nodes(capture_clause, ast_item,
                                       capt_clause_node)

        # BUG FIX: the "already created" flag used to be reset inside the
        # loop, so every parameter produced its own PARM_DECL node. Hoist
        # it out so all parameters share a single PARM_DECL group, as the
        # original flag pattern clearly intended.
        parm_decl = None
        for child in ast_item.get_children():
            if child.kind == CursorKind.PARM_DECL:
                if parm_decl is None:
                    parm_decl = Node(
                        self.tokenizers['RES'].get_token('PARM_DECL'),
                        is_reserved=True,
                        parent=lambda_expr)

                self.handle_type(child, parm_decl)

                declarator = Node(
                    self.tokenizers['RES'].get_token('DECLARATOR'),
                    is_reserved=True,
                    parent=parm_decl)
                reference = Node(self.tokenizers['RES'].get_token('NAME'),
                                 is_reserved=True,
                                 parent=declarator)
                self.create_terminal_nodes(child.spelling, child, reference)

                for c in child.get_children():
                    parse_item(c, declarator, program)

        return lambda_expr
示例#4
0
    def create_type_node(self, type_string, parent_node):
        """Create a small type subtree under *parent_node* for *type_string*.

        Numeric strings (e.g. array sizes) become INTEGER_LITERAL nodes;
        builtin type names become TYPE nodes; template type parameters are
        normalized to the placeholder 'T'; anything else is emitted as a
        TYPE_REF with a NAME leaf.
        """
        res_tokenizer = self.tokenizers['RES']

        if type_string.isdigit():
            # Digits are literal values, not type names.
            wrapper = Node(res_tokenizer.get_token('INTEGER_LITERAL'),
                           is_reserved=True,
                           parent=parent_node)
            Node(self.tokenizers['LITERAL'].get_token(type_string),
                 is_reserved=False,
                 parent=wrapper)
            return

        # Pick the reserved wrapper token, the leaf tokenizer and the leaf
        # value for the remaining cases in one place.
        if type_string in self.builtin_types:
            wrapper_name, leaf_tokenizer, leaf_value = 'TYPE', 'TYPE', type_string
        elif 'type-parameter' in type_string:
            # Template type parameters are normalized to 'T'.
            wrapper_name, leaf_tokenizer, leaf_value = 'TYPE', 'TYPE', 'T'
        else:
            wrapper_name, leaf_tokenizer, leaf_value = 'TYPE_REF', 'NAME', type_string

        wrapper = Node(res_tokenizer.get_token(wrapper_name),
                       is_reserved=True,
                       parent=parent_node)
        Node(self.tokenizers[leaf_tokenizer].get_token(leaf_value),
             is_reserved=False,
             parent=wrapper)
示例#5
0
    def build_type_node(self, type_string, record_type, parent_node):
        """Create a type node, optionally wrapped as a record type.

        When *record_type* is truthy, a TYPE_RECORD wrapper is created,
        the type subtree is built inside it, and a TYPE_RECORD_ELEMENTS
        node is returned for the caller to attach the record's element
        types to. Otherwise the type subtree is built directly under
        *parent_node* and None is returned.
        """
        if not record_type:
            self.create_type_node(type_string, parent_node)
            return None

        record = Node(self.tokenizers['RES'].get_token('TYPE_RECORD'),
                      is_reserved=True,
                      parent=parent_node)
        self.create_type_node(type_string, record)

        # Container for the record's element types, filled by the caller.
        return Node(self.tokenizers['RES'].get_token('TYPE_RECORD_ELEMENTS'),
                    is_reserved=True,
                    parent=record)
示例#6
0
 def handle_func_cast_expr(self, ast_item, parent_node):
     """Build the subtree for a functional cast expression (e.g. ``int(x)``).

     Creates a reserved node for the cursor kind, attaches the target
     type to it and returns the node so the operand can be parsed into it.
     """
     kind_token = self.tokenizers['RES'].get_token(ast_item.kind.name)
     cast_node = Node(kind_token, is_reserved=True, parent=parent_node)
     self.handle_type(ast_item, cast_node)
     return cast_node
示例#7
0
    def handle_static_cast_expr(self, ast_item, parent_node):
        """Build the subtree for a ``static_cast<T>(x)`` expression.

        Creates a reserved node for the cursor kind, attaches the target
        type to it and returns the node so the operand can be parsed
        into it.
        """
        kind_token = self.tokenizers['RES'].get_token(ast_item.kind.name)
        cast_node = Node(kind_token, is_reserved=True, parent=parent_node)
        self.handle_type(ast_item, cast_node)
        return cast_node
示例#8
0
    def parse_ast(self,
                  program: str,
                  imports: list = None,
                  thread_nr: int = 0):
        """Parse a C++ program into our custom AST.

        Args:
            program: C++ source code to parse.
            imports: pre-extracted include directives. When None they are
                extracted from *program*; otherwise the value is parsed as
                the string repr of a list (hence the [1:-1].split("'")
                below) -- NOTE(review): annotated as list but treated as
                str, confirm with callers.
            thread_nr: worker-thread index; gives each thread its own
                temp file name so clang invocations don't clash.

        Returns:
            The root Node of the constructed AST, with all parsed cursor
            items attached as children.
        """
        os.makedirs('temp', exist_ok=True)

        # Per-thread temp file path for clang's in-memory file contents.
        temp_file_path = os.path.join('temp', f'tmp{thread_nr}.cpp')

        try:
            # Set arguments and add compiler system include paths (with ccsyspath)
            args = '-x c++ --std=c++20'.split()
            syspath = ccsyspath.system_include_paths('clang')
            incargs = [b'-I' + inc for inc in syspath]
            args = args + incargs

            if imports is None:
                program, imports = extract_imports(program)
            else:
                # Recover the list from its string representation.
                imports = [
                    ele for ele in imports[1:-1].split("'")
                    if ele != '' and ele != ', '
                ]

            # Preprocess the program, expand the macros
            preprocessed_program = self.preprocess_program(
                program, temp_file_path, imports)

            # Parse the program to a clang AST
            tu = self.index.parse(temp_file_path,
                                  unsaved_files=[(temp_file_path,
                                                  preprocessed_program)],
                                  args=args,
                                  options=0)

            # Retrieve only the cursor items (children) that contain the
            # program code (no import code)
            cursor_items = self.get_cursor_items(tu.cursor, temp_file_path)

            # Create a root node
            root_node = Node(self.tokenizers['RES'].get_token('root'),
                             is_reserved=True)

            # Parse each cursor item
            for cursor_item in cursor_items:
                self.parse_item(cursor_item, root_node, program)
        finally:
            # Clean up the temp dir even when clang or parsing raises;
            # previously a failure here leaked the directory.
            shutil.rmtree('temp')

        # Return the root node filled with children to form the AST
        return root_node
示例#9
0
    def handle_operator(self, ast_item, parent_node):
        """Create the reserved node for an operator expression.

        The node token is the cursor kind name joined with the operator
        symbol(s), e.g. ``BINARY_OPERATOR_+`` or ``UNARY_OPERATOR_PRE_++``.
        Operand children are attached by the caller.

        Returns:
            The created operator Node.
        """
        if ast_item.kind == CursorKind.BINARY_OPERATOR or ast_item.kind == CursorKind.COMPOUND_ASSIGNMENT_OPERATOR:
            try:
                # The operator token sits right after the first operand's
                # tokens, so its index equals that operand's token count.
                operator_index = len(
                    list(list(ast_item.get_children())[0].get_tokens()))
                op_name = [
                    list(ast_item.get_tokens())[operator_index].spelling
                ]
            except IndexError:
                # Fallback: estimate the operator position from the token
                # counts of both operands -- TODO confirm when this
                # heuristic triggers (incomplete token extents?).
                len_first_child = len([
                    t.spelling
                    for t in list(ast_item.get_children())[0].get_tokens()
                ])
                len_second_child = len([
                    t.spelling
                    for t in list(ast_item.get_children())[1].get_tokens()
                ])
                operator_index = len_first_child - len_second_child - 1
                try:
                    op_name = [
                        list(ast_item.get_tokens())[operator_index].spelling
                    ]
                except IndexError:
                    # Last resort: first token of the second operand.
                    op_name = [[
                        t.spelling
                        for t in list(ast_item.get_children())[1].get_tokens()
                    ][0]]

        elif ast_item.kind == CursorKind.UNARY_OPERATOR:
            # A leading operator token means prefix form, a trailing one
            # postfix; ++/-- get an explicit PRE_/POST_ marker.
            tokens = list(ast_item.get_tokens())
            if utils.is_operator_token(tokens[0].spelling):
                op_name = tokens[0].spelling
                if op_name in ['++', '--']:
                    op_name = 'PRE_' + op_name
            elif utils.is_operator_token(tokens[-1].spelling):
                op_name = tokens[-1].spelling
                if op_name in ['++', '--']:
                    op_name = 'POST_' + op_name
            else:
                print(
                    f'UNARY OPERATOR EXCEPTION: {[t.spelling for t in tokens]}'
                )
                op_name = ''

            op_name = [op_name]

        else:
            # All other operator kinds are resolved by the utils helper.
            op_name = utils.get_operator(ast_item)

        operator = Node(
            self.tokenizers['RES'].get_token(ast_item.kind.name.strip() + '_' +
                                             '_'.join(op_name)),
            is_reserved=True,
            parent=parent_node)

        return operator
示例#10
0
    def handle_typedef(self, ast_item, parent_node):
        """Build the subtree for a typedef declaration.

        Layout: TYPEDEF_DECL -> [TYPE_DEF (the underlying type),
        IDENTIFIER (the name introduced by the typedef)].
        """
        res_tokenizer = self.tokenizers['RES']

        # Root of the typedef subtree.
        typedef_decl = Node(res_tokenizer.get_token(ast_item.kind.name),
                            is_reserved=True,
                            parent=parent_node)

        # First child: the aliased (underlying) type.
        type_def = Node(res_tokenizer.get_token('TYPE_DEF'),
                        is_reserved=True,
                        parent=typedef_decl)
        self.handle_type(ast_item, type_def)

        # Second child: the identifier, filled with the cursor's spelling.
        identifier = Node(res_tokenizer.get_token('IDENTIFIER'),
                          is_reserved=True,
                          parent=typedef_decl)
        self.create_terminal_nodes(ast_item.spelling, ast_item, identifier)
示例#11
0
    def handle_literal(self, ast_item, parent_node, program):
        """Create a literal node with its value as a LITERAL leaf.

        Literals that are direct children of a compound statement are
        no-op expression statements and are skipped. When clang exposes
        no tokens for the literal (e.g. after macro expansion), the value
        is sliced directly out of the program text instead.
        """
        parent_label = self.tokenizers['RES'].get_label(parent_node.token)
        if parent_label == 'COMPOUND_STMT':
            return

        lit_type = Node(self.tokenizers['RES'].get_token(
            ast_item.kind.name),
                        is_reserved=True,
                        parent=parent_node)

        token = next(ast_item.get_tokens(), None)
        if token:
            spelling = token.spelling
        else:
            # No tokens available: recover the literal from the source
            # text via the cursor's extent offsets.
            spelling = program[ast_item.extent.start.offset:
                               ast_item.extent.end.offset]

        Node(self.tokenizers['LITERAL'].get_token(spelling),
             is_reserved=False,
             parent=lit_type)
示例#12
0
    def handle_for_range(self, ast_item, parent_node):
        """Build the subtree for a range-based for (``for (int a : v)``).

        Only the loop-variable declaration — always the first child of
        the cursor — is handled here; the range expression and body are
        parsed by the caller. Returns the CXX_FOR_RANGE_STMT node.
        """
        res_tokenizer = self.tokenizers['RES']
        stmt = Node(res_tokenizer.get_token(ast_item.kind.name),
                    is_reserved=True,
                    parent=parent_node)

        # The first child is always the declaration of the loop variable.
        loop_var = next(ast_item.get_children())
        var_decl = Node(res_tokenizer.get_token(loop_var.kind.name),
                        is_reserved=True,
                        parent=stmt)
        self.handle_type(loop_var, var_decl)

        declarator = Node(res_tokenizer.get_token('DECLARATOR'),
                          is_reserved=True,
                          parent=var_decl)
        name_node = Node(res_tokenizer.get_token('NAME'),
                         is_reserved=True,
                         parent=declarator)
        self.create_terminal_nodes(loop_var.spelling, loop_var, name_node)

        return stmt
示例#13
0
    def create_terminal_nodes(self, label, ast_item, parent_node, tokens=None):
        """Create terminal NAME node(s) for *label* under *parent_node*.

        The declaration line is taken from the referenced cursor when
        available so identifiers can be linked back to their declaration.
        When ``self.split_terminals`` is set, the label is split on
        structural characters and one node is created per part.

        Args:
            label: spelling to turn into terminal node(s).
            ast_item: clang cursor the label originates from.
            parent_node: node the terminals are attached to.
            tokens: unused; kept for interface compatibility.
        """
        if ast_item.referenced is not None:
            decl_line = ast_item.referenced.location.line
        else:
            decl_line = ast_item.location.line

        name_tokenizer = self.tokenizers['NAME']
        if self.split_terminals:
            # Split label on: '[', ']', '<', '>', ' ', '::', ','.
            # FIX: use a raw string — the previous non-raw '\[' escapes
            # raise SyntaxWarning on modern Python. Also stop shadowing
            # the `label` parameter in the loop.
            parts = [
                part for part in re.split(r'(\[|\]|<|>| |::|,)', label)
                if len(part.strip()) > 0
            ]

            for part in parts:
                Node(name_tokenizer.get_token(part),
                     is_reserved=False,
                     parent=parent_node,
                     decl_line=decl_line)
        else:
            Node(name_tokenizer.get_token(label),
                 is_reserved=False,
                 parent=parent_node,
                 decl_line=decl_line)
示例#14
0
    def extract_builtin_type(self, type_string, parent_node):
        """Normalize a builtin type spelling and build its type subtree.

        Each '*' in the type produces a POINTER wrapper node; qualifiers
        and decorations that carry no type information are then stripped
        before delegating to build_type_subtree.
        """
        # One POINTER wrapper per '*' (str.count replaces the previous
        # len(re.findall(...)) with its non-raw '\*' pattern).
        for _ in range(type_string.count('*')):
            parent_node = Node(self.tokenizers['RES'].get_token('POINTER'),
                               is_reserved=True,
                               parent=parent_node)

        # Strip qualifiers/decorations; the duplicated '*' and '&'
        # replacements in the original chain were no-ops and are removed.
        type_string = type_string.replace('*', '')\
                                 .replace('&', '')\
                                 .replace('const', '')\
                                 .replace('std::', '')\
                                 .replace('::value_type', '')\
                                 .replace('struct', '').strip()

        # Remove spaces between non-alphanumeric characters and commas
        # (raw string: the non-raw '\s' escape warns on modern Python).
        type_string = re.sub(r'\s*([^A-Za-z,])\s*', r'\1', type_string)

        self.build_type_subtree(type_string, parent_node)
示例#15
0
    def handle_reference(self, ast_item, parent_node):
        """Create a REF / REF_BUILTIN subtree for a referenced identifier.

        Identifiers declared inside our temp file get a REF node with a
        NAME leaf (plus declaration line); anything declared elsewhere is
        treated as builtin/library and gets REF_BUILTIN with a
        NAME_BUILTIN leaf.

        Returns:
            The created reference node, or None when the reference is
            skipped (e.g. it merely repeats the enclosing call's name).
        """
        if parent_node:
            # Names already emitted for the function called by an
            # enclosing CALL_EXPR; a reference repeating one of these
            # would be redundant and is filtered out below.
            parent_func_name = [
                '' if n.children[0].res else self.tokenizers['NAME'].get_label(
                    n.children[0].token.split('::')[-1])
                for n in parent_node.children
                if self.tokenizers['RES'].get_label(
                    n.token) in ['NAME', 'REF', 'REF_BUILTIN'] and self.
                tokenizers['RES'].get_label(parent_node.token) == 'CALL_EXPR'
            ]
        else:
            parent_func_name = []

        if ast_item.spelling \
        and ast_item.spelling not in parent_func_name:
            # and not (self.tokenizers['RES'].get_label(parent_node.token) == 'DECLARATOR' and 'REF' in ast_item.kind.name):
            # print('AFTER: ', ast_item.spelling, ast_item.extent)

            # Declared outside the temp file -> builtin/library name.
            if 'tmp' not in str(ast_item.referenced.location):
                reference = Node(
                    self.tokenizers['RES'].get_token('REF_BUILTIN'),
                    True,
                    parent=parent_node)
                Node(self.tokenizers['NAME_BUILTIN'].get_token(
                    ast_item.spelling),
                     False,
                     parent=reference)
            else:
                reference = Node(self.tokenizers['RES'].get_token('REF'),
                                 True,
                                 parent=parent_node)
                Node(self.tokenizers['NAME'].get_token(ast_item.spelling),
                     False,
                     parent=reference,
                     decl_line=ast_item.referenced.location.line)
            return reference

        elif not ast_item.spelling and ast_item.kind == CursorKind.MEMBER_REF_EXPR:
            # Anonymous member access: recover the member name from the
            # token right after the '.'. NOTE(review): '->' access would
            # raise ValueError here — confirm it cannot occur.
            tokens = [t.spelling for t in ast_item.get_tokens()]
            member_ref = tokens[tokens.index('.') + 1]
            reference = Node(self.tokenizers['RES'].get_token('REF'),
                             is_reserved=True,
                             parent=parent_node)
            Node(self.tokenizers['NAME'].get_token(member_ref),
                 is_reserved=False,
                 parent=reference,
                 decl_line=ast_item.referenced.location.line)
            return reference
示例#16
0
    def parse_item(self, ast_item, parent_node, program):
        """Recursively translate a clang cursor into nodes of our AST.

        Dispatches on ``ast_item.kind`` to the specialized handlers on
        ``self.nh``, then recurses into the cursor's children. Several
        C++ constructs (brace-less if/while/for bodies, for-range loops)
        get an explicit COMPOUND_STMT node inserted even though the
        source has no braces, so consumers always see a compound body.

        Args:
            ast_item: clang cursor to translate.
            parent_node: Node the translation is attached to.
            program: original program text (used by literal handling).
        """
        # Useless AST primitives
        skip_kinds = [
            CursorKind.UNEXPOSED_EXPR, CursorKind.OVERLOADED_DECL_REF,
            CursorKind.TEMPLATE_REF
        ]

        # print(ast_item.spelling, ast_item.kind.name, ast_item.type.spelling, [t.spelling for t in ast_item.get_tokens()])

        # Skip useless AST primitives and exceptions -> continue straight with their children
        if ast_item.kind in skip_kinds \
        or 'operatorbool' == ast_item.spelling \
        or 'operator bool' == ast_item.spelling \
        or (('std::string' == ast_item.type.spelling \
        or 'basic_string' == ast_item.spelling) \
        and ast_item.kind in [CursorKind.TYPE_REF, CursorKind.CALL_EXPR]):
            pass

        # Parse typedef
        elif utils.is_typedef(ast_item):
            self.nh.handle_typedef(ast_item, parent_node)

        # Parse declaration
        elif ast_item.kind.is_declaration():
            parent_node = self.nh.handle_declaration(ast_item, parent_node,
                                                     self.parse_item, program)

        # Parse operator
        elif utils.is_operator(ast_item):
            parent_node = self.nh.handle_operator(ast_item, parent_node)

        # Parse literal
        elif utils.is_literal(ast_item):
            self.nh.handle_literal(ast_item, parent_node, program)

        # Parse call expression
        elif utils.is_call_expr(ast_item):
            parent_node = self.nh.handle_call_expr(ast_item, parent_node,
                                                   self.parse_item, program)

        # Parse reference (may be skipped, hence the None check)
        elif utils.is_reference(ast_item):
            p_node = self.nh.handle_reference(ast_item, parent_node)

            if p_node:
                parent_node = p_node

        # Parse type ref, except under parents that handle types themselves
        elif ast_item.kind == CursorKind.TYPE_REF \
            and parent_node\
            and self.tokenizers['RES'].get_label(parent_node.token) not in ['root', 'DECLARATOR',
            'FUNCTION_DECL', 'FUNCTION_TEMPLATE', 'ARGUMENTS',
            'CXX_FUNCTIONAL_CAST_EXPR']:
            self.nh.handle_type_ref(ast_item, parent_node)

        # Parse for range -> for(int a:v) {...}
        elif ast_item.kind == CursorKind.CXX_FOR_RANGE_STMT:
            parent_node = self.nh.handle_for_range(ast_item, parent_node)

        # Parse cast expressions -> (int) a
        elif ast_item.kind == CursorKind.CSTYLE_CAST_EXPR:
            parent_node = self.nh.handle_cast_expr(ast_item, parent_node)

        # Parse functional cast expressions -> int(a)
        elif ast_item.kind == CursorKind.CXX_FUNCTIONAL_CAST_EXPR:
            parent_node = self.nh.handle_func_cast_expr(ast_item, parent_node)

        elif ast_item.kind == CursorKind.LAMBDA_EXPR:
            parent_node = self.nh.handle_lambda_expr(ast_item, parent_node,
                                                     self.parse_item, program)

        elif ast_item.kind == CursorKind.CXX_STATIC_CAST_EXPR:
            parent_node = self.nh.handle_static_cast_expr(
                ast_item, parent_node)

        elif ast_item.kind == CursorKind.LABEL_REF:
            self.nh.handle_reference(ast_item, parent_node)

        elif ast_item.kind == CursorKind.LABEL_STMT:
            parent_node = self.nh.handle_label_stmt(ast_item, parent_node)

        # If not one of the above -> create simple parent node of the kind of the item
        elif ast_item.kind != CursorKind.TYPE_REF:
            # print(ast_item.spelling, ast_item.kind.name)
            parent_node = Node(self.tokenizers['RES'].get_token(
                ast_item.kind.name),
                               is_reserved=True,
                               parent=parent_node)

        # Do not iterate through children that we have already treated as arguments
        arguments = []
        if utils.is_call_expr(ast_item):
            arguments = [c.spelling for c in ast_item.get_arguments()]

        # Already handled first child of for range statement, so start from second child
        if ast_item.kind == CursorKind.CXX_FOR_RANGE_STMT:
            for index, child in enumerate(list(ast_item.get_children())[1:]):
                # Add compound statement -> {...} if this is missing
                if index == len(list(ast_item.get_children(
                ))[1:]) - 1 and child.kind != CursorKind.COMPOUND_STMT:
                    compound_stmt = Node(
                        self.tokenizers['RES'].get_token('COMPOUND_STMT'),
                        is_reserved=True,
                        parent=parent_node)
                    self.parse_item(child, compound_stmt, program)
                else:
                    self.parse_item(child, parent_node, program)

        # Handle one liner if/while statements with no compound statement (={..}) as children -> if (...) return x; ADD COMPOUND STATEMENT ANYWAY
        elif (ast_item.kind == CursorKind.IF_STMT or ast_item.kind == CursorKind.WHILE_STMT)\
             and any(CursorKind.COMPOUND_STMT != child.kind for child in list(ast_item.get_children())[1:]):
            for index, child in enumerate(ast_item.get_children()):
                # print(child.spelling, child.kind.name, child.type.spelling, index)
                if (
                        index != 1
                        and index < len(list(ast_item.get_children())) - 1
                ) or child.kind == CursorKind.COMPOUND_STMT or child.kind == CursorKind.IF_STMT:
                    self.parse_item(child, parent_node, program)
                else:
                    compound_stmt = Node(
                        self.tokenizers['RES'].get_token('COMPOUND_STMT'),
                        is_reserved=True,
                        parent=parent_node)
                    self.parse_item(child, compound_stmt, program)

        # Handle for statements with no compound statement. ADD COMPOUND STATEMENT
        elif (ast_item.kind == CursorKind.FOR_STMT or ast_item.kind
              == CursorKind.CXX_FOR_RANGE_STMT) and list(ast_item.get_children(
              ))[-1].kind != CursorKind.COMPOUND_STMT:
            compound_stmt = None
            for index, child in enumerate(ast_item.get_children()):
                if index < len(list(ast_item.get_children())
                               ) - 1 or child.kind == CursorKind.COMPOUND_STMT:
                    self.parse_item(child, parent_node, program)
                else:
                    # Lazily create one shared compound statement node
                    if compound_stmt is None:
                        compound_stmt = Node(
                            self.tokenizers['RES'].get_token('COMPOUND_STMT'),
                            is_reserved=True,
                            parent=parent_node)
                    self.parse_item(child, compound_stmt, program)

        # For while statement, only take first child and compound statements as children
        elif ast_item.kind == CursorKind.WHILE_STMT:
            for index, child in enumerate(ast_item.get_children()):
                if index == 0 or (index > 0
                                  and child.kind == CursorKind.COMPOUND_STMT):
                    self.parse_item(child, parent_node, program)

        # Standard case, process all the children of the node recursively
        else:
            for child in ast_item.get_children():
                # Param declarations, arguments already handled.
                # Also skip structure declarations if parent is declarator
                # And skip compound statements if the parent is a constructor
                # And skip array sizes if type of var decl is array, only consider the init list expressions
                if not(child.kind == CursorKind.PARM_DECL or child.spelling in arguments \
                    or (ast_item.kind == CursorKind.STRUCT_DECL
                    and self.tokenizers['RES'].get_label(parent_node.token) == 'DECLARATOR') \
                    or (parent_node and parent_node.token
                    and self.tokenizers['RES'].get_label(parent_node.token) == 'CONSTRUCTOR'
                    and child.kind != CursorKind.COMPOUND_STMT)\
                    or (ast_item.kind in [CursorKind.VAR_DECL, CursorKind.FIELD_DECL]
                    and 'TYPE_ARRAY' in [self.tokenizers['RES'].get_label(c.token) for c in parent_node.parent.children[0].children] +
                     [self.tokenizers['RES'].get_label(c.token) for c in parent_node.parent.children[1].children]
                    and child.kind != CursorKind.INIT_LIST_EXPR)):

                    self.parse_item(child, parent_node, program)
示例#17
0
    def handle_call_expr(self, ast_item, parent_node, parse_item, program):
        """Build the subtree for a call expression.

        Distinguishes three cases: constructor-like calls of known STL
        containers (emitted as TYPE_CALL_EXPR with the instantiated
        type), ``pair`` construction (rewritten as a builtin make_pair
        call), and ordinary function calls (CALL_EXPR with a
        REF/REF_BUILTIN child naming the callee plus an ARGUMENTS child).

        Returns:
            The node further children/arguments should be attached to,
            or *parent_node* unchanged when the call is skipped.
        """
        func_name = None

        if ast_item.referenced:
            # Skip calls whose result type is a plain struct value.
            if 'struct ' in ast_item.type.spelling:
                return parent_node
            func_name = ast_item.referenced.spelling
            decl_line = ast_item.referenced.location.line
        else:
            # Unresolved overload set: take the first candidate's name.
            for child in ast_item.get_children():
                if child.type.kind == TypeKind.OVERLOAD:
                    func_node = list(child.get_children())[0]
                    func_name = func_node.spelling
                    break

            if not func_name:
                return parent_node

            decl_line = None

        if not func_name:
            func_name = "FUNCTION_CALL"

        # Normalize the callee name: strip whitespace and commas.
        func_name = re.sub(r'\s+|,+', '', func_name)

        # Check for pre and post forms of the ++ and -- operators: the
        # prefix form starts with the operator token itself.
        # BUG FIX: the original tested tokens[0] against '-' instead of
        # '--', so pre-decrement was always classified as POST.
        if func_name in ['operator++', 'operator--']:
            tokens = [t.spelling for t in ast_item.get_tokens()]
            if tokens[0] in ('++', '--'):
                func_name += '_PRE'
            else:
                func_name += '_POST'

        # STL types whose "calls" are really constructions.
        special_call_expr = [
            'vector', 'unordered_map', 'pair', 'map', 'queue', 'greater',
            'priority_queue', 'bitset', 'multiset', 'set', 'string'
        ]

        if func_name in special_call_expr \
        or (ast_item.referenced and ast_item.referenced.kind == CursorKind.CONSTRUCTOR and len(list(ast_item.get_children())) > 0):

            # Drop const qualifiers from the instantiated type spelling.
            item_type = ast_item.type.spelling.replace('const', '')

            if func_name == 'pair' and len(list(ast_item.get_children())) <= 1:
                # A pair "call" without both elements is only a type mention.
                return parent_node
            elif func_name == 'pair':
                # Rewrite pair construction as a builtin make_pair call.
                func_call = Node(self.tokenizers['RES'].get_token(
                    ast_item.kind.name),
                                 is_reserved=True,
                                 parent=parent_node)
                ref = Node(self.tokenizers['RES'].get_token('REF_BUILTIN'),
                           is_reserved=True,
                           parent=func_call)
                Node(self.tokenizers['NAME_BUILTIN'].get_token('make_pair'),
                     is_reserved=False,
                     parent=ref)
            else:
                # Container construction: emit the instantiated type.
                func_call = Node(
                    self.tokenizers['RES'].get_token('TYPE_CALL_EXPR'),
                    is_reserved=True,
                    parent=parent_node)
                type_kind = Node(self.tokenizers['RES'].get_token('TYPE_KIND'),
                                 is_reserved=True,
                                 parent=func_call)

                if func_name == 'set':
                    # Keep only the element type of the set instantiation.
                    item_type = f"set<{item_type.split('<')[-1].split(',')[0].replace('>', '').strip()}>"
                self.extract_builtin_type(item_type, type_kind)

            return Node(self.tokenizers['RES'].get_token('ARGUMENTS'),
                        is_reserved=True,
                        parent=func_call)

        else:
            func_call = Node(self.tokenizers['RES'].get_token(
                ast_item.kind.name),
                             is_reserved=True,
                             parent=parent_node)

            # Callee declared outside our temp file -> builtin/library.
            if ast_item.referenced is not None and not "tmp" in str(
                    ast_item.referenced.location):
                if func_name == 'sync_with_stdio':
                    func_name = 'ios::sync_with_stdio'

                ref = Node(self.tokenizers['RES'].get_token('REF_BUILTIN'),
                           is_reserved=True,
                           parent=func_call)
                Node(self.tokenizers['NAME_BUILTIN'].get_token(func_name),
                     is_reserved=False,
                     parent=ref)
            else:
                ref = Node(self.tokenizers['RES'].get_token('REF'),
                           is_reserved=True,
                           parent=func_call)
                Node(self.tokenizers['NAME'].get_token(func_name),
                     is_reserved=False,
                     parent=ref,
                     decl_line=decl_line)

            # Group the call's arguments under one ARGUMENTS node.
            arg_node = None
            if len(list(ast_item.get_arguments())) > 0:
                arg_node = Node(self.tokenizers['RES'].get_token('ARGUMENTS'),
                                is_reserved=True,
                                parent=func_call)

            for arg_item in ast_item.get_arguments():
                parse_item(arg_item, arg_node, program)

        return func_call
示例#18
0
    def create_subtree(self, tokens, operators, parent_node):
        """Build a nested binary-operator subtree from raw tokens.

        Args:
            tokens: clang tokens of the expression (operands and
                operators interleaved).
            operators: list of (operator_spelling, token_index) pairs;
                entries are consumed (removed) as the tree is built.
            parent_node: node the subtree is attached to.

        NOTE(review): ``operators`` is mutated via ``remove`` while being
        iterated, and the recursion re-walks the remainder — the exact
        statement order is load-bearing, so the code is left untouched.
        """
        for op, idx in operators:
            # More tokens remain to the right: emit the left operand and
            # recurse for the rest of the expression.
            if idx + 1 < len(tokens) - 1:
                op_node = Node(
                    self.tokenizers['RES'].get_token(f'BINARY_OPERATOR_{op}'),
                    is_reserved=True,
                    parent=parent_node)
                t = tokens[idx - 1]
                if t.kind == TokenKind.IDENTIFIER:
                    ref = Node(self.tokenizers['RES'].get_token('REF'),
                               is_reserved=True,
                               parent=op_node)
                    Node(self.tokenizers['NAME'].get_token(t.spelling),
                         is_reserved=False,
                         parent=ref)
                else:
                    lit = Node(
                        self.tokenizers['RES'].get_token('INTEGER_LITERAL'),
                        is_reserved=True,
                        parent=op_node)
                    Node(self.tokenizers['LITERAL'].get_token(t.spelling),
                         is_reserved=False,
                         parent=lit)

                operators.remove((op, idx))
                self.create_subtree(tokens, operators, op_node)
            else:
                # Innermost operator: emit both of its operands directly.
                op_node = Node(
                    self.tokenizers['RES'].get_token(f'BINARY_OPERATOR_{op}'),
                    is_reserved=True,
                    parent=parent_node)

                for t in [tokens[idx - 1], tokens[idx + 1]]:
                    if t.kind == TokenKind.IDENTIFIER:
                        ref = Node(self.tokenizers['RES'].get_token('REF'),
                                   is_reserved=True,
                                   parent=op_node)
                        Node(self.tokenizers['NAME'].get_token(t.spelling),
                             is_reserved=False,
                             parent=ref)
                    else:
                        lit = Node(self.tokenizers['RES'].get_token(
                            'INTEGER_LITERAL'),
                                   is_reserved=True,
                                   parent=op_node)
                        Node(self.tokenizers['LITERAL'].get_token(t.spelling),
                             is_reserved=False,
                             parent=lit)

        # Degenerate case: a single operand with no operators at all.
        if len(tokens) == 1:
            t = tokens[0]
            if t.kind == TokenKind.IDENTIFIER:
                ref = Node(self.tokenizers['RES'].get_token('REF'),
                           is_reserved=True,
                           parent=parent_node)
                Node(self.tokenizers['NAME'].get_token(t.spelling),
                     is_reserved=False,
                     parent=ref)
            else:
                lit = Node(self.tokenizers['RES'].get_token('INTEGER_LITERAL'),
                           is_reserved=True,
                           parent=parent_node)
                Node(self.tokenizers['LITERAL'].get_token(t.spelling),
                     is_reserved=False,
                     parent=lit)
# ---- Example #19 ----
    def handle_type(self,
                    ast_item,
                    parent_node,
                    children=None,
                    recursion_level=0):
        """Attach a subtree describing ``ast_item``'s type to ``parent_node``.

        Wraps the eventual builtin-type leaf in qualifier/shape nodes as
        needed: TYPE_KIND, POINTER (one per indirection level),
        CONST_QUALIFIED, TYPE_ARRAY + ARRAY_SIZES, and l/r-value reference
        kinds.  For functions the return type is described instead of the
        function type itself.

        Args:
            ast_item: clang cursor whose type is serialized.
            parent_node: Node the type subtree is attached to.
            children: when None (default), a TYPE_KIND wrapper is inserted
                unless the parent is a TYPE_DEF.
            recursion_level: unused; kept for interface compatibility.
        """
        canonical_type = ast_item.type.get_canonical()
        node_type = ast_item.type.spelling

        if self.tokenizers['RES'].get_label(parent_node.token) == 'TYPE_DEF':
            # A typedef describes its underlying type, not the alias.
            node_type = ast_item.underlying_typedef_type.spelling

        elif children is None:
            parent_node = Node(self.tokenizers['RES'].get_token('TYPE_KIND'),
                               is_reserved=True,
                               parent=parent_node)

        # Unwrap each pointer indirection into a nested POINTER node.
        while canonical_type.kind == TypeKind.POINTER:
            parent_node = Node(self.tokenizers['RES'].get_token('POINTER'),
                               is_reserved=True,
                               parent=parent_node)
            canonical_type = canonical_type.get_pointee()

        if utils.is_function(
                ast_item
        ) or canonical_type.kind == TypeKind.FUNCTIONPROTO:
            # For functions the relevant type is the return type.
            canonical_type = ast_item.type.get_result()

            node_type = ast_item.type.get_result().spelling

        # Unwrap pointer levels of a function's return type as well.
        while canonical_type.kind == TypeKind.POINTER:
            parent_node = Node(self.tokenizers['RES'].get_token('POINTER'),
                               is_reserved=True,
                               parent=parent_node)
            canonical_type = canonical_type.get_pointee()

            # BUG FIX: the original assigned ``canonical_type.get_pointee()``
            # (a clang Type object) to ``node_type``, which the code below
            # treats as a string (``startswith`` / ``re.sub``) and would
            # crash on.  Keep ``node_type`` as the spelling string instead.
            node_type = canonical_type.spelling

        if canonical_type.is_const_qualified() or node_type.startswith(
                'const'):
            parent_node = Node(
                self.tokenizers['RES'].get_token('CONST_QUALIFIED'),
                is_reserved=True,
                parent=parent_node)

        # Only the first token is inspected: a leading 'auto' short-circuits
        # to a TYPE/auto leaf since the deduced type is not spelled out.
        for token in ast_item.get_tokens():
            if 'auto' == token.spelling:
                type_node = Node(self.tokenizers['RES'].get_token('TYPE'),
                                 is_reserved=True,
                                 parent=parent_node)
                Node(self.tokenizers['TYPE'].get_token('auto'),
                     is_reserved=False,
                     parent=type_node)
                return
            break

        if canonical_type.kind in [
                TypeKind.CONSTANTARRAY, TypeKind.VARIABLEARRAY,
                TypeKind.INCOMPLETEARRAY, TypeKind.DEPENDENTSIZEDARRAY
        ]:
            parent_node = Node(self.tokenizers['RES'].get_token('TYPE_ARRAY'),
                               is_reserved=True,
                               parent=parent_node)

            array_sizes_node = Node(
                self.tokenizers['RES'].get_token('ARRAY_SIZES'),
                is_reserved=True,
                parent=parent_node)
            for array_size in re.findall(r'\[.*?\]', ast_item.type.spelling):
                # If it does not only consist of numbers then it is a
                # reference to a variable (or expression) used as the size.
                if not all(str(s).isdigit() for s in array_size[1:-1]):
                    self.extract_variable_array_sizes_subtree(
                        array_size[1:-1], array_sizes_node)
                else:
                    array_size_type = Node(
                        self.tokenizers['RES'].get_token("INTEGER_LITERAL"),
                        is_reserved=True,
                        parent=array_sizes_node)
                    Node(self.tokenizers['LITERAL'].get_token(
                        array_size[1:-1]),
                         is_reserved=False,
                         parent=array_size_type)

            # Strip the extent brackets; the leaf describes the element type.
            node_type = re.sub(r'\[.*?\]', '', node_type)

        # For example: int& a = x (lvalue reference) or
        # int&& b = 30 (rvalue reference).
        elif canonical_type.kind == TypeKind.LVALUEREFERENCE or canonical_type.kind == TypeKind.RVALUEREFERENCE:
            parent_node = Node(self.tokenizers['RES'].get_token(
                ast_item.type.kind.name),
                               is_reserved=True,
                               parent=parent_node)

        self.extract_builtin_type(node_type, parent_node)
# ---- Example #20 ----
    def handle_declaration(self, ast_item, parent_node, parse_item, program):
        """Create the subtree for a declaration cursor and return the node
        under which the declaration's children should be parsed.

        Handles functions (including templates, constructors, methods),
        classes (including class templates), variable/field declarations,
        and struct declarations.

        Args:
            ast_item: clang cursor for the declaration.
            parent_node: Node the declaration subtree is attached to.
            parse_item: callback used to recursively parse child cursors.
            program: opaque context forwarded to ``parse_item``.

        Returns:
            The Node children of this declaration attach to: FUNC_DECL for
            functions, COMPOUND_STMT for classes/structs, DECLARATOR for
            variables, or ``parent_node`` unchanged for a struct variable
            declaration (the struct itself was declared earlier).
        """
        if utils.is_function(ast_item):
            # Function templates get a TEMPLATE_DECL wrapper listing their
            # template type parameters.
            if (ast_item.kind == CursorKind.FUNCTION_TEMPLATE):
                template_decl = Node(
                    self.tokenizers['RES'].get_token('TEMPLATE_DECL'),
                    is_reserved=True,
                    parent=parent_node)
                for child in ast_item.get_children():
                    if child.kind == CursorKind.TEMPLATE_TYPE_PARAMETER:
                        templ_param = Node(self.tokenizers['RES'].get_token(
                            child.kind.name),
                                           is_reserved=True,
                                           parent=template_decl)
                        self.create_terminal_nodes(child.spelling, child,
                                                   templ_param)

            func_decl = Node(self.tokenizers['RES'].get_token(
                ast_item.kind.name),
                             is_reserved=True,
                             parent=parent_node)

            # Record the access specifier (public/private/...) except for
            # free functions directly under the root.
            if ast_item.access_specifier != AccessSpecifier.INVALID and self.tokenizers[
                    'RES'].get_label(parent_node.token) != 'root':
                acc_spec = Node(
                    self.tokenizers['RES'].get_token('ACCESS_SPECIFIER'),
                    is_reserved=True,
                    parent=func_decl)
                Node(self.tokenizers['RES'].get_token(
                    ast_item.access_specifier.name),
                     is_reserved=True,
                     parent=acc_spec)

            name = Node(self.tokenizers['RES'].get_token('NAME'),
                        is_reserved=True,
                        parent=func_decl)
            self.create_terminal_nodes(ast_item.spelling, ast_item, name)
            # Constructors have no return type to serialize.
            if ast_item.kind != CursorKind.CONSTRUCTOR:
                self.handle_type(ast_item, func_decl)

            if ast_item.is_const_method():
                Node(self.tokenizers['RES'].get_token('CONST'),
                     is_reserved=True,
                     parent=func_decl)

            # BUG FIX: the original reset ``parm_declarations = False`` on
            # every loop iteration, so the ``if not parm_declarations`` guard
            # always fired and each parameter got its own PARM_DECL wrapper.
            # Hoisting the flag groups all parameters under one PARM_DECL,
            # which is what the guard was evidently written for.
            parm_declarations = False
            children = ast_item.get_children()
            for child in children:
                if child.kind == CursorKind.PARM_DECL:
                    if not parm_declarations:
                        parm_decl = Node(
                            self.tokenizers['RES'].get_token('PARM_DECL'),
                            is_reserved=True,
                            parent=func_decl)
                        parm_declarations = True

                    self.handle_type(child, parm_decl)

                    declarator = Node(
                        self.tokenizers['RES'].get_token('DECLARATOR'),
                        is_reserved=True,
                        parent=parm_decl)
                    reference = Node(self.tokenizers['RES'].get_token('NAME'),
                                     is_reserved=True,
                                     parent=declarator)
                    self.create_terminal_nodes(child.spelling, child,
                                               reference)

                    for c in child.get_children():
                        parse_item(c, declarator, program)

            # Get children of constructor which are not parameters or compound
            # statements, thus are constructor initializers.  They come in
            # (member ref, initial value) pairs, hence the stride of 2.
            if ast_item.kind == CursorKind.CONSTRUCTOR:
                constructor_inits = [
                    child for child in ast_item.get_children()
                    if child.kind != CursorKind.COMPOUND_STMT
                    and child.kind != CursorKind.PARM_DECL
                ]
                for i in range(0, len(constructor_inits), 2):
                    constr_init = Node(self.tokenizers['RES'].get_token(
                        'CONSTRUCTOR_INITIALIZER'),
                                       is_reserved=True,
                                       parent=func_decl)

                    member_ref = Node(self.tokenizers['RES'].get_token(
                        constructor_inits[i].kind.name),
                                      is_reserved=True,
                                      parent=constr_init)
                    self.create_terminal_nodes(constructor_inits[i].spelling,
                                               constructor_inits[i],
                                               member_ref)

                    parse_item(constructor_inits[i + 1], constr_init, program)

            return func_decl

        elif utils.is_class(ast_item):
            # Class templates get the same TEMPLATE_DECL wrapper as
            # function templates above.
            if ast_item.kind == CursorKind.CLASS_TEMPLATE:
                template_decl = Node(
                    self.tokenizers['RES'].get_token('TEMPLATE_DECL'),
                    is_reserved=True,
                    parent=parent_node)
                for child in ast_item.get_children():
                    if child.kind == CursorKind.TEMPLATE_TYPE_PARAMETER:
                        templ_param = Node(self.tokenizers['RES'].get_token(
                            child.kind.name),
                                           is_reserved=True,
                                           parent=template_decl)
                        self.create_terminal_nodes(child.spelling, child,
                                                   templ_param)

            class_decl = Node(self.tokenizers['RES'].get_token('CLASS_DECL'),
                              is_reserved=True,
                              parent=parent_node)
            name = Node(self.tokenizers['RES'].get_token('NAME'),
                        is_reserved=True,
                        parent=class_decl)
            self.create_terminal_nodes(ast_item.spelling, ast_item, name)
            cmpnd_stmt = Node(
                self.tokenizers['RES'].get_token('COMPOUND_STMT'),
                is_reserved=True,
                parent=class_decl)

            return cmpnd_stmt
        elif ast_item.kind == CursorKind.VAR_DECL or ast_item.kind == CursorKind.FIELD_DECL or ast_item.kind == CursorKind.UNEXPOSED_DECL:
            var_decl = Node(self.tokenizers['RES'].get_token(
                ast_item.kind.name),
                            is_reserved=True,
                            parent=parent_node)

            # Only fields carry a meaningful access specifier.
            if ast_item.access_specifier != AccessSpecifier.INVALID and ast_item.kind == CursorKind.FIELD_DECL:
                acc_spec = Node(
                    self.tokenizers['RES'].get_token('ACCESS_SPECIFIER'),
                    is_reserved=True,
                    parent=var_decl)
                Node(self.tokenizers['RES'].get_token(
                    ast_item.access_specifier.name),
                     is_reserved=True,
                     parent=acc_spec)

            self.handle_type(ast_item, var_decl)

            declarator = Node(self.tokenizers['RES'].get_token('DECLARATOR'),
                              is_reserved=True,
                              parent=var_decl)
            reference = Node(self.tokenizers['RES'].get_token('NAME'),
                             is_reserved=True,
                             parent=declarator)
            self.create_terminal_nodes(ast_item.spelling, ast_item, reference)

            return declarator

        elif utils.is_struct(ast_item):
            # If parent is declarator we are declaring a structure variable so
            # we do not want to declare the entire structure again.
            if self.tokenizers['RES'].get_label(
                    parent_node.token) != 'DECLARATOR':
                class_decl = Node(
                    self.tokenizers['RES'].get_token('STRUCT_DECL'),
                    is_reserved=True,
                    parent=parent_node)
                name = Node(self.tokenizers['RES'].get_token('NAME'),
                            is_reserved=True,
                            parent=class_decl)
                self.create_terminal_nodes(ast_item.spelling, ast_item, name)
                cmpnd_stmt = Node(
                    self.tokenizers['RES'].get_token('COMPOUND_STMT'),
                    is_reserved=True,
                    parent=class_decl)

                return cmpnd_stmt

            return parent_node