Пример #1
0
def get_context(doc, segment, linear_value):
    """Collect the context for `segment` at search distance `linear_value`.

    Three cases, decided by domutil on segment[0]:
    1. not beyond the parent: the previous AST elements followed by the
       segment itself.
    2. beyond the parent but not beyond the method: recurse on the parent
       AST element with a reduced linear_value.
    3. beyond the method: find the first <call> in `doc` whose name equals
       the enclosing function's name, recurse from that call node, and
       record the function node under the call name.

    :param doc: DOM document searched for <call> nodes
    :param segment: non-empty list of nodes; segment[0] locates the position
    :param linear_value: remaining search distance
    :return (context, functions): a list of nodes and a dict mapping a
        function name to its function node; ([], {}) when no call is found
    """
    node = segment[0]
    context = []
    functions = {}
    if not domutil.beyond_parent(node, linear_value):
        context = domutil.get_previous_AST_elements(node, linear_value)
        context.extend(segment)
    elif not domutil.beyond_method(node, linear_value):
        linear_value -= domutil.get_previous_AST_element_number(node) + 1
        node = domutil.get_parent_AST_element(node)
        context, functions = get_context(doc, [node], linear_value)
    else:
        function_node = domutil.get_function_element(node)
        name_node = domutil.get_first_child_by_tagname(function_node, 'name')
        name = domutil.get_text_content(name_node)
        for call_node in doc.getElementsByTagName('call'):
            call_name_node = domutil.get_first_child_by_tagname(call_node, 'name')
            call_name = domutil.get_text_content(call_name_node)
            if call_name != name:
                continue
            # Compare with 'true' like every other flag in this file; a bare
            # truthiness test would also fire for a configured 'false'.
            if config.get('show_context_search_debug') == 'true':
                blue('beyound method '+name + ' and found call node: ')
            linear_value -= domutil.get_previous_AST_element_number_until_function(node) + 1
            context, functions = get_context(doc, [call_node], linear_value)
            functions[call_name] = function_node
            # only the first matching call site is followed
            return context, functions
        if config.get('show_context_search_debug') == 'true':
            yellow('beyound method '+name+' but did not found call node: ')
    return context, functions
Пример #2
0
def get_segments(doc):
    """Select code segments from `doc` according to config 'code_selection'.

    - 'annotation': one segment per comment containing '@HeliumStart',
      built by get_annotation_segment.
    - 'assertion': for each comment containing '@HeliumAssert' that has a
      next AST element, a single-node segment holding that element.
    - 'loop': a single-node segment for every <while>, then every <for>.

    Any other value is logged as an error and the process exits with
    status 1.

    :param doc: DOM document to search
    :return a list of segments, each segment being a list of nodes
    """
    results = []
    code_selection = config.get('code_selection')
    # 'assertion' used to be swallowed by this first branch, which made the
    # dedicated '@HeliumAssert' branch below unreachable.
    if code_selection == 'annotation':
        for comment in doc.getElementsByTagName('comment'):
            if '@HeliumStart' in domutil.get_text_content(comment):
                results.append(get_annotation_segment(comment))
    elif code_selection == 'assertion':
        for comment in doc.getElementsByTagName('comment'):
            if '@HeliumAssert' not in domutil.get_text_content(comment):
                continue
            if domutil.has_next_AST_element(comment):
                results.append([domutil.get_next_AST_element(comment)])
    elif code_selection == 'loop':
        for while_loop_node in doc.getElementsByTagName('while'):
            results.append([while_loop_node])
        for for_loop_node in doc.getElementsByTagName('for'):
            results.append([for_loop_node])
    else:
        logger.error('unsupported code selection method: ' + repr(code_selection))
        # same effect as exit(1), without relying on the site-provided builtin
        raise SystemExit(1)
    return results
Пример #3
0
def parse_function(node):
    """Break a <function> element into return type, name and parameters.

    :param node: DOM element whose tagName is 'function'
    :return (type_name, function_name, params), where params is the result
        of parse_parameter_list on the <parameter_list> child
    """
    assert domutil.is_element(node) and node.tagName == 'function'

    def first_child(tag):
        return domutil.get_first_child_by_tagname(node, tag)

    return_type_node = first_child('type')
    function_name_node = first_child('name')
    params_node = first_child('parameter_list')
    return_type = domutil.get_text_content(return_type_node)
    function_name = domutil.get_text_content(function_name_node)
    return (return_type, function_name, parse_parameter_list(params_node))
Пример #4
0
def parse_recursive(include_path):
    """Parse a header and, transitively, every header it includes.

    Names of typedefs, structs, #define macros and function declarations
    are collected; names that are empty or start with '_' are skipped.
    Headers that cannot be mapped to a full path are logged and ignored.

    :param include_path: header name as written in an include, e.g. stdio.h
    :return a single set with all collected names
    """
    logger.info('parsing: '+include_path)
    typedefs = set()
    defines = set()
    functions = set()
    pending = {include_path}
    visited = set()

    def is_public(identifier):
        return identifier and not identifier.startswith('_')

    def first_name(element):
        return domutil.get_text_content(
            domutil.get_first_child_by_tagname(element, 'name'))

    while pending:
        include_path = pending.pop()
        visited.add(include_path)
        full_path = include_path_to_full_path(include_path)
        if not full_path:
            logger.warning('this header does not exists: '+include_path)
            continue
        doc = parseString(srcmlutil.get_xml_from_file(full_path))
        # typedef and struct names share one bucket
        for tag in ('typedef', 'struct'):
            for element in doc.getElementsByTagName(tag):
                name = first_name(element)
                if is_public(name):
                    typedefs.add(name)
        for define_node in doc.getElementsByTagName('cpp:define'):
            macro_name_node = domutil.get_first_child_by_tagnames(
                define_node, 'cpp:macro', 'name')
            name = domutil.get_text_content(macro_name_node)
            if is_public(name):
                defines.add(name)
        for decl_node in doc.getElementsByTagName('function_decl'):
            name = first_name(decl_node)
            if is_public(name):
                functions.add(name)
        for include_node in doc.getElementsByTagName('cpp:include'):
            file_node = domutil.get_first_child_by_tagname(include_node, 'cpp:file')
            # drop the surrounding delimiter characters, e.g. "<" and ">"
            target = domutil.get_text_content(file_node)[1:-1]
            if target not in visited and target not in pending:
                pending.add(target)
    return typedefs | defines | functions
Пример #5
0
def parse_decl(node):
    """Parse a <decl> element into its type and variable name.

    Only one variable per declaration is expected. When the name text
    carries an array suffix (from the first '[' onwards), that suffix is
    moved from the name onto the type.

    :param node: DOM element whose tagName is 'decl'
    :return (type, name, init); init is always None (not implemented yet)
    """
    assert domutil.is_element(node) and node.tagName == 'decl'
    type_node = domutil.get_first_child_by_tagname(node, 'type')
    name_node = domutil.get_first_child_by_tagname(node, 'name')
    declared_type = domutil.get_text_content(type_node)
    declared_name = domutil.get_text_content(name_node)
    if '[' in declared_name:
        bracket_at = declared_name.find('[')
        declared_type = declared_type + declared_name[bracket_at:]
        declared_name = declared_name[:bracket_at]
    # TODO init
    return (declared_type, declared_name, None)
Пример #6
0
 def handle_segment(self, doc, segment):
     """Instrument one segment, then build and test it with growing context.

     An empty segment, or one with more lines than config
     'max_segment_size', is skipped. Otherwise the segment is instrumented
     and contexts for linear values 0..max_linear_value are tried until
     get_context returns an empty context. Each context that builds is
     optionally tested and the result is reported according to
     self.analyzer. The instrumentation is removed at the end.

     :param doc: DOM document containing the segment
     :param segment: list of nodes forming the segment
     """
     if not segment:
         return
     segment_text = ''.join(
         domutil.get_text_content(node) + '\n' for node in segment)
     line_count = segment_text.count('\n')
     size_limit = config.get('max_segment_size')
     if size_limit and line_count > int(size_limit):
         return
     if config.get('show_segment_size') == 'true':
         print('segment size: '+str(line_count))
     if config.get('show_parent_function_size') == 'true':
         function_node = domutil.get_parent_by_tagname(segment[0], 'function')
         if function_node:
             function_size = domutil.get_text_content(function_node).count('\n')
         else:
             function_size = 0
         print('parent function size: '+str(function_size))
     if config.get('show_segment') == 'true':
         print(segment_text)
     instrument_segment(doc, segment)
     upper_bound = int(config.get('max_linear_value'))
     stop_criteria = StopCriteria()
     for linear_value in range(upper_bound+1):
         logger.info('context search: ' + str(linear_value))
         if config.get('show_context_search_value') == 'true':
             print('context search value: '+str(linear_value))
         context, functions = get_context(doc, segment, linear_value)
         if not context:
             break
         if not self.builder.build(doc, context, functions):
             continue
         if config.get('run_test') != 'true':
             continue
         result = self.tester.test()
         if self.analyzer == 'recursive':
             if result == True:
                 green('found equivalent loops for recursive call')
                 print_nodes(segment)
         elif self.analyzer == 'invariant':
             if config.get('show_analyze_result') == 'true':
                 print(result)
             stop_criteria.add(result)
             stable_result = stop_criteria.get_stable()
             if stable_result:
                 green(stable_result)
         if config.get('interact_after_test') == 'true':
             input('Enter to continue ...')
     remove_instrument(doc)
Пример #7
0
def get_comment_node_by_annotation(doc, s):
    """Find the first <comment> node whose text contains `s`.

    :param doc: DOM document (or element) to search
    :param s: substring to look for in the comment text
    :return the matching node, or None when no comment matches
    """
    matches = (
        candidate
        for candidate in doc.getElementsByTagName('comment')
        if s in domutil.get_text_content(candidate)
    )
    return next(matches, None)
Пример #8
0
def extract_calls(node):
    """Collect the name text of every <call> element below `node`.

    :param node: DOM node to search
    :return a list of names as strings, in document order, duplicates kept
    """
    return [
        domutil.get_text_content(
            domutil.get_first_child_by_tagname(call, 'name'))
        for call in node.getElementsByTagName('call')
    ]
Пример #9
0
def get_struct_name(code):
    """Return the name of the first <struct> found in `code`.

    :param code: source text handed to domutil.get_doc_from_code
    :return the text of the struct's <name> child, or None when the code
        contains no struct
    """
    struct_nodes = domutil.get_doc_from_code(code).getElementsByTagName('struct')
    if not struct_nodes:
        return None
    name_node = domutil.get_first_child_by_tagname(struct_nodes[0], 'name')
    return domutil.get_text_content(name_node)
Пример #10
0
def extract_to_resolve(node, resolved):
    """Gather every name under `node` that still has to be resolved.

    Collected are: called function names, type names, type casts and
    calls found in #define values, every word of a #define value, enum
    names, and the undefined variables reported by io.get_undefined_vars.
    Empty names are dropped.

    :param node: DOM node that needs resolving
    :param resolved: set of names already resolved; it is also passed to
        io.get_undefined_vars (NOTE(review): that call may add to it,
        confirm) and subtracted from the result
    :return a set of names
    """
    functions = set()
    types = set()
    unknown = set()
    conditional_tags = ('cpp:ifdef', 'cpp:elif', 'cpp:ifndef')

    def name_of(element):
        return domutil.get_text_content(
            domutil.get_first_child_by_tagname(element, 'name'))

    for call in node.getElementsByTagName('call'):
        # inside #ifdef there may be `#elif defined(__sun)`, which is not a real call
        if any(domutil.in_node(call, tag, level=2) for tag in conditional_tags):
            continue
        functions.add(name_of(call))
    for type_node in node.getElementsByTagName('type'):
        if not domutil.in_node(type_node, 'cpp:define', level=4):
            types.add(name_of(type_node))
    for value_node in node.getElementsByTagName('cpp:value'):
        types |= syntaxutil.parse_type_cast(domutil.get_text_content(value_node))
    for define_node in node.getElementsByTagName('cpp:define'):
        functions |= syntaxutil.parse_cpp_define(define_node)
    for value_node in node.getElementsByTagName('cpp:value'):
        # every single word of the macro value is a candidate
        unknown.update(
            re.findall(r'\b\w+\b', domutil.get_text_content(value_node)))
    # When a function's return type is an enum it is not marked as <function>;
    # genuine enums are sorted out later in resolver/localfunc.py.
    for enum_node in node.getElementsByTagName('enum'):
        functions.add(name_of(enum_node))
    variables = io.get_undefined_vars([node], resolved)
    for bucket in (functions, types, variables):
        bucket.discard('')
    return (functions | types | variables | unknown) - resolved
Пример #11
0
def parse_expr(node):
    """Collect the variable names used in an <expr> element.

    Each <name> child is passed through simplify_variable_name, and so is
    the first nested name/name child if there is one (the `a->b` case).

    Known problems:
    - in `it=(item*)ptr;` the type cast is reported as a name
    - an <expr> may contain further <expr> elements

    :param node: DOM element whose tagName is 'expr'
    :return a set of variable names
    """
    assert domutil.is_element(node) and node.tagName == 'expr'
    # TODO move array related code into util
    names = {
        simplify_variable_name(domutil.get_text_content(child))
        for child in domutil.get_children_by_tagname(node, 'name')
    }
    # for a->b
    nested = domutil.get_first_child_by_tagnames(node, 'name', 'name')
    if nested:
        names.add(simplify_variable_name(domutil.get_text_content(nested)))
    return names
Пример #12
0
def get_comment_nodes_by_annotation(doc, s):
    """Find every <comment> node whose text contains `s`.

    :param doc: DOM document (or element) to search
    :param s: substring to look for in the comment text
    :return a list of matching nodes, empty when nothing matches
    """
    return [
        candidate
        for candidate in doc.getElementsByTagName('comment')
        if s in domutil.get_text_content(candidate)
    ]
Пример #13
0
def parse_typedef(node):
    """Parse a <typedef> element into (alias, original).

    Examples of accepted input:
    <typedef>typedef <type>struct <name>A</name> *</type> <name>hello_t</name>;</typedef>
    <typedef>typedef <type><struct>struct <name>_stritem</name> <block> ... </struct></type> <name>item</name>;</typedef>

    A typedef without a <type> child is treated as wrapping a
    <function_decl>, e.g. `typedef void *func(int a, int b)`; the result
    is then (func, '').

    :param node: DOM element whose tagName is 'typedef'
    :return (alias, original), both stripped; original is the text of the
        <type> child with 'block' excluded
    """
    assert domutil.is_element(node) and node.tagName == 'typedef'
    type_node = domutil.get_first_child_by_tagname(node, 'type')
    if type_node:
        alias_node = domutil.get_first_child_by_tagname(node, 'name')
        original = domutil.get_text_content_except(type_node, 'block')
        alias = domutil.get_text_content(alias_node)
        return (alias.strip(), original.strip())
    # no <type>: the alias is the name of the wrapped function declaration
    decl_node = domutil.get_first_child_by_tagname(node, 'function_decl')
    alias_node = domutil.get_first_child_by_tagname(decl_node, 'name')
    return (domutil.get_text_content(alias_node).strip(), '')
Пример #14
0
def func(directory):
    """Strip comments from every .c and .h file below `directory`, in place.

    Each matching file name is printed, the file is parsed with
    domutil.get_doc_from_c_file, all <comment> nodes are removed, and the
    file is overwritten with the text content of the remaining document.

    :param directory: root of the tree to walk
    """
    for root, _, files in os.walk(directory):
        for basename in files:
            if not basename.endswith(('.c', '.h')):
                continue
            filename = os.path.join(root, basename)
            print(filename)
            doc = domutil.get_doc_from_c_file(filename)
            for comment_node in doc.getElementsByTagName('comment'):
                comment_node.parentNode.removeChild(comment_node)
            stripped = domutil.get_text_content
            with open(filename, 'w') as out:
                out.write(stripped(doc.documentElement))
Пример #15
0
def get_annotation_segment(node):
    """Collect the sibling run that starts at `node` (the HeliumStart comment).

    Following siblings are appended until a <comment> element containing
    '@HeliumStop' has been appended, or until there are no more siblings.

    :param node: starting node; always the first item of the result
    :return a list of nodes, including the closing comment when present
    """
    collected = [node]
    current = node.nextSibling
    while current:
        collected.append(current)
        is_stop = (
            domutil.is_element(current)
            and current.tagName == 'comment'
            and '@HeliumStop' in domutil.get_text_content(current)
        )
        if is_stop:
            break
        current = current.nextSibling
    return collected
Пример #16
0
def parse_cpp_define(node):
    """Find the function-like names used in the value of a <cpp:define>.

    The text of the <cpp:value> child is re-parsed as code; the names of
    all <call> and <macro> elements in it are collected. A value such as
    `emitf(__LINE__, "\\t" __VA_ARGS__` comes back as <macro>, not <call>.

    :param node: a <cpp:define> element
    :return a set of names to resolve
    """
    value_node = domutil.get_first_child_by_tagname(node, 'cpp:value')
    value_doc = domutil.get_doc_from_code(domutil.get_text_content(value_node))
    to_resolve = set()
    for tag in ('call', 'macro'):
        for element in value_doc.getElementsByTagName(tag):
            name_node = domutil.get_first_child_by_tagname(element, 'name')
            to_resolve.add(domutil.get_text_content(name_node))
    return to_resolve
Пример #17
0
 def generate(self):
     """Write generate.c into self.output_folder (created if missing).

     File layout:
     - `#include "support.h"`
     - `main` containing one declaration per entry of self.inputs between
       the //@HeliumInput and //@HeliumInputEnd markers, followed by the
       text of self.context with every `return ...;` replaced by
       `return 0;`
     - the text of every node in self.functions

     Depending on config 'show_context' and 'show_context_size', the
     context text and the context / function line counts are printed.
     """
     os.makedirs(self.output_folder, exist_ok=True)
     # The with-statement closes the file, so no explicit close() is needed.
     with open(os.path.join(self.output_folder, "generate.c"), "w") as f:
         f.write('#include "support.h"\n')
         # main
         f.write("int main() {\n")
         f.write("//@HeliumInput\n")
         for var_name, type_name in self.inputs.items():
             type_component = typeutil.parse_type(type_name)
             base = type_component["base"]
             array = type_component["array"]
             pointer = type_component["pointer"]
             f.write(base + pointer + " " + var_name + array + ";\n")
         f.write("//@HeliumInputEnd\n")
         f.write("/**********Context********/\n")
         context_text = "".join(
             domutil.get_text_content(node) for node in self.context)
         # the context is pasted into main(), so every return must yield an int
         context_text = re.sub(r"\breturn\b[^;\n]*;", "return 0;", context_text)
         if config.get("show_context") == "true":
             print(context_text)
         context_size = context_text.count("\n") + 1
         f.write(context_text)
         f.write("\n}\n")
         # functions
         function_size = 0
         for function in self.functions.values():
             text = domutil.get_text_content(function)
             function_size += text.count("\n")
             f.write(text)
             f.write("\n")
         if config.get("show_context_size") == "true":
             print("context size: " + str(context_size))
             if function_size != 0:
                 print("context function size: " + str(function_size))
Пример #18
0
def get_segment_nodes(doc):
    """Get the element nodes between //@HeliumStart and //@HeliumStop.

    Siblings after the first comment containing '@HeliumStart' are
    scanned; element nodes are collected up to and including the first
    one whose text contains '@HeliumStop'.

    :param doc: DOM document to search
    :return a list of nodes; [] when there is no start annotation, None
        when the start annotation has no matching stop
    """
    # The annotation used to be misspelled '@HelumStart', so the start
    # comment was never found.
    comment_node = get_comment_node_by_annotation(doc, '@HeliumStart')
    if not comment_node:
        return []
    results = []
    node = comment_node
    while node.nextSibling:
        node = node.nextSibling
        if domutil.is_element(node):
            results.append(node)
            if '@HeliumStop' in domutil.get_text_content(node):
                return results
    # ran out of siblings without seeing the stop annotation
    return None
Пример #19
0
def get_struct_alias(code):
    """Return the alias introduced by `typedef struct xxx {} name;`.

    Only the first <typedef> in `code` is inspected. If its <type> child
    holds no <struct>, a warning is logged.

    :param code: source text handed to domutil.get_doc_from_code
    :return the alias name, or None when there is no typedef or the
        typedef does not wrap a struct
    """
    typedef_nodes = domutil.get_doc_from_code(code).getElementsByTagName('typedef')
    if not typedef_nodes:
        return None
    first_typedef = typedef_nodes[0]
    type_node = domutil.get_first_child_by_tagname(first_typedef, 'type')
    if domutil.get_first_child_by_tagname(type_node, 'struct'):
        alias_node = domutil.get_first_child_by_tagname(first_typedef, 'name')
        return domutil.get_text_content(alias_node)
    logger.warning('it is not a structure')
    return None
Пример #20
0
 def instrument_input(self):
     """Replace the //@HeliumInputEnd marker in self.code with input code.

     For every input declaration found by annotation.get_input_nodes the
     code returned by self.generate_input is appended. With config
     'handle_array' enabled, declarations whose type contains '[' are
     also removed from self.code. Nothing happens when there are no
     input nodes.
     """
     logger.info('instrumenting input')
     generated = ''
     declarations = annotation.get_input_nodes(self.doc)
     if not declarations:
         return
     for declaration in declarations:
         # each declaration is expected to be a 'decl_stmt'
         type_name, var_name, _ = syntaxutil.parse_decl_stmt(declaration)
         if config.get('handle_array') == 'true' and '[' in type_name:
             declaration_text = domutil.get_text_content(declaration)
             self.code = self.code.replace(declaration_text, '')
         # We need to do the simplify; the limit is reset for every variable
         self.struct_limit = 30
         generated += self.generate_input(type_name, var_name)
     self.code = self.code.replace('//@HeliumInputEnd', generated, 1)
Пример #21
0
def get_input_nodes(doc):
    """Get the <decl_stmt> nodes between //@HeliumInput and //@HeliumInputEnd.

    Siblings after the first comment containing '@HeliumInput' are
    scanned. <decl_stmt> elements are collected; the scan ends at the
    first other element whose text contains '@HeliumInputEnd'.

    :param doc: DOM document to search
    :return a list of nodes; [] when there is no start annotation, None
        when the end annotation is never reached
    """
    start = get_comment_node_by_annotation(doc, '@HeliumInput')
    if not start:
        return []
    declarations = []
    current = start.nextSibling
    while current:
        if domutil.is_element(current):
            if current.tagName == 'decl_stmt':
                declarations.append(current)
            elif '@HeliumInputEnd' in domutil.get_text_content(current):
                return declarations
        current = current.nextSibling
    return None
Пример #22
0
def get_undefined_vars(nodes, resolved):
    """Walk `nodes` recursively and report names used without a definition.

    Declarations, for-loop init variables and parameters are added to
    `resolved` (the set is modified in place). Names used in <expr>
    elements, and upper-case tokens in #define values, are reported when
    they are not in `resolved`. Children are visited with a copy of
    `resolved` extended by the names found so far.

    :param nodes: a list of dom nodes
    :param resolved: a set of names as strings that are assumed defined
    :return a set of names
    """
    undefined = set()
    conditional_tags = ('cpp:ifdef', 'cpp:elif', 'cpp:ifndef')
    for node in nodes:
        if not domutil.is_element(node):
            continue
        tag = node.tagName
        if tag == 'decl_stmt':
            _, declared, _ = syntaxutil.parse_decl_stmt(node)
            resolved.add(declared)
        elif tag == 'expr':
            # inside #ifdef there may be `#elif defined(__sun)`; such an
            # expression is skipped together with its children
            if any(domutil.in_node(node, cond, level=2) for cond in conditional_tags):
                continue
            for name in syntaxutil.parse_expr(node):
                # sys.resolve_single covers names like uint8_t, false, true, NULL
                if not sys.resolve_single(name) and name not in resolved:
                    undefined.add(name)
        elif tag == 'for':
            init_node = domutil.get_first_child_by_tagname(node, 'init')
            if init_node:
                _, loop_var = syntaxutil.parse_for_init(init_node)
                if loop_var:
                    resolved.add(loop_var)
        elif tag == 'parameter_list':
            for _, param_name in syntaxutil.parse_parameter_list(node):
                resolved.add(param_name)
        elif tag == 'cpp:define':
            value_node = domutil.get_first_child_by_tagname(node, 'cpp:value')
            value_text = domutil.get_text_content(value_node)
            for token in re.findall(r'([A-Z_]{2,})', value_text):
                if token not in resolved:
                    undefined.add(token)
        undefined.update(get_undefined_vars(node.childNodes, resolved | undefined))
    return undefined
Пример #23
0
def build(function_node):
    """Generate a C program that calls the given function once.

    The program consists of the module-level `includes`, the function
    text with a `printf("%d ", 1);` inserted right after its first '{',
    and a `main` that declares one variable per parameter, adds generated
    input code for parameters of primitive base type, and calls the
    function with those variables.

    :param function_node: a <function> element
    :return the program source as a string
    """
    _, func_name, fields = syntaxutil.parse_function(function_node)
    function_code = domutil.get_text_content(function_node)
    body_start = function_code.find('{') + 1
    instrumented_function = (
        function_code[:body_start]
        + '\nprintf("%d ", 1);\n'
        + function_code[body_start:]
    )
    parts = [includes, instrumented_function, '\nint main() {\n']
    for type_name, var_name in fields:
        parts.append(type_name + ' ' + var_name + ';\n')
        base_type = typeutil.parse_type(type_name)['base']
        if typeutil.is_primitive_type(base_type):
            parts.append(instrumenter.generate_primitive_input(type_name, var_name))
    arguments = ','.join([var for _, var in fields])
    parts.append(func_name + '(' + arguments + ');\n')
    parts.append('}\n')
    return ''.join(parts)
Пример #24
0
def parse_struct(node):
    """Parse a <struct> element into its name and fields.

    Only <decl_stmt> children of the struct's <block> become fields, so
    anonymous inner enums, unions and structs are not handled.

    :param node: DOM element whose tagName is 'struct'
    :return (name, [(type1, field1), (type2, field2), ...]); name is ''
        when the struct has no <name> child
    """
    assert domutil.is_element(node) and node.tagName == 'struct'
    name_node = domutil.get_first_child_by_tagname(node, 'name')
    block_node = domutil.get_first_child_by_tagname(node, 'block')
    struct_name = domutil.get_text_content(name_node) if name_node else ''
    # keep only (type, name) from each parsed declaration
    fields = [
        parse_decl_stmt(statement)[0:2]
        for statement in domutil.get_children_by_tagname(block_node, 'decl_stmt')
    ]
    return (struct_name, fields)
Пример #25
0
    def sort_resolved(self):
        """Assemble all resolved snippets into one block of code.

        Keys of self.resolved have the form `name.kind`. For kinds d, f
        and v the stripped first item of the entry is kept; for kinds
        g, e, t, s and u the entry itself is kept (g and e both count as
        enums). Function declarations are derived from self.functions and
        from the resolved functions by cutting at the first '{'.

        :return the code, in the order defines, sorted
            structs/unions/typedefs/enums, variables, declarations,
            functions, each under a banner comment
        """
        defines = set()
        enums = set()
        structs = set()
        typedefs = set()
        unions = set()
        variables = set()
        declares = set()
        functions = set()
        # kinds whose stripped code text is stored
        code_buckets = {"d": defines, "f": functions, "v": variables}
        # kinds whose whole resolved entry is stored
        entry_buckets = {
            "g": enums,
            "e": enums,
            "t": typedefs,
            "s": structs,
            "u": unions,
        }
        for key in self.resolved:
            _, kind = key.split(".")
            entry = self.resolved[key]
            code_text = entry[0].strip()
            if kind in code_buckets:
                code_buckets[kind].add(code_text)
            elif kind in entry_buckets:
                entry_buckets[kind].add(entry)

        def declaration_of(definition):
            return definition[: definition.find("{")].strip() + ";"

        # Declarations for the functions in generate.c go here as well,
        # because the functions in support.h may also use them.
        for function in self.functions.values():
            declaration = declaration_of(domutil.get_text_content(function))
            if declaration.count(";") <= 1:
                declares.add(declaration)
        for definition in functions:
            declaration = declaration_of(definition)
            # temp fix for strange syntax: skip anything with more than one ';'
            if declaration.count(";") <= 1:
                declares.add(declaration)
        # structs, unions, one-line typedefs and enums are sorted together
        sut_code = sort_struct(structs | unions | typedefs | enums)
        return "".join(
            [
                "/***** Defines ******/\n",
                "\n".join(defines),
                "\n/***** sut ******/\n",
                sut_code,
                "\n/***** variables ******/\n",
                "\n".join(variables),
                "\n/****** declares ******/\n",
                "\n".join(declares),
                "\n/****** functions ******/\n",
                "\n".join(functions),
            ]
        )
Пример #26
0
def print_nodes(nodes):
    """Print the text content of each node, with no separator or newline.

    :param nodes: iterable of DOM nodes
    """
    for item in nodes:
        text = domutil.get_text_content(item)
        print(text, end='')