def get_context(doc, segment, linear_value):
    """Collect the context nodes that precede ``segment``.

    The search starts at the first node of ``segment``.  Depending on what
    the ``domutil`` helpers report for ``linear_value`` it either

    * stays inside the current parent and returns the preceding AST elements
      followed by the segment itself,
    * climbs to the parent AST element and retries with a reduced
      ``linear_value``, or
    * leaves the enclosing function and continues from the first ``<call>``
      element in ``doc`` whose name equals that function's name.

    :param doc: DOM document searched for ``<call>`` elements
    :param segment: non-empty list of DOM nodes; ``segment[0]`` is the
        starting point of the search
    :param linear_value: how far back to search (the unit is defined by the
        ``domutil`` helpers)
    :return: ``(context, functions)``; ``context`` is a list of nodes and
        ``functions`` maps a function name to its function node for each
        function boundary crossed.  Both are empty when the search leaves
        the function and no matching call is found.
    """
    node = segment[0]
    context = []
    functions = {}
    if not domutil.beyond_parent(node, linear_value):
        context = domutil.get_previous_AST_elements(node, linear_value)
        context.extend(segment)
    elif not domutil.beyond_method(node, linear_value):
        linear_value -= domutil.get_previous_AST_element_number(node) + 1
        node = domutil.get_parent_AST_element(node)
        context, functions = get_context(doc, [node], linear_value)
    else:
        function_node = domutil.get_function_element(node)
        name_node = domutil.get_first_child_by_tagname(function_node, 'name')
        name = domutil.get_text_content(name_node)
        # Config values are compared against the string 'true' everywhere
        # else; a bare truthiness test would also fire for 'false'.
        debug = config.get('show_context_search_debug') == 'true'
        for call_node in doc.getElementsByTagName('call'):
            call_name_node = domutil.get_first_child_by_tagname(call_node, 'name')
            call_name = domutil.get_text_content(call_name_node)
            if call_name != name:
                continue
            if debug:
                blue('beyound method '+name + ' and found call node: ')
            linear_value -= domutil.get_previous_AST_element_number_until_function(node) + 1
            context, functions = get_context(doc, [call_node], linear_value)
            # Remember the function we stepped out of.
            functions[call_name] = function_node
            return context, functions
        if debug:
            yellow('beyound method '+name+' but did not found call node: ')
    return context, functions
def get_segments(doc):
    """Return the code segments selected by the ``code_selection`` setting.

    * ``'annotation'`` or ``'assertion'``: one segment per ``<comment>``
      whose text contains ``@HeliumStart``, built by
      ``get_annotation_segment``.
    * ``'loop'``: one single-node segment per ``<while>`` element, followed
      by one per ``<for>`` element.

    Any other value is logged as an error and ends the process with exit
    status 1.

    :param doc: DOM document to scan
    :return: a list of segments, each segment being a list of nodes
    """
    code_selection = config.get('code_selection')
    # NOTE(review): 'assertion' was always served by this annotation path
    # because the first test already matched it.  A later, unreachable
    # branch (it took the AST element following each '@HeliumAssert'
    # comment) has been removed -- confirm this is the intended behaviour
    # for 'assertion'.
    if code_selection in ('annotation', 'assertion'):
        return [
            get_annotation_segment(comment)
            for comment in doc.getElementsByTagName('comment')
            if '@HeliumStart' in domutil.get_text_content(comment)
        ]
    if code_selection == 'loop':
        results = [[loop_node] for loop_node in doc.getElementsByTagName('while')]
        results.extend([loop_node] for loop_node in doc.getElementsByTagName('for'))
        return results
    logger.error('unsupported code selection method: ' + repr(code_selection))
    # Same effect as exit(1), without relying on the ``site`` builtin.
    raise SystemExit(1)
def parse_function(node):
    """Split a ``<function>`` element into return type, name and parameters.

    :param node: DOM element whose tag name is ``function``
    :return: ``(type_name, function_name, params)`` where ``params`` is the
        result of ``parse_parameter_list`` on the ``<parameter_list>``
        child, i.e. ``((type1, param1), (type2, param2), ...)``
    """
    assert domutil.is_element(node) and node.tagName == 'function'

    def first_child(tag):
        # First direct child of the function element with the given tag.
        return domutil.get_first_child_by_tagname(node, tag)

    return_type = domutil.get_text_content(first_child('type'))
    function_name = domutil.get_text_content(first_child('name'))
    parameters = parse_parameter_list(first_child('parameter_list'))
    return (return_type, function_name, parameters)
def parse_recursive(include_path):
    """Gather the public names declared by a header and all it includes.

    Headers are processed from a work set: each one is resolved to a full
    path, converted to srcML, and scanned for typedefs, structs, macro
    definitions and function declarations.  Every ``cpp:include`` found is
    queued unless it was already visited or is already pending.  Headers
    that cannot be resolved to a path are logged and skipped.

    :param include_path: header name as written in an include, e.g.
        ``stdio.h``
    :return: one set mixing typedef/struct names, macro names and declared
        function names; empty names and names starting with ``_`` are
        left out
    """
    logger.info('parsing: '+include_path)

    def plain_name(element):
        return domutil.get_first_child_by_tagname(element, 'name')

    def macro_name(element):
        return domutil.get_first_child_by_tagnames(element, 'cpp:macro', 'name')

    type_names, macro_names, function_names = set(), set(), set()
    # (tag to look for, how to find its name node, where the name goes)
    harvest_plan = (
        ('typedef', plain_name, type_names),
        ('struct', plain_name, type_names),
        ('cpp:define', macro_name, macro_names),
        ('function_decl', plain_name, function_names),
    )

    pending = {include_path}
    visited = set()
    while pending:
        current = pending.pop()
        visited.add(current)
        full_path = include_path_to_full_path(current)
        if not full_path:
            logger.warning('this header does not exists: '+current)
            continue
        doc = parseString(srcmlutil.get_xml_from_file(full_path))

        for tag, locate, sink in harvest_plan:
            for element in doc.getElementsByTagName(tag):
                found = domutil.get_text_content(locate(element))
                if found and not found.startswith('_'):
                    sink.add(found)

        for include_node in doc.getElementsByTagName('cpp:include'):
            file_node = domutil.get_first_child_by_tagname(include_node, 'cpp:file')
            # drop the first and last character, i.e. the enclosing "<>"
            header = domutil.get_text_content(file_node)[1:-1]
            if header not in visited and header not in pending:
                pending.add(header)
    return type_names | macro_names | function_names
def parse_decl(node):
    """Parse a ``<decl>`` element holding exactly one variable.

    The single-variable assumption is guaranteed by the decl splitter
    preprocessor.  When the name text carries an array suffix, everything
    from the first ``[`` onwards is moved from the name onto the type.

    :param node: DOM element whose tag name is ``decl``
    :return: ``(type, name, init)``; ``init`` is always ``None`` for now
    """
    assert domutil.is_element(node) and node.tagName == 'decl'
    type_name = domutil.get_text_content(
        domutil.get_first_child_by_tagname(node, 'type'))
    var_name = domutil.get_text_content(
        domutil.get_first_child_by_tagname(node, 'name'))
    if '[' in var_name:
        identifier, bracket, dimensions = var_name.partition('[')
        type_name = type_name + bracket + dimensions
        var_name = identifier
    # TODO init
    return (type_name, var_name, None)
def handle_segment(self, doc, segment):
    """Instrument ``segment`` and try successively larger contexts for it.

    Nothing happens for an empty segment, or for one whose newline count
    exceeds the optional ``max_segment_size`` setting.  Otherwise the
    segment is instrumented and, for every linear value from 0 up to and
    including ``max_linear_value``, ``get_context`` is asked for a context;
    the search ends as soon as it returns an empty one.  Each context that
    ``self.builder.build`` accepts is, when ``run_test`` is ``'true'``,
    run through ``self.tester.test()`` and the result is handled according
    to ``self.analyzer``:

    * ``'recursive'``: a result equal to ``True`` is reported and the
      segment is printed;
    * ``'invariant'``: the result is fed to a ``StopCriteria`` and any
      stable result it reports is printed.

    The instrumentation is removed again with ``remove_instrument(doc)``
    before returning, also when an exception propagates out of the search.

    :param doc: DOM document the segment belongs to
    :param segment: list of DOM nodes forming the segment
    :return: ``None``
    """
    if not segment:
        return
    segment_text = ''.join(
        domutil.get_text_content(node) + '\n' for node in segment)
    segment_size = segment_text.count('\n')

    max_segment_size = config.get('max_segment_size')
    if max_segment_size and segment_size > int(max_segment_size):
        return
    if config.get('show_segment_size') == 'true':
        print('segment size: '+str(segment_size))
    if config.get('show_parent_function_size') == 'true':
        function_node = domutil.get_parent_by_tagname(segment[0], 'function')
        function_size = 0
        if function_node:
            function_size = domutil.get_text_content(function_node).count('\n')
        print('parent function size: '+str(function_size))
    if config.get('show_segment') == 'true':
        print(segment_text)

    instrument_segment(doc, segment)
    try:
        max_linear_value = int(config.get('max_linear_value'))
        stop_criteria = StopCriteria()
        for i in range(max_linear_value+1):
            logger.info('context search: ' + str(i))
            if config.get('show_context_search_value') == 'true':
                print('context search value: '+str(i))
            context, functions = get_context(doc, segment, i)
            if not context:
                break
            if not self.builder.build(doc, context, functions):
                continue
            if config.get('run_test') != 'true':
                continue
            result = self.tester.test()
            if self.analyzer == 'recursive':
                # Equality with True is kept on purpose: the tester's return
                # type is not visible here, so a plain truthiness test could
                # accept more values than before.
                if result == True:
                    green('found equivalent loops for recursive call')
                    print_nodes(segment)
            elif self.analyzer == 'invariant':
                if config.get('show_analyze_result') == 'true':
                    print(result)
                stop_criteria.add(result)
                stable_result = stop_criteria.get_stable()
                if stable_result:
                    green(stable_result)
            if config.get('interact_after_test') == 'true':
                input('Enter to continue ...')
    finally:
        # Never leave the document instrumented, even if the search fails.
        remove_instrument(doc)
def get_comment_node_by_annotation(doc, s):
    """Find the first ``<comment>`` element whose text contains ``s``.

    :param doc: DOM document (or element) to search
    :param s: substring to look for in the comment text
    :return: the first matching node, or ``None`` when nothing matches
    """
    matching = (
        candidate
        for candidate in doc.getElementsByTagName('comment')
        if s in domutil.get_text_content(candidate)
    )
    return next(matching, None)
def extract_calls(node):
    """List the callee name of every ``<call>`` found below ``node``.

    :param node: DOM node to search
    :return: a list of names as strings, one per ``<call>`` element, in
        the order ``getElementsByTagName`` yields them
    """
    return [
        domutil.get_text_content(
            domutil.get_first_child_by_tagname(call, 'name'))
        for call in node.getElementsByTagName('call')
    ]
def get_struct_name(code):
    """Return the name of the first ``<struct>`` found in ``code``.

    :param code: source text, turned into a DOM document by
        ``domutil.get_doc_from_code``
    :return: the text of the struct's first ``<name>`` child, or ``None``
        when the code contains no ``<struct>`` element
    """
    struct_nodes = domutil.get_doc_from_code(code).getElementsByTagName('struct')
    if not struct_nodes:
        return None
    first_struct = struct_nodes[0]
    return domutil.get_text_content(
        domutil.get_first_child_by_tagname(first_struct, 'name'))
def extract_to_resolve(node, resolved):
    """Collect the names below ``node`` that still have to be resolved.

    Gathered are: callee names of ``<call>`` elements (skipping those
    inside ``#ifdef`` / ``#elif`` / ``#ifndef`` directives), names of
    ``<type>`` elements (skipping those inside ``cpp:define``), type casts
    and every word found in ``cpp:value`` text, function names used by
    ``cpp:define`` elements, names of ``<enum>`` elements, and the
    undefined variables reported by ``io.get_undefined_vars``.

    :param node: DOM node to analyse
    :param resolved: set of names already resolved; it is handed to
        ``io.get_undefined_vars`` before being subtracted from the result
    :return: a set of names, without the empty string and without
        anything in ``resolved``
    """
    conditional_directives = ('cpp:ifdef', 'cpp:elif', 'cpp:ifndef')
    functions, types, unknown = set(), set(), set()

    for call in node.getElementsByTagName('call'):
        # in #ifdef, there may be `#elif defined(__sun)`
        if any(domutil.in_node(call, directive, level=2)
               for directive in conditional_directives):
            continue
        callee = domutil.get_first_child_by_tagname(call, 'name')
        functions.add(domutil.get_text_content(callee))

    for type_node in node.getElementsByTagName('type'):
        if domutil.in_node(type_node, 'cpp:define', level=4):
            continue
        type_name_node = domutil.get_first_child_by_tagname(type_node, 'name')
        types.add(domutil.get_text_content(type_name_node))

    for value_node in node.getElementsByTagName('cpp:value'):
        types |= syntaxutil.parse_type_cast(domutil.get_text_content(value_node))

    for define_node in node.getElementsByTagName('cpp:define'):
        functions |= syntaxutil.parse_cpp_define(define_node)

    # every single word of a macro value is a candidate to resolve
    for value_node in node.getElementsByTagName('cpp:value'):
        unknown.update(
            re.findall(r'\b\w+\b', domutil.get_text_content(value_node)))

    # A function whose return type is an enum is not marked as <function>,
    # so enum names are treated as function names here.  Enums that really
    # are enums are dealt with in resolver/localfunc.py.
    for enum_node in node.getElementsByTagName('enum'):
        enum_name_node = domutil.get_first_child_by_tagname(enum_node, 'name')
        functions.add(domutil.get_text_content(enum_name_node))

    variables = io.get_undefined_vars([node], resolved)

    for names in (functions, types, variables):
        names.discard('')
    return (functions | types | variables | unknown) - resolved
def parse_expr(node):
    """Collect the variable names used by an ``<expr>`` element.

    Every ``<name>`` child contributes its simplified text; for the
    ``a->b`` form the nested ``<name><name>`` node is added as well.

    Known bugs:

    * in ``it=(item*)ptr;`` the type cast is recognised as a name;
    * an ``<expr>`` may contain further ``<expr>`` elements.

    :param node: DOM element whose tag name is ``expr``
    :return: a set of variable names
    """
    assert domutil.is_element(node) and node.tagName == 'expr'
    # TODO move array related code into util
    used = {
        simplify_variable_name(domutil.get_text_content(child))
        for child in domutil.get_children_by_tagname(node, 'name')
    }
    # for a->b
    nested = domutil.get_first_child_by_tagnames(node, 'name', 'name')
    if nested:
        used.add(simplify_variable_name(domutil.get_text_content(nested)))
    return used
def get_comment_nodes_by_annotation(doc, s):
    """Find every ``<comment>`` element whose text contains ``s``.

    :param doc: DOM document (or element) to search
    :param s: substring to look for in the comment text
    :return: a list of matching nodes, possibly empty
    """
    return [
        candidate
        for candidate in doc.getElementsByTagName('comment')
        if s in domutil.get_text_content(candidate)
    ]
def parse_typedef(node):
    """Parse a ``<typedef>`` element into its alias and original type.

    Typical inputs::

        <typedef>typedef <type>struct <name>A</name> *</type>
            <name>hello_t</name>;</typedef>
        <typedef>typedef <type><struct>struct <name>_stritem</name>
            <block> ... </struct></type> <name>item</name>;</typedef>

    A typedef without a ``<type>`` child is treated as wrapping a
    ``<function_decl>`` (e.g. ``typedef void *func(int a, int b)``); the
    declared function name becomes the alias and the original is ``''``.

    :param node: DOM element whose tag name is ``typedef``
    :return: ``(alias, original)``, both stripped of surrounding whitespace;
        the original is the type text with ``block`` content excluded
    """
    assert domutil.is_element(node) and node.tagName == 'typedef'
    type_node = domutil.get_first_child_by_tagname(node, 'type')

    if type_node:
        alias_node = domutil.get_first_child_by_tagname(node, 'name')
        original = domutil.get_text_content_except(type_node, 'block')
        alias = domutil.get_text_content(alias_node)
        return (alias.strip(), original.strip())

    # function typedef: take the name of the wrapped <function_decl>
    declaration = domutil.get_first_child_by_tagname(node, 'function_decl')
    alias_node = domutil.get_first_child_by_tagname(declaration, 'name')
    return (domutil.get_text_content(alias_node).strip(), '')
def func(directory):
    """Remove all comments from the ``.c`` and ``.h`` files below ``directory``.

    Each matching file is printed, parsed with
    ``domutil.get_doc_from_c_file``, stripped of every ``<comment>``
    element, and overwritten in place with the remaining text.  Other
    files are left untouched.

    :param directory: root of the tree to walk
    :return: ``None``
    """
    for root, _, files in os.walk(directory):
        for entry in files:
            if not entry.endswith(('.c', '.h')):
                continue
            filename = os.path.join(root, entry)
            print(filename)
            doc = domutil.get_doc_from_c_file(filename)
            for comment_node in doc.getElementsByTagName('comment'):
                comment_node.parentNode.removeChild(comment_node)
            # Render the new content before opening the file for writing, so
            # a failure here cannot leave behind a truncated source file.
            stripped = domutil.get_text_content(doc.documentElement)
            with open(filename, 'w') as out:
                out.write(stripped)
def get_annotation_segment(node):
    """Collect the siblings from a ``@HeliumStart`` comment to ``@HeliumStop``.

    Starting with ``node`` itself, every following sibling is appended
    until a ``<comment>`` element containing ``@HeliumStop`` has been
    added; that comment is part of the result.  Without a stop comment all
    remaining siblings are returned.

    :param node: the starting node (the ``@HeliumStart`` comment)
    :return: a list of nodes beginning with ``node``
    """
    segment = [node]
    current = node.nextSibling
    while current:
        segment.append(current)
        reached_stop = (
            domutil.is_element(current)
            and current.tagName == 'comment'
            and '@HeliumStop' in domutil.get_text_content(current)
        )
        if reached_stop:
            break
        current = current.nextSibling
    return segment
def parse_cpp_define(node):
    """Find the function names used by the value of a ``#define``.

    Only the ``<cpp:value>`` child is inspected: its text is re-parsed
    with ``domutil.get_doc_from_code`` and the name of every ``<call>``
    and ``<macro>`` element in the result is collected.

    :param node: a ``<cpp:define>`` DOM element
    :return: a set of function names to resolve
    """
    value_node = domutil.get_first_child_by_tagname(node, 'cpp:value')
    doc = domutil.get_doc_from_code(domutil.get_text_content(value_node))

    to_resolve = set()
    # Besides ordinary calls, something like
    #     emitf(__LINE__, "\t" __VA_ARGS__
    # is parsed as <macro>, so both tags are scanned.
    for tag in ('call', 'macro'):
        for element in doc.getElementsByTagName(tag):
            name_node = domutil.get_first_child_by_tagname(element, 'name')
            to_resolve.add(domutil.get_text_content(name_node))
    return to_resolve
def generate(self):
    """Write ``generate.c`` into ``self.output_folder``.

    The file consists of, in order:

    * ``#include "support.h"``;
    * a ``main`` function that declares every variable in ``self.inputs``
      between the ``//@HeliumInput`` and ``//@HeliumInputEnd`` markers,
      followed by the text of the ``self.context`` nodes, in which every
      ``return ...;`` statement is rewritten to ``return 0;``;
    * the text of every function node in ``self.functions``.

    The output folder is created when missing.  Depending on the
    ``show_context`` and ``show_context_size`` settings, the context text
    and the size figures are also printed.

    :return: ``None``
    """
    os.makedirs(self.output_folder, exist_ok=True)
    with open(os.path.join(self.output_folder, "generate.c"), "w") as f:
        f.write('#include "support.h"\n')
        # main
        f.write("int main() {\n")
        f.write("//@HeliumInput\n")
        for var_name, type_name in self.inputs.items():
            type_component = typeutil.parse_type(type_name)
            base = type_component["base"]
            array = type_component["array"]
            pointer = type_component["pointer"]
            f.write(base + pointer + " " + var_name + array + ";\n")
        f.write("//@HeliumInputEnd\n")
        f.write("/**********Context********/\n")

        context_text = "".join(
            domutil.get_text_content(node) for node in self.context
        )
        # The context is pasted into main(), so whatever it used to return
        # is replaced by a plain `return 0;`.
        context_text = re.sub(r"\breturn\b[^;\n]*;", "return 0;", context_text)
        if config.get("show_context") == "true":
            print(context_text)
        context_size = context_text.count("\n") + 1
        f.write(context_text)
        f.write("\n}\n")

        # functions
        function_size = 0
        for function in self.functions.values():
            text = domutil.get_text_content(function)
            function_size += text.count("\n")
            f.write(text)
            f.write("\n")
    # The `with` block has closed the file; no explicit close() is needed.
    if config.get("show_context_size") == "true":
        print("context size: " + str(context_size))
        if function_size != 0:
            print("context function size: " + str(function_size))
def get_segment_nodes(doc):
    """Get the element nodes between ``//@HeliumStart`` and ``//@HeliumStop``.

    The search starts at the first comment containing ``@HeliumStart`` and
    walks its following siblings.  Every element sibling is collected; the
    walk ends with the first element whose text contains ``@HeliumStop``,
    and that element is included in the result.

    :param doc: DOM document to search
    :return: a list of element nodes; ``[]`` when there is no start
        comment, and ``None`` when a start comment exists but no stop
        marker follows it
    """
    # The marker used to be misspelled '@HelumStart', so the start comment
    # could never be found and the function always returned [].
    comment_node = get_comment_node_by_annotation(doc, '@HeliumStart')
    if not comment_node:
        return []
    results = []
    node = comment_node
    while node.nextSibling:
        node = node.nextSibling
        if domutil.is_element(node):
            results.append(node)
            if '@HeliumStop' in domutil.get_text_content(node):
                return results
    # NOTE(review): an unterminated segment yields None rather than a list;
    # kept as-is because callers may rely on it.
    return None
def get_struct_alias(code):
    """Return the alias of a ``typedef struct ... {} name;`` in ``code``.

    Only the first ``<typedef>`` element is considered.  When its
    ``<type>`` has no ``<struct>`` child a warning is logged.

    :param code: source text, turned into a DOM document by
        ``domutil.get_doc_from_code``
    :return: the text of the typedef's ``<name>`` child, or ``None`` when
        there is no typedef or it does not wrap a struct
    """
    typedef_nodes = domutil.get_doc_from_code(code).getElementsByTagName('typedef')
    if not typedef_nodes:
        return None
    # this is typedef struct xxx {} name;
    first_typedef = typedef_nodes[0]
    type_node = domutil.get_first_child_by_tagname(first_typedef, 'type')
    if not domutil.get_first_child_by_tagname(type_node, 'struct'):
        logger.warning('it is not a structure')
        return None
    return domutil.get_text_content(
        domutil.get_first_child_by_tagname(first_typedef, 'name'))
def instrument_input(self):
    """Insert generated input code at the ``//@HeliumInputEnd`` marker.

    For every input declaration reported by
    ``annotation.get_input_nodes(self.doc)`` the code produced by
    ``self.generate_input`` is accumulated, and the first
    ``//@HeliumInputEnd`` in ``self.code`` is then replaced by the
    accumulated text.  When ``handle_array`` is ``'true'``, the text of
    declarations whose type contains ``[`` is removed from ``self.code``.
    ``self.struct_limit`` is set to 30 before each ``generate_input`` call.
    Nothing is changed when there are no input nodes.

    :return: ``None``
    """
    logger.info('instrumenting input')
    declarations = annotation.get_input_nodes(self.doc)
    if not declarations:
        return
    generated = []
    for declaration in declarations:
        # every input node is expected to be a 'decl_stmt'
        type_name, var_name, _ = syntaxutil.parse_decl_stmt(declaration)
        if config.get('handle_array') == 'true' and '[' in type_name:
            declaration_text = domutil.get_text_content(declaration)
            self.code = self.code.replace(declaration_text, '')
        # We need to do the simplify
        self.struct_limit = 30
        generated.append(self.generate_input(type_name, var_name))
    self.code = self.code.replace('//@HeliumInputEnd', ''.join(generated), 1)
def get_input_nodes(doc):
    """Get the declarations between ``//@HeliumInput`` and ``//@HeliumInputEnd``.

    The siblings following the first comment that contains
    ``@HeliumInput`` are walked; ``decl_stmt`` elements are collected until
    another element whose text contains ``@HeliumInputEnd`` is reached.

    :param doc: DOM document to search
    :return: a list of ``decl_stmt`` nodes; ``[]`` when there is no start
        comment, and ``None`` when no end marker follows it
    """
    start = get_comment_node_by_annotation(doc, '@HeliumInput')
    if not start:
        return []
    declarations = []
    sibling = start.nextSibling
    while sibling:
        if domutil.is_element(sibling):
            if sibling.tagName == 'decl_stmt':
                declarations.append(sibling)
            elif '@HeliumInputEnd' in domutil.get_text_content(sibling):
                return declarations
        sibling = sibling.nextSibling
    return None
def get_undefined_vars(nodes, resolved):
    """Find the names used below ``nodes`` that are not defined.

    Elements are visited in order.  Declarations (``decl_stmt``, the
    ``init`` of a ``for``, ``parameter_list``) add their names to
    ``resolved`` in place.  Names used in an ``expr`` are reported unless
    ``sys.resolve_single`` knows them or they are already resolved;
    expressions inside ``#ifdef`` / ``#elif`` / ``#ifndef`` directives are
    skipped together with their children.  For ``cpp:define``, every run of
    two or more upper-case letters/underscores in the value that is not
    resolved is reported.  Children are processed recursively with a copy
    of the known names, so names declared inside a child do not flow back
    into ``resolved``.

    :param nodes: a list of DOM nodes
    :param resolved: a set of names assumed to be defined; updated in place
    :return: a set of undefined names
    """
    conditional_directives = ('cpp:ifdef', 'cpp:elif', 'cpp:ifndef')
    undefined = set()
    for node in nodes:
        if not domutil.is_element(node):
            continue
        tag = node.tagName
        if tag == 'decl_stmt':
            _, declared, _ = syntaxutil.parse_decl_stmt(node)
            resolved.add(declared)
        elif tag == 'expr':
            # in #ifdef, there may be `#elif defined(__sun)`
            if any(domutil.in_node(node, directive, level=2)
                   for directive in conditional_directives):
                continue
            for used in syntaxutil.parse_expr(node):
                # uint8_t, false, true, NULL
                if sys.resolve_single(used):
                    continue
                # here we find the undefined variable
                if used not in resolved:
                    undefined.add(used)
        elif tag == 'for':
            init_node = domutil.get_first_child_by_tagname(node, 'init')
            if init_node:
                _, loop_var = syntaxutil.parse_for_init(init_node)
                if loop_var:
                    resolved.add(loop_var)
        elif tag == 'parameter_list':
            resolved.update(
                param for _, param in syntaxutil.parse_parameter_list(node))
        elif tag == 'cpp:define':
            value_node = domutil.get_first_child_by_tagname(node, 'cpp:value')
            value_text = domutil.get_text_content(value_node)
            undefined.update(
                token
                for token in re.findall(r'([A-Z_]{2,})', value_text)
                if token not in resolved
            )
        undefined |= get_undefined_vars(node.childNodes, resolved | undefined)
    return undefined
def build(function_node):
    """Generate a C program that calls the given function once.

    The program is made of the module-level ``includes``, the function
    text with ``printf("%d ", 1);`` inserted right after its first ``{``,
    and a ``main`` that declares one variable per parameter, adds
    generated input code for parameters whose base type is primitive, and
    finally calls the function with those variables.

    :param function_node: a ``<function>`` DOM element
    :return: the program source as a string
    """
    _, func_name, fields = syntaxutil.parse_function(function_node)
    function_code = domutil.get_text_content(function_node)
    # index just past the first '{' of the function text
    body_start = function_code.find('{') + 1

    parts = [
        includes,
        function_code[:body_start],
        '\nprintf("%d ", 1);\n',
        function_code[body_start:],
        '\nint main() {\n',
    ]
    for type_name, var_name in fields:
        parts.append(type_name + ' ' + var_name + ';\n')
        base_type = typeutil.parse_type(type_name)['base']
        if typeutil.is_primitive_type(base_type):
            parts.append(
                instrumenter.generate_primitive_input(type_name, var_name))
    arguments = ','.join([var for _, var in fields])
    parts.append(func_name + '(' + arguments + ');\n')
    parts.append('}\n')
    return ''.join(parts)
def parse_struct(node):
    """Parse a ``<struct>`` element into its name and fields.

    Only the ``decl_stmt`` children of the struct's ``<block>`` are read,
    so anonymous inner enums, unions or structs are not covered.

    :param node: DOM element whose tag name is ``struct``
    :return: ``(name, [(type1, field1), (type2, field2), ...])``; the name
        is ``''`` when the struct has no ``<name>`` child
    """
    assert domutil.is_element(node) and node.tagName == 'struct'
    name_node = domutil.get_first_child_by_tagname(node, 'name')
    block_node = domutil.get_first_child_by_tagname(node, 'block')
    struct_name = domutil.get_text_content(name_node) if name_node else ''
    # keep only the (type, name) part of each parsed declaration
    members = [
        parse_decl_stmt(statement)[0:2]
        for statement in domutil.get_children_by_tagname(block_node, 'decl_stmt')
    ]
    return (struct_name, members)
def sort_resolved(self):
    """Assemble the resolved code snippets into one ordered source text.

    Keys of ``self.resolved`` have the form ``<name>.<kind>`` where the
    kind is a single letter: ``d`` define, ``f`` function, ``v`` variable,
    ``g``/``e`` enum, ``t`` typedef, ``s`` struct, ``u`` union.  Defines,
    functions and variables contribute the stripped first item of their
    entry; the other kinds are passed as whole entries to ``sort_struct``.
    A declaration is derived for every function in ``self.functions`` and
    for every resolved function; candidates containing more than one
    ``;`` are skipped.

    :return: the text with its sections in the order defines, sut,
        variables, declares, functions
    """
    defines, functions, variables = set(), set(), set()
    enums, structs, typedefs, unions = set(), set(), set(), set()
    # kinds that contribute the stripped code text
    by_code = {"d": defines, "f": functions, "v": variables}
    # kinds that contribute the whole resolved entry
    by_entry = {
        "g": enums,
        "e": enums,
        "t": typedefs,
        "s": structs,
        "u": unions,
    }
    # fdvgetsu
    for key in self.resolved:
        _, kind = key.split(".")
        entry = self.resolved[key]
        code = entry[0].strip()
        if kind in by_code:
            by_code[kind].add(code)
        elif kind in by_entry:
            by_entry[kind].add(entry)

    def to_declaration(definition):
        # everything before the first "{", turned into a prototype
        return definition[: definition.find("{")].strip() + ";"

    # Declarations for the functions in generate.c belong here as well,
    # because the functions in support.h may also use them.
    definitions = [
        domutil.get_text_content(function)
        for function in self.functions.values()
    ]
    definitions.extend(functions)
    declares = set()
    for definition in definitions:
        declaration = to_declaration(definition)
        # temp fix for strange syntax
        if declaration.count(";") > 1:
            continue
        declares.add(declaration)

    # sort struct, unions, and one-line typedefs together
    sut_code = sort_struct(structs | unions | typedefs | enums)

    sections = [
        "/***** Defines ******/\n",
        "\n".join(defines),
        "\n/***** sut ******/\n",
        sut_code,
        "\n/***** variables ******/\n",
        "\n".join(variables),
        "\n/****** declares ******/\n",
        "\n".join(declares),
        "\n/****** functions ******/\n",
        "\n".join(functions),
    ]
    return "".join(sections)
def print_nodes(nodes):
    """Print the text content of each node, without any separator.

    :param nodes: iterable of DOM nodes
    :return: ``None``
    """
    rendered = (domutil.get_text_content(item) for item in nodes)
    for chunk in rendered:
        print(chunk, end='')