def visit_base_type(die, dies_dict):
    '''
    Describe a base-type DIE as a plain dictionary.

    The result holds the fixed kind 'base_type', the 'byte_size' attribute
    as read by get_int, and the 'encoding' attribute translated through the
    DW_ATE table. `dies_dict` is accepted for signature parity with the
    other visit_* functions and is not used here.
    '''
    size_in_bytes = get_int(die, 'byte_size')
    encoding_name = DW_ATE[get_int(die, 'encoding')]
    description = dict(
        kind='base_type',
        byte_size=size_in_bytes,
        encoding=encoding_name,
    )
    if DEBUG:
        print(description)
    return description
def process_compile_unit(dwarf, cu, written):
    '''
    Build the list of output statements for one compilation unit.

    Every named top-level child of the unit's root DIE whose
    (tag, name) entry in `written` is not yet WRITTEN_FINAL is handed to
    to_c_process, which appends to the returned list and updates `written`.
    Anonymous children are skipped at this level. `dwarf` is not used by
    this function.
    '''
    root = cu.compile_unit
    main_path = cu.name  # cu name is main file path
    output = []
    last_file = object()  # fresh sentinel object
    # Generate actual syntax tree
    refs_by_offset = {}  # Defined names for dies, as references, indexed by offset
    for entry in root.children:
        file_index = get_int(entry, 'decl_file')
        if file_index is None:
            current_file = None
        else:
            current_file = cu.get_file_path(file_index)
        # TODO: usefully keep track of decl_file per (final) symbol
        # Disabled: emit a banner comment whenever the declaring file changes.
        #   if current_file != last_file:
        #       if current_file == main_path:
        #           s = "Defined in compilation unit"
        #       elif current_file is not None:
        #           s = "Defined in " + current_file
        #       else:
        #           s = "Defined in base"
        #       output.append(Comment("======== " + s))
        entry_name = get_str(entry, 'name')
        if entry_name is not None:  # non-anonymous
            if DEBUG:
                print("root", entry.offset)
            state = written[(entry.tag, entry_name)]
            if state != WRITTEN_FINAL:
                to_c_process(entry, cu.dies_dict, refs_by_offset, output,
                             written)
        last_file = current_file
    return output
def process_compile_unit(dwarf, cu, written):
    '''
    Translate the top-level DIEs of compilation unit `cu` into statements.

    Walks the children of cu.compile_unit; each child that has a name and
    is not yet marked WRITTEN_FINAL in `written` (keyed by (tag, name)) is
    processed by to_c_process, which fills the returned statement list.
    The `dwarf` argument is unused in this body.
    '''
    top_die = cu.compile_unit
    unit_path = cu.name  # cu name is main file path
    decls = []
    previous_path = object()
    # Generate actual syntax tree
    known_refs = {}  # Defined names for dies, as references, indexed by offset
    for die in top_die.children:
        path_id = get_int(die, 'decl_file')
        path = None if path_id is None else cu.get_file_path(path_id)
        # TODO: usefully keep track of decl_file per (final) symbol
        # Disabled code, kept for reference (would announce the declaring
        # file each time it changes):
        #   if path != previous_path:
        #       if path == unit_path:
        #           s = "Defined in compilation unit"
        #       elif path is not None:
        #           s = "Defined in " + path
        #       else:
        #           s = "Defined in base"
        #       decls.append(Comment("======== " + s))
        label = get_str(die, 'name')
        if label is None:
            # anonymous top-level die: nothing to emit from here
            previous_path = path
            continue
        if DEBUG:
            print("root", die.offset)
        if written[(die.tag, label)] != WRITTEN_FINAL:
            to_c_process(die, cu.dies_dict, known_refs, decls, written)
        previous_path = path
    return decls
def visit_structure_type(die, dies_dict):
    '''
    Describe a structure or union DIE, including its members.

    Returns a dictionary with 'kind' (DW_TAG[die.tag]), 'byte_size' and
    'members'. Each member entry carries 'name', 'offset', 'indirection'
    and 'type' (the result of type_name on the resolved member type).
    The resolved type of every member is also appended to the module-level
    `worklist`.

    The member offset comes from the 'data_member_location' attribute:
    - absent: offset 0 (this is how union members are represented);
    - constant form (sdata/data1/data2/data4/data8): the value itself;
    - otherwise: the value is treated as a location expression whose first
      instruction must be DW_OP.plus_uconst; its operand is the offset.
    '''
    type_info = {
        'kind': DW_TAG[die.tag],
        'byte_size': get_int(die, 'byte_size'),
    }
    members = []
    for child in die.children:
        member_info = {'name': get_str(child, 'name')}
        # handle union as "structure with all fields at offset 0"
        offset = 0
        if 'data_member_location' in child.attr_dict:
            location = child.attr_dict['data_member_location']
            if location.form in ['sdata', 'data1', 'data2', 'data4', 'data8']:
                # Constant-class attribute: the value already is the offset.
                offset = location.value
            else:
                # Expression-class attribute: only "plus_uconst <n>" is
                # understood here.
                expr = location.value
                assert(expr.instructions[0].opcode == DW_OP.plus_uconst)
                offset = expr.instructions[0].operand_1
        member_info['offset'] = offset
        # Local deliberately not called `type`, to avoid hiding the builtin.
        member_type = dies_dict.get(get_ref(child, 'type'))
        (member_type, indirection) = parse_type(member_type, dies_dict)
        member_info['indirection'] = indirection
        member_info['type'] = type_name(member_type)
        members.append(member_info)
        if DEBUG:
            print(member_info)
        worklist.append(member_type)
    type_info['members'] = members
    return type_info
def visit_enumeration_type(die, dies_dict):
    '''
    Describe an enumeration DIE and its enumerators.

    The result has kind 'enumeration_type', the DIE's 'byte_size', and an
    'enumerators' list with one {'name', 'value'} entry per child tagged
    DW_TAG.enumerator (other children are ignored). `dies_dict` is unused.
    '''
    description = dict(
        kind='enumeration_type',
        byte_size=get_int(die, 'byte_size'),
    )
    description['enumerators'] = [
        {
            'name': get_str(entry, 'name'),
            'value': get_int(entry, 'const_value'),
        }
        for entry in die.children
        if entry.tag == DW_TAG.enumerator
    ]
    if DEBUG:
        print(description)
    return description
def visit_array_type(die, dies_dict):
    '''
    Describe an array DIE: element type, indirection and length.

    The element type is looked up in `dies_dict` and resolved with
    parse_type. 'length' is upper_bound + 1 of a DW_TAG.subrange_type child
    that has an 'upper_bound'; if several such children exist the last one
    wins, and if none does the length stays None.
    '''
    element = dies_dict.get(get_ref(die, 'type'))
    element, indirection = parse_type(element, dies_dict)
    description = dict(
        kind='array_type',
        indirection=indirection,
        type=type_name(element),
        length=None,
    )
    for entry in die.children:
        if entry.tag == DW_TAG.subrange_type:
            bound = get_int(entry, 'upper_bound')
            if bound is not None:
                description['length'] = bound + 1
    if DEBUG:
        print(description)
    return description
def to_c_process(die, by_offset, names, rv, written, preref=False):
    '''
    Translate one DIE into C syntax-tree nodes and return its type ref.

    A type ref is a callable that, given a declarator name, builds the
    syntax tree referring to the type.

    Parameters:
      die       -- the DIE to translate.
      by_offset -- mapping from DIE offset to DIE, used to follow 'type'
                   references.
      names     -- dict from DIE offset to the type ref already computed
                   for that DIE; updated in place.
      rv        -- list of output statements; declarations and comments
                   are appended to it.
      written   -- mapping keyed by (tag, name) holding a WRITTEN_* level;
                   read with [] for keys this function has not set, so
                   presumably a defaultdict -- confirm against the caller.
      preref    -- when true, and the DIE is named with a constructor in
                   TAG_NODE_CONS, return the by-name reference immediately
                   without emitting the definition.

    Returns the type ref, which is also stored in names[die.offset]. For
    branches that never assign `typeref` (subprogram, unhandled tags) the
    stored value is whatever was set before the branch: the ERROR
    placeholder or the early by-name reference.
    '''
    if DEBUG:
        print("to_c_process", die.offset, preref)

    def get_type_ref(die, attr):
        '''
        Get type ref for a type attribute.

        A type ref is a function that, given a name, constructs a syntax
        tree for referring to that type. A missing reference yields the
        'void' base type; a reference not yet in `names` is resolved by a
        recursive to_c_process call with preref=True.

        NOTE: `attr` is not used; the lookup always reads 'type'.
        '''
        type_ = get_ref(die, 'type')
        if DEBUG:
            print(die.offset, "->", type_)
        if type_ is None:
            ref = base_type_ref('void')
        else:
            ref = names.get(type_)
            if ref is None:
                #ref = base_type_ref('unknown_%i' % type_)
                ref = to_c_process(by_offset[type_], by_offset, names, rv,
                                   written, preref=True)
            # NOTE(review): the placeholder stored in `names` below is
            # ERROR(die.offset), while this compares against ERROR itself --
            # confirm whether this identity test can ever be true.
            elif ref is ERROR:
                raise ValueError("Unexpected recursion")
        return ref

    names[die.offset] = typeref = ERROR(
        die.offset)  # prevent unbounded recursion
    # Typeref based on name: simple
    name = get_str(die, 'name')
    if name is not None:
        try:
            # Called with None as second argument: a reference-only node.
            prefix = TAG_NODE_CONS[die.tag](name, None)
        except KeyError:
            # No by-name constructor for this tag; fall through to the
            # per-tag handling below.
            pass
        else:  # store early, to allow self-reference
            names[die.offset] = typeref = lambda name: c_ast.TypeDecl(
                name, [], prefix)
            if preref:  # early-out
                return typeref
    if die.tag == DW_TAG.enumeration_type:
        items = []
        for enumval in die.children:
            assert (enumval.tag == DW_TAG.enumerator)
            (sname, const_value) = (not_none(get_str(enumval, 'name')),
                                    not_none(get_int(enumval, 'const_value')))
            items.append(EnumItem(sname, const_value))
        enum = c_ast.Enum(name, c_ast.EnumeratorList(items))
        if name is None:
            typeref = anon_ref(enum)
        else:
            if written[(die.tag, name)] != WRITTEN_FINAL:
                rv.append(SimpleDecl(enum))
                written[(die.tag, name)] = WRITTEN_FINAL  # enum definition is final
    elif die.tag == DW_TAG.typedef:
        assert (name is not None)
        ref = get_type_ref(die, 'type')
        if written[(die.tag, name)] != WRITTEN_FINAL:
            rv.append(c_ast.Typedef(name, [], ['typedef'], ref(name)))
            written[(die.tag, name)] = WRITTEN_FINAL  # typedef is always final
        # After the typedef is emitted, refer to it by its bare name.
        typeref = base_type_ref(name)
    elif die.tag == DW_TAG.base_type:  # IdentifierType
        if name is None:
            name = 'unknown_base'  #??
        if written[(die.tag, name)] != WRITTEN_FINAL:
            # Base types only get a comment in the output, no declaration.
            rv.append(Comment("Basetype: %s" % name))
            written[(die.tag, name)] = WRITTEN_FINAL  # base type is final
        typeref = base_type_ref(name)
    elif die.tag == DW_TAG.pointer_type:
        ref = get_type_ref(die, 'type')
        typeref = ptr_to_ref(ref)
    elif die.tag in [
            DW_TAG.const_type, DW_TAG.volatile_type, DW_TAG.restrict_type
    ]:
        ref = get_type_ref(die, 'type')
        typeref = qualified_ref(ref, die.tag)
    elif die.tag in [DW_TAG.structure_type, DW_TAG.union_type]:
        if get_flag(die, 'declaration', False):
            items = None  # declaration only
            level = WRITTEN_PREREF
        else:
            items = []
            for enumval in die.children:
                if enumval.tag != DW_TAG.member:
                    warning(
                        'Unexpected tag %s inside struct or union (die %i)' %
                        (DW_TAG.fmt(enumval.tag), die.offset))
                    continue
                # data_member_location and bit_size / bit_offset as comment for fields
                bit_size = None
                comment = []
                if 'data_member_location' in enumval.attr_dict:
                    ml = enumval.attr_dict['data_member_location']
                    if ml.form in [
                            'sdata', 'data1', 'data2', 'data4', 'data8'
                    ]:
                        # Constant form: the value is used directly.
                        comment.append("+0x%x" % ml.value)
                    elif ml.form in ['block', 'block1']:
                        # Block form: only a leading plus_uconst is
                        # reported; anything else is left without an
                        # offset comment.
                        expr = ml.value
                        if len(expr.instructions) >= 1 and expr.instructions[
                                0].opcode == DW_OP.plus_uconst:
                            comment.append("+0x%x" %
                                           expr.instructions[0].operand_1)
                if 'bit_size' in enumval.attr_dict:
                    bit_size = get_int(enumval, 'bit_size')
                if 'bit_offset' in enumval.attr_dict:
                    # Uses bit_size read above; it is still None if the
                    # member had no 'bit_size' attribute.
                    bit_offset = get_int(enumval, 'bit_offset')
                    comment.append('bit %i..%i' %
                                   (bit_offset, bit_offset + bit_size - 1))
                if 'byte_size' in enumval.attr_dict:
                    comment.append('of %i' %
                                   (8 * get_int(enumval, 'byte_size')))
                # TODO: validate member location (alignment), bit offset
                if 'name' in enumval.attr_dict:
                    ename = expect_str(enumval.attr_dict['name'])
                else:
                    # Member without a name attribute.
                    ename = None
                ref = get_type_ref(enumval, 'type')
                items.append(
                    c_ast.Decl(ename, [], [], [],
                               ref(ename),
                               None,
                               IntConst(bit_size),
                               postcomment=(' '.join(comment))))
            level = WRITTEN_FINAL
        cons = TAG_NODE_CONS[die.tag](name, items)
        if name is None:  # anonymous structure
            typeref = anon_ref(cons)
        else:
            # Emit only if this is more complete than what was written
            # before (a full definition after a declaration-only entry).
            if written[(die.tag, name)] < level:
                rv.append(SimpleDecl(cons))
                written[(die.tag, name)] = level
    elif die.tag == DW_TAG.array_type:
        subtype = get_type_ref(die, 'type')
        count = None
        for val in die.children:
            if val.tag == DW_TAG.subrange_type:
                # With several subrange children, the last one wins.
                count = get_int(val, 'upper_bound')
        if count is not None:
            count += 1  # count is upper_bound + 1
        typeref = array_ref(subtype, count)
    elif die.tag in [DW_TAG.subroutine_type, DW_TAG.subprogram]:
        inline = get_int(die, 'inline', 0)
        returntype = get_type_ref(die, 'type')
        args = []
        for i, val in enumerate(die.children):
            if val.tag == DW_TAG.formal_parameter:
                argtype = get_type_ref(val, 'type')
                argname = get_str(val, 'name', '')
                args.append(c_ast.Typename([], argtype(argname)))
        cons = lambda name: c_ast.FuncDecl(c_ast.ParamList(args),
                                           returntype(name))
        if die.tag == DW_TAG.subprogram:
            # Is it somehow specified whether this function is static or external?
            assert (name is not None)
            if written[(die.tag, name)] != WRITTEN_FINAL:
                if inline:  # Generate commented declaration for inlined function
                    #rv.append(Comment('\n'.join(cons.generate())))
                    rv.append(
                        Comment('inline %s' %
                                (CGenerator().visit(SimpleDecl(cons(name))))))
                else:
                    rv.append(SimpleDecl(cons(name)))
                written[(die.tag, name)] = WRITTEN_FINAL
        else:  # DW_TAG.subroutine_type
            typeref = cons
    else:
        # reference_type, class_type, set_type etc
        # variable
        if name is None or written[(die.tag, name)] != WRITTEN_FINAL:
            rv.append(
                Comment("Unhandled: %s\n%s" %
                        (DW_TAG.fmt(die.tag), unistr(die))))
            written[(die.tag, name)] = WRITTEN_FINAL
        warning("unhandled %s (die %i)" % (DW_TAG.fmt(die.tag), die.offset))
    # Replace the placeholder / early reference with the final type ref.
    names[die.offset] = typeref
    return typeref
def to_c_process(die, by_offset, names, rv, written, preref=False):
    '''
    Translate one DIE into C syntax-tree nodes and return its type ref.

    A type ref is a callable that, given a declarator name, builds the
    syntax tree referring to the type.

    Parameters:
      die       -- the DIE to translate.
      by_offset -- mapping from DIE offset to DIE, used to follow type
                   references.
      names     -- dict from DIE offset to the type ref already computed
                   for that DIE; updated in place.
      rv        -- list of output statements; declarations and comments
                   are appended to it.
      written   -- mapping keyed by (tag, name) holding a WRITTEN_* level;
                   read with [] for keys this function has not set, so
                   presumably a defaultdict -- confirm against the caller.
      preref    -- when true, and the DIE is named with a constructor in
                   TAG_NODE_CONS, return the by-name reference immediately
                   without emitting the definition.

    Returns the type ref, which is also stored in names[die.offset]. For
    branches that never assign `typeref` (subprogram, unhandled tags) the
    stored value is whatever was set before the branch: the ERROR
    placeholder or the early by-name reference.

    Struct/union members are handled leniently: a 'data_member_location'
    in constant form or as a plus_uconst block is reported in the member's
    trailing comment, any other encoding is simply not commented, and a
    member without a 'name' attribute is declared with name None.
    '''
    if DEBUG:
        print("to_c_process", die.offset, preref)

    def get_type_ref(die, attr):
        '''
        Get type ref for the type referenced by attribute `attr` of `die`.

        A type ref is a function that, given a name, constructs a syntax
        tree for referring to that type. A missing reference yields the
        'void' base type; a reference not yet in `names` is resolved by a
        recursive to_c_process call with preref=True.
        '''
        type_ = get_ref(die, attr)
        if DEBUG:
            print(die.offset, "->", type_)
        if type_ is None:
            ref = base_type_ref('void')
        else:
            ref = names.get(type_)
            if ref is None:
                ref = to_c_process(by_offset[type_], by_offset, names, rv,
                                   written, preref=True)
            # NOTE(review): the placeholder stored in `names` below is
            # ERROR(die.offset), while this compares against ERROR itself --
            # confirm whether this identity test can ever be true.
            elif ref is ERROR:
                raise ValueError("Unexpected recursion")
        return ref

    names[die.offset] = typeref = ERROR(die.offset)  # prevent unbounded recursion

    # Typeref based on name: simple
    name = get_str(die, 'name')
    if name is not None:
        try:
            # Called with None as second argument: a reference-only node.
            prefix = TAG_NODE_CONS[die.tag](name, None)
        except KeyError:
            # No by-name constructor for this tag; fall through to the
            # per-tag handling below.
            pass
        else:  # store early, to allow self-reference
            names[die.offset] = typeref = lambda name: c_ast.TypeDecl(name, [], prefix)
            if preref:  # early-out
                return typeref

    if die.tag == DW_TAG.enumeration_type:
        items = []
        for enumval in die.children:
            assert(enumval.tag == DW_TAG.enumerator)
            (sname, const_value) = (not_none(get_str(enumval, 'name')),
                                    not_none(get_int(enumval, 'const_value')))
            items.append(EnumItem(sname, const_value))
        enum = c_ast.Enum(name, c_ast.EnumeratorList(items))
        if name is None:
            typeref = anon_ref(enum)
        else:
            if written[(die.tag, name)] != WRITTEN_FINAL:
                rv.append(SimpleDecl(enum))
                written[(die.tag, name)] = WRITTEN_FINAL  # enum definition is final

    elif die.tag == DW_TAG.typedef:
        assert(name is not None)
        ref = get_type_ref(die, 'type')
        if written[(die.tag, name)] != WRITTEN_FINAL:
            rv.append(c_ast.Typedef(name, [], ['typedef'], ref(name)))
            written[(die.tag, name)] = WRITTEN_FINAL  # typedef is always final
        # After the typedef is emitted, refer to it by its bare name.
        typeref = base_type_ref(name)

    elif die.tag == DW_TAG.base_type:  # IdentifierType
        if name is None:
            name = 'unknown_base'  #??
        if written[(die.tag, name)] != WRITTEN_FINAL:
            # Base types only get a comment in the output, no declaration.
            rv.append(Comment("Basetype: %s" % name))
            written[(die.tag, name)] = WRITTEN_FINAL  # base type is final
        typeref = base_type_ref(name)

    elif die.tag == DW_TAG.pointer_type:
        ref = get_type_ref(die, 'type')
        typeref = ptr_to_ref(ref)

    elif die.tag in [DW_TAG.const_type, DW_TAG.volatile_type]:
        ref = get_type_ref(die, 'type')
        typeref = qualified_ref(ref, die.tag)

    elif die.tag in [DW_TAG.structure_type, DW_TAG.union_type]:
        if get_flag(die, 'declaration', False):
            items = None  # declaration only
            level = WRITTEN_PREREF
        else:
            items = []
            for enumval in die.children:
                if enumval.tag != DW_TAG.member:
                    warning('Unexpected tag %s inside struct or union (die %i)' %
                            (DW_TAG.fmt(enumval.tag), die.offset))
                    continue
                # data_member_location and bit_size / bit_offset as comment for fields
                bit_size = None
                comment = []
                if 'data_member_location' in enumval.attr_dict:
                    ml = enumval.attr_dict['data_member_location']
                    if ml.form in ['sdata', 'data1', 'data2', 'data4', 'data8']:
                        # Constant form: the value is the offset itself.
                        comment.append("+0x%x" % ml.value)
                    elif ml.form in ['block', 'block1']:
                        # Block form: report the offset only for a leading
                        # plus_uconst. The location is informational (it
                        # only feeds a comment), so other expressions are
                        # skipped rather than aborting the conversion.
                        expr = ml.value
                        if (len(expr.instructions) >= 1 and
                                expr.instructions[0].opcode == DW_OP.plus_uconst):
                            comment.append("+0x%x" % expr.instructions[0].operand_1)
                if 'bit_size' in enumval.attr_dict:
                    bit_size = get_int(enumval, 'bit_size')
                if 'bit_offset' in enumval.attr_dict:
                    # Uses bit_size read above; it is still None if the
                    # member had no 'bit_size' attribute.
                    bit_offset = get_int(enumval, 'bit_offset')
                    comment.append('bit %i..%i' % (bit_offset, bit_offset+bit_size-1))
                if 'byte_size' in enumval.attr_dict:
                    comment.append('of %i' % (8*get_int(enumval, 'byte_size')))
                # TODO: validate member location (alignment), bit offset
                if 'name' in enumval.attr_dict:
                    ename = expect_str(enumval.attr_dict['name'])
                else:
                    # Member without a name attribute (e.g. an unnamed
                    # nested struct/union): declare it without a name
                    # instead of failing with KeyError.
                    ename = None
                ref = get_type_ref(enumval, 'type')
                items.append(c_ast.Decl(ename, [], [], [], ref(ename), None,
                                        IntConst(bit_size),
                                        postcomment=(' '.join(comment))))
            level = WRITTEN_FINAL

        cons = TAG_NODE_CONS[die.tag](name, items)
        if name is None:  # anonymous structure
            typeref = anon_ref(cons)
        else:
            # Emit only if this is more complete than what was written
            # before (a full definition after a declaration-only entry).
            if written[(die.tag, name)] < level:
                rv.append(SimpleDecl(cons))
                written[(die.tag, name)] = level

    elif die.tag == DW_TAG.array_type:
        subtype = get_type_ref(die, 'type')
        count = None
        for val in die.children:
            if val.tag == DW_TAG.subrange_type:
                # With several subrange children, the last one wins.
                count = get_int(val, 'upper_bound')
        if count is not None:
            count += 1  # count is upper_bound + 1
        typeref = array_ref(subtype, count)

    elif die.tag in [DW_TAG.subroutine_type, DW_TAG.subprogram]:
        inline = get_int(die, 'inline', 0)
        returntype = get_type_ref(die, 'type')
        args = []
        for val in die.children:
            if val.tag == DW_TAG.formal_parameter:
                argtype = get_type_ref(val, 'type')
                argname = get_str(val, 'name', '')
                args.append(c_ast.Typename([], argtype(argname)))
        cons = lambda name: c_ast.FuncDecl(c_ast.ParamList(args), returntype(name))

        if die.tag == DW_TAG.subprogram:
            # Is it somehow specified whether this function is static or external?
            assert(name is not None)
            if written[(die.tag, name)] != WRITTEN_FINAL:
                if inline:  # Generate commented declaration for inlined function
                    rv.append(Comment('inline %s' % (CGenerator().visit(SimpleDecl(cons(name))))))
                else:
                    rv.append(SimpleDecl(cons(name)))
                written[(die.tag, name)] = WRITTEN_FINAL
        else:  # DW_TAG.subroutine_type
            typeref = cons

    else:
        # reference_type, class_type, set_type etc
        # variable
        if name is None or written[(die.tag, name)] != WRITTEN_FINAL:
            rv.append(Comment("Unhandled: %s\n%s" % (DW_TAG.fmt(die.tag), unistr(die))))
            written[(die.tag, name)] = WRITTEN_FINAL
        warning("unhandled %s (die %i)" % (DW_TAG.fmt(die.tag), die.offset))

    # Replace the placeholder / early reference with the final type ref.
    names[die.offset] = typeref
    return typeref