def __init__(self, coredump_lines, processor):
    """Parse a coredump and select the chip architecture it describes.

    All attributes are first reset to empty defaults. The coredump is then
    handed to ``_read_xcd_file`` and the architecture values held on the
    instance afterwards are passed to ``Arch.chip_select`` so the rest of
    the tool works with the right chip.

    Args:
        coredump_lines: Iterable holding the lines of the coredump. It is
            stored on the instance and iterated once by the parser.
        processor: The processor whose view of the coredump is wanted
            (the coredump has to be read once per processor).
    """
    ChipData.ChipData.__init__(self)
    self.coredump_lines = coredump_lines

    # Chip identification.
    self.chip_id = None  # Global chip version (e.g. 12280000).
    self.chip_architecture = None  # String: "Bluecore" or "Hydra".
    self.kalimba_architecture = None  # Integer, e.g. 3, 4, 5.
    self.chip_revision = None

    # The coredump must be read twice, once from each processor's
    # perspective; remember which one this instance represents.
    self.processor = processor

    self.firmware_id = 0  # Firmware ID integer.
    self.firmware_id_string = ""  # Firmware ID string.

    # PM RAM contents (whatever is tagged as 'PM' in the coredump).
    self.pm = {}
    # Everything marked as 'data' or memory-mapped registers; this could
    # be DM-mapped PM in the case of HydraCore chips.
    self.data = {}
    # All processor registers.
    self.registers = {}
    self.banked_registers = []

    # addr_per_word is 1 for all the supported chips, except Crescendo.
    self.addr_per_word = 1
    self.is_banked_register = False
    self.ignore_processor = False

    # Parse the coredump.
    self._read_xcd_file(iter(coredump_lines))

    # The audio section is parsed: publish the chip architecture to the
    # rest of the tool.
    Arch.chip_select(
        self.kalimba_architecture,
        self.chip_architecture,
        self.chip_id,
        self.chip_revision,
    )

    # Architecture-specific fix-ups.
    if (self.chip_architecture == "Hydra"
            and self.kalimba_architecture in (4, 5)):
        # The DM1 RAM range is also aliased at the DM2 range, but only
        # the DM1 data is in the coredump. Mirror it so that either
        # address can be used during analysis.
        dm2_alias = {
            address + Arch.dRegions['DM2RAM'][0]: value
            for address, value in self.data.items()
            if Arch.get_dm_region(address) == "DM1RAM"
        }
        self.data.update(dm2_alias)

    logger.info('Coredump parsed')
def _decode_log(self):
    """Decode the new entries of the debug log buffer.

    Same as decode_log, but is not thread safe.

    The buffer is walked backwards from the write pointer. Words that lie
    in one of the debug string regions are treated as formatter string
    pointers, everything else as an argument of the next string found.

    Returns:
        A list with the formatted log messages, oldest first. ``None`` is
        returned when the write pointer shows that nothing was written
        since the previous call.
    """
    # Form the list on a temporary list so we don't duplicate old entries
    tmp_debug_log = []

    # Read all the debug variables.
    # NOTE(review): the whole buffer is fetched before the "nothing new"
    # check below, so the read happens even when it is not needed.
    buffer_wrp = self.chipdata.get_data(self.p_buffer_wrp)
    debug_buffer = self.chipdata.get_data(self.p_buffer, self.buffer_size)

    if buffer_wrp == self.last_read + 1:
        # Nothing new in the log so don't bother reading it
        # NOTE(review): this path returns None while the normal path
        # returns a list; callers have to cope with both.
        return

    # The debug buffer looks like this:
    #
    # [appppaapapaa...............ppp]
    #  ^           ^                 ^
    #  debug_buffer buffer_wrp       len(debug_buffer)
    #
    # p = string pointer, a = string argument
    # Decoding is more complicated than you might think, because:
    # * We can't assume that a value > $flash.debugdata.__Base is a string
    #   pointer (it could just be a large number)
    # * String pointers can in some circumstances be arguments to other
    #   strings (more details below).
    # * The buffer may not be completely full
    # * The buffer may have wrapped unevenly, so that the first few
    #   arguments after buffer_wrp are orphaned from the corresponding
    #   string.
    #
    # So what we need to do is work backwards from buffer_wrp, find the
    # last string pointer, and count the number of format specifiers
    # in it to check that it matches the number of arguments.
    arguments = []  # list of args for each log statement
    i = buffer_wrp - 1  # start with the newest entry in the buffer
    last_read = i  # record how far we will read up to this time
    wrapped = False
    while True:
        # Check for a wrap/repeat
        if i < 0:
            i = len(debug_buffer) - 1
            wrapped = True
        if wrapped and i <= buffer_wrp:
            # we've parsed the whole buffer
            break

        # We need to look for things that look like pointers to debug
        # strings.
        region = Arch.get_dm_region(debug_buffer[i], False)
        if region == 'DEBUG' or region == 'DBG_DWL' or region == 'DBG_PTCH':
            # May pop the leading argument when it identifies a
            # downloadable capability (see _get_formatter_string).
            mystr = self._get_formatter_string(debug_buffer[i], arguments)
            # Now it could just so happen that this value is an argument
            # to printf AND a valid debug string pointer, like:
            #     AUDIO_LOG_STRING(hi, "hello");
            #     L0_DBG_MSG1("foo: %s", (DBG_STR)hi);
            # (This construct is especially prevalent in isp_router).
            #
            # We can check that the string pointed-to has the same number
            # of arguments as we have in arguments[], but that doesn't
            # actually help in this case since it doesn't allow us to
            # distinguish between a string that simply has no arguments,
            # and a string that is an argument to another string.
            # To avoid painful look-ahead, the simple way to solve this
            # is to just assume every complete-looking string is actually
            # complete, and put it into tmp_debug_log[]. If we then come
            # across a string that doesn't seem to have enough
            # arguments, we backtrack. Note that we only support one
            # level of nesting here; if someone has put a formatted
            # string inside a formatted string then all bets are off.
            if mystr.count('%') == 0 and arguments:
                # This could happen if you had something like:
                #     L0_DBG_MSG1("foo: %s %d", (DBG_STR)hi, 0x1234)
                # This string is clearly just actually an argument.
                arguments.append(mystr)
                i -= 1
                continue
            if mystr.count('%') > len(arguments):
                # We're missing some arguments. We probably put them into
                # tmp_debug_log[], thinking they were complete strings.
                num_args_missing = mystr.count('%') - len(arguments)
                # Only backtrack when every missing argument can be
                # accounted for by a '%s' specifier.
                if num_args_missing == mystr.count('%s'):
                    temp_arguments = tmp_debug_log[-num_args_missing:]
                    if Arch.addr_per_word != 4:
                        # For Hydra platforms, like Crescendo and Aura
                        # the arguments in tmp_debug_log[] are in the
                        # right order, because of the order in which the
                        # arguments are stored. For Bluecore platforms,
                        # like Gordon and Rick, the array slice is in
                        # the wrong order.
                        temp_arguments.reverse()
                    arguments = temp_arguments + arguments
                    # now arguments[] contains what it would have held,
                    # had we not put the missing args into
                    # tmp_debug_log[]. Last thing to do is disavow them.
                    tmp_debug_log[-num_args_missing:] = []

            # Now we should have the right number of arguments!
            str_cnt = mystr.count('%')
            # Assume this is a complete log.
            # shrink-wrap the string we just read, and save it.
            arguments.reverse()  # because we added args in reverse order.
            # format the string.
            formatter_error = False
            if str_cnt == len(arguments):
                try:
                    formatted_str = self._format_string(mystr, arguments)
                except ValueError:
                    # ValueError can be caused by erroneous formatting
                    # string like a "0x%08"
                    formatter_error = True
            else:
                formatter_error = True

            if formatter_error:
                # Replace the entry with a diagnostic that shows the raw
                # formatter string and the arguments collected for it.
                formatted_str = (
                    "\n@@@@ ERROR: Wrong number of arguments! " +
                    "This could be caused by buffer tear. \n" +
                    "Buffer tear happens when messages are " +
                    "written too fast to the log buffer\n" +
                    "and the debug interface cannot keep up in reading them. \n" +
                    " formatter string: %s\n" % (mystr.replace("\n", "")) +
                    " arguments: %s\n" % (cu.list_to_string(arguments)))
            # Add the formatted string to the debug log.
            tmp_debug_log.append(formatted_str)
            # Clear the arguments for the next run.
            arguments = []
        else:
            # Found an argument.
            arguments.append(debug_buffer[i])

        i -= 1  # next
        if i == self.last_read:
            # We've reached where we got to
            break

    # Remember the newest entry decoded so the next call starts after it.
    self.last_read = last_read
    # invert the debug log so it is printed in the correct order
    tmp_debug_log.reverse()
    return tmp_debug_log
def _get_formatter_string(self, string_ptr, arguments):
    """Look up the formatter string a debug log pointer refers to.

    Depending on the region the pointer falls in, the string is searched
    in a downloadable capability bundle, in the patch, or in the main
    Kymera debug information. A failed lookup never raises KeyError; an
    "@@@@ ERROR" text is returned in place of the formatter string.

    Args:
        string_ptr: Pointer to the formatter string.
        arguments: Arguments collected for the formatter string. This
            list can be modified: for downloadable capabilities its first
            element is removed when it identifies the bundle.

    Returns:
        The formatter string, or an error message.
    """
    ptr_region = Arch.get_dm_region(string_ptr, False)

    if ptr_region == 'DBG_DWL':
        try:
            if arguments:
                # For downloadable capabilities the first argument to the
                # debug logging is the return address of the debug log
                # call; it tells us which capability made the call.
                bundle_id = self.debuginfo.table.get_elf_id_from_address(
                    arguments[0])
                if bundle_id is not None:
                    self.current_elf_id = bundle_id
                    arguments.pop(0)
            # The linker puts the debug messages of downloadable
            # capabilities at 0x15500000, but the elf keeps them at
            # 0x13500000. Account for the difference:
            # 0x15500000 - 0x13500000 = 0x2000000
            bundle = self.debuginfo.debug_infos[self.current_elf_id]
            return bundle.debug_strings[string_ptr - 0x2000000]
        except KeyError:
            return (
                "@@@@ ERROR: Cannot find %s debug string" +
                " in downloadable capability (capability elf id %s)."
            ) % (hex(string_ptr), hex(self.current_elf_id))

    if ptr_region == 'DBG_PTCH':
        # Look at the patch elf.
        patch_info = self.debuginfo.get_patch_debuginfo()
        if patch_info is None:
            return ("@@@@ ERROR: Cannot find %s debug string" +
                    " because patch not loaded.") % (hex(string_ptr))
        try:
            # The linker puts the debug messages of the patch at
            # 0x14500000, but in the elf they are at 0x13500000.
            # Account for the difference:
            # 0x14500000 - 0x13500000 = 0x1000000
            return patch_info.debug_strings[string_ptr - 0x1000000]
        except KeyError:
            return ("@@@@ ERROR: Cannot find %s debug string" +
                    " in patch.") % (hex(string_ptr))

    # Any other region: almost certainly a valid string pointer. There is
    # still a remote chance that a numerical argument coincides with a
    # string address; that only shows up later, when the string is
    # formatted with its arguments.
    try:
        found = self.debuginfo.get_kymera_debuginfo().debug_strings[
            string_ptr]
    except KeyError:
        # Invalid pointer. This is probably caused by a buffer tear.
        found = None
    if found is None:
        return "@@@@ ERROR: Cannot find %s debug string." % hex(string_ptr)
    return found
def get_var(self, identifier, elf_id=None, datalen=None):
    """Get a variable.

    Like Analysis.get_var_strict(), except it's not strict (!)
    'identifier' can be a variable name, or address. If it's a name, we
    attempt to find the closest match in our list of variables.
    If it's an address the user can also provide a data length; if it is
    set, we return a slice of data, 'datalen' addressable units long
    starting at the address specified by 'identifier'.

    Args:
        identifier: Could be name or address.
        elf_id (int, optional): The bundle elf id if the variable is in a
            downloadable capability.
        datalen: If the identifier is an address the data length is
            specified by this input.

    Returns:
        A Variable, or None when nothing matches or when the address is a
        memory-mapped register.

    Raises:
        AmbiguousSymbolError: If more than one match is found and the
            matches are not just one variable plus its own members.
    """
    # For Crescendo, data can only be fetched as words. Since it is
    # octet-addressed, the addresses must be divisible by the number of
    # addresses per word (32 bit words - 4 octets, therefore addresses
    # must be divisible by 4).
    if isinstance(identifier, numbers.Integral):
        identifier = cu.get_correct_addr(identifier, Arch.addr_per_word)
    is_address = isinstance(identifier, numbers.Integral)

    # Same as above. The lengths are measured in addressable units.
    if datalen is not None:
        datalen = cu.convert_byte_len_word(datalen, Arch.addr_per_word)

    # The following is necessary since we can't rely on variable
    # sizes. If a (say) register address was passed in here we would
    # likely match a variable entry for $flash.data24.__Limit.
    if is_address and Arch.get_dm_region(identifier) == "MMR":
        return None

    # First, look up the variable in the debug information.
    # Even if the user supplied an address rather than a name, it's nice
    # if we can tell them which variable it might be part of.
    # Might throw an AmbiguousSymbolError exception here; can't get that
    # with an address but can with a variable name.
    var = None
    try:
        var = self.debuginfo.get_var(identifier, elf_id)
    except AmbiguousSymbolError as amb:
        # Filter out matches of struct/array members, where their parent
        # is also in the list of matches.
        matches = amb.args[1]
        # Each match is a record indexed by "name"/"elf_id", so the
        # parent's name has to be compared with the match names; testing
        # it against the records themselves can never succeed.
        match_names = {match["name"] for match in matches}
        quarantine_list = []
        for match in matches:
            try:
                mvar = self.debuginfo.get_var_strict(
                    match["name"], match["elf_id"])
                if (mvar.parent is not None
                        and mvar.parent.name in match_names):
                    # This is a struct/array member
                    quarantine_list.append(match)
                else:
                    possible_parent = mvar
            except ValueError:
                # Out of memory can be seen for asm memory regions.
                # Ignore them.
                quarantine_list.append(match)

        # If exactly one match survived the filter, then we probably have
        # found a single variable and all its members. (possible_parent
        # is necessarily bound then: every match is either quarantined
        # or recorded as the possible parent.)
        if len(matches) == len(quarantine_list) + 1:
            var = possible_parent
        else:
            # Give up
            raise AmbiguousSymbolError(amb.args[0], amb.args[1])

    if var is None:
        return None

    # Don't necessarily want to modify the actual variable entry below,
    # so maybe create a copy here. 'var' is just a reference to the
    # original variable in the debuginfo class - we ought not to change
    # it frivolously, since it could break some other analysis.
    # In this case, we don't want to permanently munge the name
    # just because we're doing a slice this time.
    ret_var = var
    if datalen:
        if is_address:
            if var.address == identifier and var.size <= datalen:
                ret_var = copy.deepcopy(var)
                ret_var.name = "User-defined slice, part of: " + \
                    var.name + " ???"
                # We want to get a slice of data, not just the variable
                # entry.
                ret_var.size = datalen
                # Don't include any members we might have already
                # inspected.
                ret_var.members = None
            else:
                ret_var = ct.Variable("???", identifier, datalen)
    else:
        # Mitigation: we can't rely on 'var' actually containing the
        # supplied address, due to the lack of size information (see
        # KerDebugInfo.py). So work around it here.
        if (is_address and
                identifier >= var.address + Arch.addr_per_word * var.size):
            # Just return the value at the given address.
            ret_var = ct.Variable(var.name + " ???", identifier, 1)

    # Now get the data value(s) from chipdata. Look in DM first, only
    # try const if we run out of options.
    try:
        ret_var.value = self.chipdata.get_data(ret_var.address, ret_var.size)
    except InvalidDmLengthError as oor:
        # Address was valid, but size was not. Retry with the length
        # that is still readable according to the exception.
        valid_size = (oor.max_length - ret_var.address) + 1
        ret_var.value = self.chipdata.get_data(ret_var.address, valid_size)
        ret_var.size = valid_size
    except InvalidDmAddressError:
        # The address wasn't valid. Could be that this variable is
        # actually in dm const.
        try:
            ret_var.value = self.debuginfo.get_dm_const(
                ret_var.address, ret_var.size)
        except InvalidDmConstLengthError as oor:
            # Address was valid, but size was not.
            valid_size = oor.max_length - ret_var.address
            ret_var.value = self.debuginfo.get_dm_const(
                ret_var.address, valid_size)
        except InvalidDmConstAddressError:
            # Last resort: the address may belong to a debug string.
            # The variable is returned without being inspected.
            # NOTE(review): a miss in debug_strings propagates as
            # KeyError, although the original comment here promised a
            # value of None - confirm which contract callers expect.
            debug_info = self.debuginfo.get_kymera_debuginfo()
            ret_var.value = debug_info.debug_strings[ret_var.address]
            return ret_var

    # Need a way to work out whether we've already inspected this
    # variable, so we can avoid doing it more than once.
    # An inspection *should* result in a non-empty type_name string.
    # Also, don't inspect the slices. It would be bad.
    ret_var.members = []
    var_elf_id = self.debuginfo.table.get_elf_id_from_address(
        ret_var.address)
    if not var_elf_id:
        # No bundle owns the address: fall back to the main Kymera elf.
        var_elf_id = self.debuginfo.get_kymera_debuginfo().elf_id

    self.debuginfo.inspect_var(ret_var, var_elf_id)
    return ret_var
def get_reg(self, identifier):
    """Get register.

    Like Analysis.get_reg_strict(), except it's not strict (!)
    'identifier' can be a register name, or address. If it's a name, we
    attempt to find the closest match in our list of registers.

    Args:
        identifier: Register name or address.

    Returns:
        A DataSym instance, whatever ``chipdata.get_reg_strict`` returns
        for processor registers, or None when no register is found.

    Raises:
        AmbiguousSymbolError: If more than one match is found.
    """
    reg = None  # Will be a ConstSym.

    if isinstance(identifier, numbers.Integral):
        # The user supplied an address; if it smells like a register,
        # attempt to look it up.
        if Arch.get_dm_region(identifier) == "MMR":
            # Look for constants that have a value of the supplied
            # address. Inherently risky, since any random constant
            # could have a value that looks like a register address.
            # Since we only do this to set the name, it should be ok.
            possible_regs = [
                name
                for name, value in self.debuginfo.constants.items()
                if value == identifier
            ]
            if possible_regs:
                reg = ct.ConstSym(" or ".join(possible_regs), identifier)
    else:
        # Processor registers first: they are known to chipdata under a
        # 'regfile_' prefix.
        try:
            if 'regfile' in identifier:
                return self.chipdata.get_reg_strict(identifier)
            return self.chipdata.get_reg_strict('regfile_' + identifier)
        except Exception:
            # Deliberate best effort: not a processor register, so fall
            # through to the constants. This should really catch only
            # KeyError and UnknownRegister (not imported here), which is
            # presumably an ordinary Exception subclass. BaseException is
            # not caught, so Ctrl-C and SystemExit still get through.
            pass

        # Look up the supplied name in our list of constants. If the
        # name is found, reg.value is actually going to be the address
        # of the register.
        # get_constant might throw an AmbiguousSymbolError exception
        # here; in this case we want to catch it, and weed out any
        # matches that aren't register names.
        try:
            reg = self.debuginfo.get_constant(identifier)
        except AmbiguousSymbolError as ambs:
            # We helpfully store the ambiguous matches in the exception
            # args
            ambiguous_matches = ambs.args[1]
            actual_ambiguous_matches = []
            for match in ambiguous_matches:
                amconst = self.debuginfo.get_constant_strict(
                    match["name"], match["elf_id"])
                if Arch.get_dm_region(amconst.value, False) == "MMR":
                    actual_ambiguous_matches.append(match)

            if not actual_ambiguous_matches:
                # We actually ended up finding no real registers
                reg = None
            else:
                # If all the matches are aliases for each other, we can
                # return that value. If they're different, admit our
                # mistake.
                val = self.debuginfo.get_constant_strict(
                    actual_ambiguous_matches[0]["name"],
                    actual_ambiguous_matches[0]["elf_id"])
                success = True
                # Skip the first which is used to check against.
                for match in actual_ambiguous_matches[1:]:
                    try:
                        variable = self.debuginfo.get_constant_strict(
                            match["name"], match["elf_id"])
                        if val != variable:
                            success = False
                            break
                    # TODO: remove this if B-242063 is corrected.
                    except Exception:
                        success = False
                        break

                if success:
                    # We actually got only one register match - work
                    # with it. Reuse the constant fetched above: it was
                    # looked up with the elf id of the match, whereas a
                    # second lookup by name alone could land elsewhere.
                    reg = val
                else:
                    apology = "Multiple potential matches found " + \
                        "for register name '" + identifier + "': "
                    raise AmbiguousSymbolError(apology,
                                               actual_ambiguous_matches)

        try:
            if reg and (Arch.get_dm_region(reg.value) != "MMR"):
                # Reg.value wasn't the address of a memory-mapped
                # register; it was probably a random symbolic
                # constant. Oh well.
                return None
        except Arch.NotDmRegion:
            if reg.value == 0xfffffff:
                # For Crescendo, it has been noticed that the registers
                # are being treated as constants with the value
                # 0xfffffff. Furthermore, must strip the C and asm
                # specific symbols for get_reg_strict().
                try:
                    if '$_' in reg.name:
                        reg_name = reg.name[2:]
                        return self.chipdata.get_reg_strict(reg_name)
                    elif '$' in reg.name:
                        reg_name = reg.name[1:]
                        return self.chipdata.get_reg_strict(reg_name)
                except Exception:
                    # The stripped name is unknown: try the full name.
                    return self.chipdata.get_reg_strict(reg.name)

    if reg is None:
        return None

    # If we get here, we found something.
    # There's a small chance we've got an unrelated constant, if its
    # value looks sufficiently like the address of a memory-mapped
    # register (e.g. $codec.stream_decode.FAST_AVERAGE_SHIFT_CONST).
    # Can't do much about that.

    # Look up the register contents.
    try:
        regcontents = self.chipdata.get_data(reg.value)
        fullreg = ct.DataSym(reg.name, reg.value, regcontents)
    except KeyError:
        # Reg.value wasn't the address of a register, for some reason.
        fullreg = None

    return fullreg
def _alloc_blocks(self, heap_address, heap_size, memory_type="dm"):
    """Reads and checks the allocated blocks.

    The heap contents are scanned for the magic word that marks an
    allocated block. Every hit is read back with ``_read_alloc_block``
    and, when its length keeps it inside the heap, reported.

    Args:
        heap_address: The heap start address.
        heap_size: The heap size.
        memory_type (str, optional): Passed on to the heap readers;
            defaults to "dm".

    Returns:
        Two lists. One for the heap allocations the other for debug
        information. Both are empty when the heap cannot be read.

    Raises:
        AnalysisError: If a block read back from a magic word position
            does not carry the magic word.
    """
    magic_val = 0xabcd01
    alloc_info = []
    debug_info = []

    try:
        (heap_data, magic_offset) = self._get_heap_and_magic_offset(
            heap_address, heap_size, memory_type)
    except InvalidDmAddressError:
        self.formatter.error(
            "Address 0x%x in %s cannot be access. "
            "Heap cannot be analysed." %
            (heap_address, str(Arch.get_dm_region(heap_address, False))))
        return alloc_info, debug_info

    total = 0
    # here index is the index of the magic word in heap_data. Since in
    # heap_pm we are working with 32-bit words and not strictly addresses,
    # for start, index should actually be
    # -Arch.addr_per_word/Arch.addr_per_word, which is -1
    index = -1

    # Search through the entire heap block, looking for allocated blocks
    # based on the presence of the magic word
    while True:
        try:
            # Resume the search right after the previous position. The
            # start argument avoids copying the remainder of the heap on
            # every hit; ValueError (no further magic word) ends the
            # scan.
            index = heap_data.index(magic_val, index + 1)
            address = heap_address + \
                (index - magic_offset) * Arch.addr_per_word

            (length, magic, owner_hint) = \
                self._read_alloc_block(address, memory_type)

            if magic != magic_val:
                raise AnalysisError(
                    "Magic word not found at expected offset.")

            # Check if we are still in valid region
            if (length > 0) and \
                    (address + length < heap_address + heap_size):
                alloc_info.append(
                    "Allocated block size : " +
                    cu.mem_size_to_string(length, "o") +
                    " at address: 0x{0:0>8x}".format(address))
                # Skip the payload so that data which happens to equal
                # the magic word is not mistaken for a block header.
                index = index + length // Arch.addr_per_word
                total = total + length
                if self.pmalloc_debug_enabled:
                    debug_info.append(
                        "Ptr: 0x{0:0>8x} size: ".format(address) +
                        cu.mem_size_to_string(length, "o") +
                        " allocated by: {0:s}".format(owner_hint))
        except ValueError:
            break

    alloc_info.append("Total heap allocation : " +
                      cu.mem_size_to_string(total, "ow"))

    return alloc_info, debug_info
def _free_blocks(self, address, heap_start, heap_size, memory_type="dm"):
    """Checks the free blocks.

    Follows the free list from 'address' until a null 'next' pointer is
    found, adding up the nodes that lie inside the analysed heap.

    Args:
        address: Address to start with.
        heap_start: Start address of the analysed heap.
        heap_size: Size of the analysed heap.
        memory_type (str, optional): "dm" or "pm"; selects how a list
            node is read.

    Returns:
        A FreeBlocks holding the total free size and a list describing
        the free memory blocks. The result is partial when the walk is
        cut short by a repeated node or an unreadable DM address.

    Raises:
        FatalAnalysisError: If the memory type is not recognised or a
            node address is not valid.
    """
    free_blocks_info = []
    # Addresses of the nodes already walked. A set keeps the repeat check
    # cheap however long the free list is.
    visited = set()
    total_size = 0

    while address != 0:
        # Avoid infinite loop by checking if the node was already checked.
        if address in visited:
            self.formatter.error("Repeating nodes with address 0x%x. "
                                 "Probably memory corruption" % address)
            return FreeBlocks(total_size, free_blocks_info)
        visited.add(address)

        if memory_type == "dm":
            try:
                freeblock = self.chipdata.cast(address, 'mem_node')
                freeblock_size = freeblock.get_member('length').value
            except InvalidDmAddressError:
                self.formatter.error(
                    "Address 0x%x in %s cannot be access. "
                    "Heap cannot be analysed." %
                    (address, str(Arch.get_dm_region(address, False))))
                return FreeBlocks(total_size, free_blocks_info)
        elif memory_type == "pm":
            freeblock = self.chipdata.cast(address, 'mem_node_pm', False,
                                           'PM')
            # The PM node length is a count of 32-bit words; scale it to
            # addressable units.
            freeblock_size = freeblock.get_member(
                'struct_mem_node').get_member(
                    'length_32').value * Arch.addr_per_word
        else:
            raise FatalAnalysisError("Memory type %s not recognised." %
                                     memory_type)

        # verify if the address is valid
        if self.is_address_valid(address):
            # If the list node belongs to the current analysed heap
            # display info. Nodes outside of it are followed but not
            # counted.
            end_of_heap = heap_start + heap_size
            if (address >= heap_start) and (address <= end_of_heap):
                desc_str = ("Free block size : " +
                            cu.mem_size_to_string(freeblock_size, "o") +
                            " at address: 0x{0:0>8x}".format(address))
                free_blocks_info.append(desc_str)
                total_size += freeblock_size
        else:
            raise FatalAnalysisError(" 0x%x is out of %s heap memory !"
                                     % (address, memory_type))

        # Move on to the next node of the free list.
        if memory_type == "dm":
            address = freeblock.get_member('u').get_member('next').value
        elif memory_type == "pm":
            address = freeblock.get_member('struct_mem_node').get_member(
                'u').get_member('next').value

    free_blocks_info.append("Total heap free : " +
                            cu.mem_size_to_string(total_size, "ow"))
    return FreeBlocks(total_size, free_blocks_info)