def find_magic_file(self, fname, system_only=False, user_only=False):
    '''
    Finds the specified magic file name in the system / user magic file directories.

    @fname       - The name of the magic file.
    @system_only - If True, only the system magic file directory will be searched.
    @user_only   - If True, only the user magic file directory will be searched.

    If neither system_only nor user_only is set, the user directory is always searched first.

    Returns the path to the file on success; returns None on failure.
    '''
    loc = None

    if not system_only:
        fpath = self._user_path(self.BINWALK_MAGIC_DIR, fname)
        if os.path.exists(fpath) and common.file_size(fpath) > 0:
            loc = fpath

    if loc is None and not user_only:
        fpath = self._system_path(self.BINWALK_MAGIC_DIR, fname)
        if os.path.exists(fpath) and common.file_size(fpath) > 0:
            loc = fpath

    # Return the validated location, not fpath; fpath may be set even when the
    # existence / size checks above failed, and the docstring promises None on failure.
    return loc
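# A minimal usage sketch (an addition, not part of the original source): assuming
# `config` is an instance of the class exposing this method, and 'binmagic' is a
# hypothetical magic file name, the user directory is consulted before the system one:
#
#   path = config.find_magic_file('binmagic')                       # user dir first, then system
#   sys_path = config.find_magic_file('binmagic', system_only=True) # system dir only
#   if path is None:
#       pass  # no non-empty magic file by that name was found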
def hexdiff(self, file_names, length=0x100, offset=0, block=16, first=False):
    '''
    Displays a hex dump / diff of the specified files.

    @file_names - A list of paths to the files to display.
    @length     - The number of bytes to display. If 0, the size of the first file is used.
    @offset     - The offset into the files at which to start the display.
    @block      - The number of bytes to display per line.
    @first      - If True, only display data from the first file.

    Returns None.
    '''
    if not length and len(file_names) > 0:
        length = file_size(file_names[0])

    if not block:
        block = 16

    HexDiff(self).display(file_names, offset=offset, size=length, block=block, show_first_only=first)
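# A hedged usage sketch (not in the original source): assuming `bw` is an instance
# of the class exposing this method, this would hex-diff the first 256 bytes of two
# hypothetical firmware images, 16 bytes per row:
#
#   bw.hexdiff(['firmware-v1.bin', 'firmware-v2.bin'], length=0x100, block=16)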
def extract(self, offset, description, file_name, size, name=None):
    '''
    Extract an embedded file from the target file, if it matches an extract rule.
    Called automatically by Binwalk.scan().

    @offset      - Offset inside the target file to begin the extraction.
    @description - Description of the embedded file to extract, as returned by libmagic.
    @file_name   - Path to the target file.
    @size        - Number of bytes to extract.
    @name        - Name to save the file as.

    Returns the name of the extracted file (blank string if nothing was extracted).
    '''
    fname = ''
    cleanup_extracted_fname = True
    original_dir = os.getcwd()
    rules = self._match(description)

    # No extraction rules for this file; return the blank string promised by the docstring
    if not rules:
        return fname

    if not os.path.exists(self.extract_path):
        os.mkdir(self.extract_path)

    file_path = os.path.realpath(file_name)

    if os.path.isfile(file_path):
        os.chdir(self.extract_path)

        # Loop through each extraction rule until one succeeds
        for i in range(0, len(rules)):
            rule = rules[i]

            # Copy out the data to disk, if we haven't already
            fname = self._dd(file_path, offset, size, rule['extension'], output_file_name=name)

            # If there was a command specified for this rule, try to execute it.
            # If execution fails, the next rule will be attempted.
            if rule['cmd']:

                # Many extraction utilities will extract the file to a new file, just without
                # the file extension (i.e., myfile.7z -> myfile). If the presumed resulting
                # file name already exists before executing the extract command, do not attempt
                # to clean it up even if its resulting file size is 0.
                if self.remove_after_execute:
                    extracted_fname = os.path.splitext(fname)[0]
                    if os.path.exists(extracted_fname):
                        cleanup_extracted_fname = False

                # Execute the specified command against the extracted file
                extract_ok = self.execute(rule['cmd'], fname)

                # Only clean up files if remove_after_execute was specified
                if extract_ok and self.remove_after_execute:

                    # Remove the original file that we extracted
                    try:
                        os.unlink(fname)
                    except:
                        pass

                    # If the command worked, assume it removed the file extension from the extracted file.
                    # If the extracted file exists and is empty, remove it.
                    if cleanup_extracted_fname and os.path.exists(extracted_fname) and file_size(extracted_fname) == 0:
                        try:
                            os.unlink(extracted_fname)
                        except:
                            pass

                # If the command executed OK, don't try any more rules
                if extract_ok:
                    break
                # Else, remove the extracted file if this isn't the last rule in the list.
                # If it is the last rule, leave the file on disk for the user to examine.
                elif i != (len(rules)-1):
                    try:
                        os.unlink(fname)
                    except:
                        pass

            # If there was no command to execute, just use the first rule
            else:
                break

        os.chdir(original_dir)

    # If a file was extracted, return the full path to that file
    if fname:
        fname = os.path.join(self.extract_path, fname)

    return fname
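# A hedged sketch of the rule dictionaries this method consumes, inferred from the
# rule['extension'] and rule['cmd'] lookups above; the exact schema and the command
# strings shown are assumptions, not confirmed by this file:
#
#   rules = [
#       {'extension' : '7z', 'cmd' : '7zr x %e'},   # hypothetical: carve, then run a command
#       {'extension' : '7z', 'cmd' : None},         # hypothetical fallback: just carve the data
#   ]
#
# Each rule is tried in order: the data is carved to disk via self._dd, the rule's
# command (if any) is run against it, and the first rule whose command succeeds wins.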
def single_scan(self, target_file='', fd=None, offset=0, length=0, show_invalid_results=False, callback=None, plugins_whitelist=[], plugins_blacklist=[]):
    '''
    Performs a binwalk scan on one target file or file descriptor.

    @target_file          - File to scan.
    @fd                   - A common.BlockFile object.
    @offset               - Starting offset at which to start the scan.
    @length               - Number of bytes to scan. Specify -1 for streams.
    @show_invalid_results - Set to True to display invalid results.
    @callback             - Callback function to be invoked when matches are found.
    @plugins_whitelist    - A list of plugin names to load. If not empty, only these plugins will be loaded.
    @plugins_blacklist    - A list of plugin names to not load.

    The callback function is passed two arguments: a list of result dictionaries containing
    the scan results (one result per dict), and the offset at which those results were identified.
    Example callback function:

        def my_callback(offset, results):
            print "Found %d results at offset %d:" % (len(results), offset)
            for result in results:
                print "\t%s" % result['description']

        binwalk.Binwalk(callback=my_callback).scan("firmware.bin")

    Upon completion, the scan method returns a sorted list of tuples containing a list of
    results dictionaries and the offsets at which those results were identified:

        scan_results = [
            (0, [{description : "LZMA compressed data..."}]),
            (112, [{description : "gzip compressed data..."}])
        ]

    See SmartSignature.parse for a more detailed description of the results dictionary structure.
    '''
    scan_results = {}
    fsize = 0
    jump_offset = 0
    i_opened_fd = False
    i_loaded_plugins = False
    plugret = PLUGIN_CONTINUE
    plugret_start = PLUGIN_CONTINUE
    self.total_read = 0
    self.total_scanned = 0
    self.scan_length = length
    self.filter.show_invalid_results = show_invalid_results
    self.start_offset = offset

    # Check to make sure either a target file or a file descriptor was supplied
    if not target_file and fd is None:
        raise Exception("Must supply Binwalk.single_scan with a valid file path or BlockFile object")

    # Need the total size of the target file, even if we aren't scanning the whole thing
    if target_file:
        fsize = file_size(target_file)

    # If no length was specified, make the length the size of the target file minus the starting offset
    if self.scan_length == 0:
        self.scan_length = fsize - offset

    # Open the target file and seek to the specified start offset
    if fd is None:
        fd = BlockFile(target_file, length=self.scan_length, offset=offset)
        i_opened_fd = True

    # If offset is negative (bytes from EOF), the BlockFile class will automatically calculate the right offset
    offset = fd.offset

    # Seek to the starting offset.
    #fd.seek(offset)

    # If the Plugins class has not already been instantiated, do that now.
    if self.plugins is None:
        self.plugins = Plugins(self, blacklist=plugins_blacklist, whitelist=plugins_whitelist)
        i_loaded_plugins = True

        if self.load_plugins:
            self.plugins._load_plugins()

    # Invoke any pre-scan plugins
    plugret_start = self.plugins._pre_scan_callbacks(fd)

    # Load the default signatures if self.load_signatures has not already been invoked
    if self.magic is None:
        self.load_signatures()

    # Main loop, scan through all the data
    while not ((plugret | plugret_start) & PLUGIN_TERMINATE):
        i = 0

        # Read in the next block of data from the target file and make sure it's valid
        (data, dlen) = fd.read_block()
        if not data or dlen == 0:
            break

        # The total number of bytes scanned could be bigger than the total number
        # of bytes read from the file if the previous signature result specified a
        # jump offset that was beyond the end of the then current data block.
        #
        # If this is the case, we need to index into this data block appropriately in order to
        # resume the scan from the appropriate offset.
        #
        # Don't update dlen though, as it is the literal offset into the data block that we
        # are to scan up to in this loop iteration. It is also appended to self.total_scanned,
        # which is what we want (even if we have been told to skip part of the block, the skipped
        # part is still considered part of the total bytes scanned).
        if jump_offset > 0:
            total_check = self.total_scanned + dlen

            # Is the jump offset beyond the total amount of data that we've currently read in (i.e., in a future data block)?
            if jump_offset >= total_check:
                i = -1

                # Try to seek to the jump offset; this won't work if fd == sys.stdin
                try:
                    fd.seek(jump_offset)
                    self.total_read = jump_offset
                    self.total_scanned = jump_offset - dlen
                except:
                    pass

            # Is the jump offset inside this block of data?
            elif jump_offset > self.total_scanned and jump_offset < total_check:
                # Index into this block appropriately; jump_offset is the file offset that
                # we need to jump to, and self.total_scanned is the file offset that starts
                # the beginning of the current block
                i = jump_offset - self.total_scanned

            # We're done with jump_offset, zero it out for the next round
            jump_offset = 0

        # Scan through each block of data looking for signatures
        if i >= 0 and i < dlen:

            # Scan this data block for a list of offsets which are candidates for possible valid signatures.
            # Signatures could be split across the block boundary; since data contains 1KB more than dlen,
            # pass up to dlen+MAX_SIGNATURE_SIZE to find_signature_candidates, but don't accept signatures that
            # start after the end of dlen.
            for candidate in self.parser.find_signature_candidates(data[i:dlen+self.MAX_SIGNATURE_SIZE], (dlen-i)):

                # If a previous signature specified a jump offset beyond this candidate signature offset, ignore it
                if (i + candidate + self.total_scanned) < jump_offset:
                    continue

                # Reset these values on each loop
                smart = {}
                results = []
                results_offset = -1

                # In python3 we need a bytes object to pass to magic.buffer
                candidate_data = str2bytes(data[i+candidate:i+candidate+fd.MAX_TRAILING_SIZE])

                # Pass the data to libmagic, and split out multiple results into a list
                for magic_result in self.parser.split(self.magic.buffer(candidate_data)):

                    i_set_results_offset = False

                    # Some signatures need to take into account the length of a given string
                    # when specifying additional offsets. Parse the string-len keyword to adjust
                    # for this prior to calling self.smart.parse.
                    magic_result = self.smart._parse_string_len(magic_result)

                    # Some file names are not NULL byte terminated, but rather their length is
                    # specified in a size field. To ensure these are not marked as invalid due to
                    # non-printable characters existing in the file name, parse the filename(s) and
                    # trim them to the specified filename length, if one was specified.
                    magic_result = self.smart._parse_raw_strings(magic_result)

                    # Invoke any pre-parser callback plugin functions
                    if not (plugret_start & PLUGIN_STOP_PLUGINS):
                        raw_result = {'description' : magic_result}
                        plugret = self.plugins._scan_pre_parser_callbacks(raw_result)
                        magic_result = raw_result['description']
                        if (plugret & PLUGIN_TERMINATE):
                            break

                    # Make sure this is a valid result before further processing
                    if not self.filter.invalid(magic_result):
                        # The smart filter parser returns a dictionary of keyword values and the signature description.
                        smart = self.smart.parse(magic_result)

                        # Validate the jump value and check if the response description should be displayed
                        if self._is_valid(smart, candidate+i, fsize):
                            # If multiple results are returned and one of them has smart['jump'] set to a non-zero value,
                            # the calculated results offset will be wrong since i will have been incremented. Only set the
                            # results_offset value when the first match is encountered.
                            if results_offset < 0:
                                results_offset = offset + i + candidate + smart['adjust'] + self.total_scanned
                                i_set_results_offset = True

                            # Double check to make sure the smart['adjust'] value is sane.
                            # If it makes results_offset negative, then it is not sane.
                            if results_offset >= 0:
                                smart['offset'] = results_offset

                                # Invoke any scan plugins
                                if not (plugret_start & PLUGIN_STOP_PLUGINS):
                                    plugret = self.plugins._scan_callbacks(smart)
                                    results_offset = smart['offset']
                                    if (plugret & PLUGIN_TERMINATE):
                                        break

                                # Extract the result, if it matches one of the extract rules and is not a delayed extract.
                                if self.extractor.enabled and not (self.extractor.delayed and smart['delay']) and not ((plugret | plugret_start) & PLUGIN_NO_EXTRACT):
                                    # If the signature did not specify a size, extract to the end of the file.
                                    if not smart['size']:
                                        smart['size'] = fsize-results_offset

                                    smart['extract'] = self.extractor.extract(results_offset,
                                                                              smart['description'],
                                                                              target_file,
                                                                              smart['size'],
                                                                              name=smart['name'])

                                if not ((plugret | plugret_start) & PLUGIN_NO_DISPLAY):
                                    # This appears to be a valid result, so append it to the results list.
                                    results.append(smart)
                                elif i_set_results_offset:
                                    results_offset = -1

                # Did we find any valid results?
                if results_offset >= 0:
                    scan_results[results_offset] = results

                    if callback is not None:
                        callback(results_offset, results)

                    # If a relative jump offset was specified, update the absolute jump_offset variable
                    if has_key(smart, 'jump') and smart['jump'] > 0:
                        jump_offset = results_offset + smart['jump']

        # Track the total number of bytes scanned
        self.total_scanned += dlen

        # The starting offset only affects the reported offset for results
        # in the first block of data. Zero it out after the first block has
        # been processed.
        offset = 0

    # Sort the results before returning them
    scan_items = list(scan_results.items())
    scan_items.sort()

    # Do delayed extraction, if specified.
    if self.extractor.enabled and self.extractor.delayed:
        scan_items = self.extractor.delayed_extract(scan_items, target_file, fsize)

    # Invoke any post-scan plugins
    #if not (plugret_start & PLUGIN_STOP_PLUGINS):
    self.plugins._post_scan_callbacks(fd)

    # Be sure to delete the Plugins instance so that there isn't a lingering reference to
    # this Binwalk class instance (lingering handles to this Binwalk instance prevent the
    # __del__ destructor from being called).
    if i_loaded_plugins:
        del self.plugins
        self.plugins = None

    if i_opened_fd:
        fd.close()

    return scan_items
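# A hedged usage sketch (not in the original source): assuming `bw` is a Binwalk
# instance and 'firmware.bin' is a hypothetical target, single_scan can be driven
# directly, either via the per-match callback or the sorted return value:
#
#   def on_match(offset, results):
#       for result in results:
#           print "0x%X\t%s" % (offset, result['description'])
#
#   scan_items = bw.single_scan('firmware.bin', callback=on_match)
#   for (match_offset, result_list) in scan_items:
#       pass  # each entry pairs an offset with its list of result dictionaries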
def __init__(self, file_name, binwalk=None, length=0, offset=0, n=MIN_STRING_LENGTH, block=DEFAULT_ENTROPY_BLOCK, algorithm='gzip', plugins=None):
    '''
    Class constructor. Preferred to be invoked from the Strings class instead of directly.

    @file_name - The file name to perform a strings analysis on.
    @binwalk   - An instance of the Binwalk class.
    @length    - The number of bytes in the file to analyze.
    @offset    - The starting offset into the file to begin analysis.
    @n         - The minimum valid string length.
    @block     - The block size to use when performing entropy analysis. Set to None to skip entropy analysis.
    @algorithm - The entropy algorithm to use when performing entropy analysis.
    @plugins   - An instance of the Plugins class.

    Returns None.
    '''
    self.n = n
    self.binwalk = binwalk
    self.length = length
    self.start = offset
    self.data = ''
    self.dlen = 0
    self.i = 0
    self.total_read = 0
    self.entropy = {}
    self.valid_strings = []
    self.external_validators = []
    self.plugins = plugins
    self.block = block

    if not self.n:
        self.n = self.MIN_STRING_LENGTH

    if self.block is not None:
        # Perform an entropy analysis over the entire file (anything less may generate poor entropy data).
        # Give a fake file results list to prevent FileEntropy from doing too much analysis.
        with entropy.FileEntropy(file_name, block=self.block, file_results=['foo']) as e:
            (self.x, self.y, self.average_entropy) = e.analyze(algorithm=algorithm)
            for i in range(0, len(self.x)):
                self.entropy[self.x[i]] = self.y[i]
            # Make sure our block size matches the entropy analysis's block size
            self.block = e.block

        # Make sure the starting offset is a multiple of the block size; else, when later checking
        # the entropy analysis, block offsets won't line up.
        self.start -= (self.start % self.block)
    else:
        i = 0
        self.block = common.BlockFile.READ_BLOCK_SIZE

        # Fake the entropy scan
        while i < common.file_size(file_name):
            self.entropy[i] = 1.0
            i += self.block

    self.fd = common.BlockFile(file_name, 'r', length=length, offset=self.start)
    # TODO: This is not optimal. We should read in larger chunks and process it into self.block chunks.
    self.fd.READ_BLOCK_SIZE = self.block
    self.fd.MAX_TRAILING_SIZE = 0
    self.start = self.fd.offset

    # Set the total_scanned and scan_length values for plugins and status display messages
    if self.binwalk:
        self.binwalk.total_scanned = 0
        self.binwalk.scan_length = self.fd.length
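# A worked example (an addition, not from the original source) of the block-alignment
# fix-up above: with a hypothetical entropy block size of 1024 and a requested start
# offset of 1500, self.start -= (self.start % self.block) yields 1500 - (1500 % 1024)
# = 1500 - 476 = 1024. Since self.entropy is keyed on multiples of the block size,
# this keeps later entropy lookups aligned to the analyzed block boundaries.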