Пример #1
0
	def _jump(self, data):
		'''
		Obtains the jump-to-offset value of a signature, if any.

		@data - String result data.

		Returns the offset to jump to, or 0 if no 'jump' keyword argument is present
		or its value cannot be converted to an integer.
		'''
		offset_str = self._get_keyword_arg(data, 'jump')
		if not offset_str:
			return 0

		try:
			return str2int(offset_str)
		except Exception:
			# A malformed jump value is deliberately ignored (best effort), but only
			# ordinary errors are swallowed; KeyboardInterrupt / SystemExit still propagate.
			return 0
Пример #2
0
	def _get_math_arg(self, data, keyword):
		'''
		Retrieves the argument for keywords that specify mathematical expressions as arguments.
		The argument is split on '+' and the individual terms are summed.

		@data    - String result data, as returned by libmagic.
		@keyword - Keyword index in KEYWORDS.

		Returns the resulting calculated value (0 if the keyword has no argument).
		Sets self.invalid to True if any term cannot be converted to an integer; such
		terms contribute nothing to the returned value.
		'''
		value = 0

		arg = self._get_keyword_arg(data, keyword)
		if arg:
			for term in arg.split('+'):
				try:
					value += str2int(term)
				except Exception:
					# Flag the result as invalid rather than aborting; the remaining terms
					# are still summed. Only ordinary errors are caught here so that
					# KeyboardInterrupt / SystemExit are not mistaken for bad data.
					self.invalid = True

		return value
Пример #3
0
	def parse(self, data):
		'''
		Parse a given data string for smart signature keywords. If any are found, interpret them and strip them.

		@data - String to parse, as returned by libmagic.

		Returns a dictionary of parsed values. If smart signatures are being ignored, or the data
		does not pass self._is_valid, only 'description' is populated (with the unmodified data).
		As a side effect, self.invalid is reset to False on entry; its final value is copied into
		the returned dictionary's 'invalid' entry.
		'''
		results = {
			'offset'	: '',		# Offset where the match was found, filled in by Binwalk.single_scan.
			'description'	: '',		# The libmagic data string, stripped of all keywords
			'name'		: '',		# The original name of the file, if known
			'delay'		: '',		# Extract delay description
			'extract'	: '',		# Name of the extracted file, filled in by Binwalk.single_scan.
			'jump'		: 0,		# The relative offset to resume the scan from
			'size'		: 0,		# The size of the file, if known
			'adjust'	: 0,		# The relative offset to add to the reported offset
			'year'		: 0,		# The file's creation/modification year, if reported in the signature
			'epoch'		: 0,		# The file's creation/modification epoch time, if reported in the signature
			'invalid'	: False,	# Set to True if parsed numerical values appear invalid
		}

		self.invalid = False

		# If smart signatures are disabled, or the result data is not valid (i.e., potentially malicious),
		# don't parse anything, just return the raw data as the description.
		if self.ignore_smart_signatures or not self._is_valid(data):
			results['description'] = data
		else:
			# Calculate and replace math keyword values
			data = self._replace_maths(data)

			# Parse the offset-adjust value. This is used to adjust the reported offset at which
			# a signature was located due to the fact that MagicParser.match expects all signatures
			# to be located at offset 0, which some will not be.
			results['adjust'] = self._get_math_arg(data, 'adjust')

			# Parse the file-size value. This is used to determine how many bytes should be extracted
			# when extraction is enabled. If not specified, everything to the end of the file will be
			# extracted (see Binwalk.scan).
			try:
				results['size'] = str2int(self._get_math_arg(data, 'filesize'))
			except Exception:
				# Best effort: an unusable size leaves the default of 0.
				pass

			# The year and epoch keywords are optional plain integers; a missing or
			# malformed value leaves the default of 0.
			for keyword in ('year', 'epoch'):
				try:
					results[keyword] = str2int(self._get_keyword_arg(data, keyword))
				except Exception:
					pass

			results['delay'] = self._get_keyword_arg(data, 'delay')

			# Parse the string for the jump-to-offset keyword.
			# This keyword is honored, even if this string result is one of many.
			results['jump'] = self._get_math_arg(data, 'jump')

			# If this is one of many, don't do anything and leave description as a blank string.
			# Else, strip all keyword tags from the string and process additional keywords as necessary.
			if not self._one_of_many(data):
				results['name'] = self._get_keyword_arg(data, 'filename').strip('"')
				results['description'] = self._strip_tags(data)

		# self.invalid may have been set by the _get_math_arg calls above.
		results['invalid'] = self.invalid

		return results
Пример #4
0
	def _parse_raw_strings(self, data):
		'''
		Process strings that aren't NULL byte terminated, but for which we know the string length.
		This should be called prior to any other smart parsing functions.

		@data - String to parse.

		Returns a parsed string.
		'''
		if not self.ignore_smart_signatures and self._is_valid(data):
			# Get the raw string  keyword arg
			raw_string = self._get_keyword_arg(data, 'raw-string')

			# Was a raw string  keyword specified?
			if raw_string:
				# Get the raw string length arg
				raw_size = self._get_keyword_arg(data, 'raw-size')
	
				# Is the raw string  length arg is a numeric value?
				if re.match('^-?[0-9]+$', raw_size):
					# Replace all instances of raw-replace in data with raw_string[:raw_size]
					# Also strip out everything after the raw-string keyword, including the keyword itself.
					# Failure to do so may (will) result in non-printable characters and this string will be 
					# marked as invalid when it shouldn't be.
					data = data[:data.find(self.KEYWORDS['raw-string'])].replace(self.KEYWORDS['raw-replace'], '"' + raw_string[:str2int(raw_size)] + '"')
		return data
Пример #5
0
	def _parse_line(self, line):
		'''
		Parses a signature line into its four parts (offset, type, condition and description),
		looking for the first line of a given signature.

		@line - The signature line to parse.

		Returns a dictionary with the respective line parts populated if the line is the first of a signature.
		Returns a dictionary with all parts set to None if the line is not the first of a signature.
		'''
		entry = {
			'offset'	: '',
			'type'		: '',
			'condition'	: '',
			'description'	: '',
			'length'	: 0
		}

		# Quick and dirty pre-filter. We are only concerned with the first line of a
		# signature, which will always start with a number. Make sure the first byte of
		# the line is a number; if not, don't process.
		if line[:1] < '0' or line[:1] > '9':
			return None

		try:
			# Split the line into white-space separated parts.
			# For this to work properly, replace escaped spaces ('\ ') with '\x20'.
			# This means the same thing, but doesn't confuse split().
			line_parts = line.replace('\\ ', '\\x20').split()
			entry['offset'] = line_parts[0]
			entry['type'] = line_parts[1]
			# The condition line may contain escaped sequences, so be sure to decode it properly.
			entry['condition'] = string_decode(line_parts[2])
			entry['description'] = ' '.join(line_parts[3:])
		except Exception as e:
			raise Exception("%s :: %s", (str(e), line))

		# We've already verified that the first character in this line is a number, so this *shouldn't*
		# throw an exception, but let's catch it just in case...
		try:
			entry['offset'] = str2int(entry['offset'])
		except Exception as e:
			raise Exception("%s :: %s", (str(e), line))

		# If this is a string, get the length of the string
		if 'string' in entry['type'] or entry['condition'] == self.WILDCARD:
			entry['length'] = len(entry['condition'])
		# Else, we need to jump through a few more hoops...
		else:	
			# Default to little endian, unless the type field starts with 'be'. 
			# This assumes that we're running on a little endian system...
			if entry['type'].startswith('be'):
				endianess = self.BIG_ENDIAN
			else:
				endianess = self.LITTLE_ENDIAN
			
			# Try to convert the condition to an integer. This does not allow
			# for more advanced conditions for the first line of a signature, 
			# but needing that is rare.
			try:
				intval = str2int(entry['condition'].strip('L'))
			except Exception as e:
				raise Exception("Failed to evaluate condition for '%s' type: '%s', condition: '%s', error: %s" % (entry['description'], entry['type'], entry['condition'], str(e)))

			# How long is the field type?
			if entry['type'] == 'byte':
				entry['length'] = 1
			elif 'short' in entry['type']:
				entry['length'] = 2
			elif 'long' in entry['type']:
				entry['length'] = 4
			elif 'quad' in entry['type']:
				entry['length'] = 8

			# Convert the integer value to a string of the appropriate endianess
			entry['condition'] = self._to_string(intval, entry['length'], endianess)

		return entry