Example #1
def test_file_size():
    setup_test_files()
    assert file_size(f'{test_folder_path}/readwrite.txt') == 11
    # Need to either add logic to common to deal with the file not existing,
    # OR need to read from stderr and assert that this check raises FileNotFoundError
    # assert file_size('C:/udp-app-master/dev/tests/working/this_does_not_exist.txt') == 0
    teardown_test_files()
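The commented-out assertion above depends on how a nonexistent path should be handled. Below is a minimal sketch of the first option the comment mentions (handling the missing file inside the helper itself); the function name and the byte-count return value are assumptions, not necessarily how the real common module behaves.

import os

def file_size(file_name):
    """Return the size of file_name in bytes, or 0 if the file does not exist."""
    try:
        return os.path.getsize(file_name)  # assumption: a plain byte count is wanted
    except FileNotFoundError:
        return 0  # missing file handled here instead of asserting on stderr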
Example #2
def prepare_binary():
    env = common.prepare_env()

    # make dir to put the binary in
    if not os.path.isdir(os.path.join("..", "artifact")):
        os.mkdir(os.path.join("..", "artifact"))

    BUILD_FILENAME = ""

    if isinstance(BUILD_FILENAME, str):
        BUILD_FILENAME = list(BUILD_FILENAME)

    BUILD_FILENAMES = BUILD_FILENAME

    for BUILD_FILENAME in BUILD_FILENAMES:
        DEST_FILENAME = common.prepare_filename(BUILD_FILENAME)

        print(f"OS Name:        {env['OS_NAME']}")
        print(f"OS Version:     {env['OS_VERSION']}")
        print(f"Build Filename: {BUILD_FILENAME}")
        print(f"Dest Filename:  {DEST_FILENAME}")
        if not BUILD_FILENAME == "":
            print("Build Filesize: " + common.file_size(BUILD_FILENAME))
        else:
            exit(1)

        if not BUILD_FILENAME == "":
            move(os.path.join(".", BUILD_FILENAME),
                 os.path.join("..", "artifact", BUILD_FILENAME))
Example #3
    def extract(self, offset, description, file_name, size, name=None):
        '''
		Extract an embedded file from the target file, if it matches an extract rule.
		Called automatically by Binwalk.scan().

		@offset      - Offset inside the target file to begin the extraction.
		@description - Description of the embedded file to extract, as returned by libmagic.
		@file_name   - Path to the target file.
		@size        - Number of bytes to extract.
		@name        - Name to save the file as.

		Returns the name of the extracted file (blank string if nothing was extracted).
		'''
        cleanup_extracted_fname = True

        rule = self._match(description)
        if rule is not None:
            fname = self._dd(file_name,
                             offset,
                             size,
                             rule['extension'],
                             output_file_name=name)
            if rule['cmd']:

                # Many extraction utilities will extract the file to a new file, just without
                # the file extension (i.e., myfile.7z => myfile). If the presumed resulting
                # file name already exists before executing the extract command, do not attempt
                # to clean it up even if its resulting file size is 0.
                if self.remove_after_execute:
                    extracted_fname = os.path.splitext(fname)[0]
                    if os.path.exists(extracted_fname):
                        cleanup_extracted_fname = False

                # Execute the specified command against the extracted file
                self._execute(rule['cmd'], fname)

                # Only clean up files if remove_after_execute was specified
                if self.remove_after_execute:

                    # Remove the original file that we extracted
                    try:
                        os.unlink(fname)
                    except:
                        pass

                    # If the command worked, assume it removed the file extension from the extracted file

                    # If the extracted file exists and is empty, remove it
                    if cleanup_extracted_fname and os.path.exists(
                            extracted_fname) and file_size(
                                extracted_fname) == 0:
                        try:
                            os.unlink(extracted_fname)
                        except:
                            pass
        else:
            fname = ''

        return fname
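The cleanup logic above predicts the name an extraction utility will produce by stripping the file extension with os.path.splitext(). A tiny standalone illustration of that step; the file name is a placeholder.

import os

# splitext() drops the final extension, mirroring the myfile.7z => myfile example
extracted_fname = os.path.splitext('myfile.7z')[0]
assert extracted_fname == 'myfile'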
Example #4
	def __init__(self, file_name, binwalk, length=0, offset=0, n=MIN_STRING_LENGTH, block=0, algorithm=None, plugins=None):
		'''
		Class constructor. Preferred to be invoked from the Strings class instead of directly.

		@file_name - The file name to perform a strings analysis on.
		@binwalk   - An instance of the Binwalk class.
		@length    - The number of bytes in the file to analyze.
		@offset    - The starting offset into the file to begin analysis.
		@n         - The minimum valid string length.
		@block     - The block size to use when performing entropy analysis.
		@algorithm - The entropy algorithm to use when performing entropy analysis.
		@plugins   - An instance of the Plugins class.

		Returns None.
		'''
		self.n = n
		self.binwalk = binwalk
		self.length = length
		self.start = offset
		self.file_name = file_name
		self.fd = open(self.file_name, 'rb')
		self.data = ''
		self.dlen = 0
		self.i = 0
		self.total_read = 0
		self.entropy = {}
		self.valid_strings = []
		self.external_validators = []
		self.plugins = plugins

		if not self.n:
			self.n = self.MIN_STRING_LENGTH

		# Perform an entropy analysis over the entire file (anything less may generate poor entropy data).
		# Give fake file results list to prevent FileEntropy from doing too much analysis.
		with entropy.FileEntropy(self.file_name, block=block, file_results=['foo']) as e:
			(self.x, self.y, self.average_entropy) = e.analyze()
			for i in range(0, len(self.x)):
				self.entropy[self.x[i]] = self.y[i]
			# Make sure our block size matches the entropy analysis's block size
			self.block = e.block

		# Make sure the starting offset is a multiple of the block size; else, when later checking
		# the entropy analysis, block offsets won't line up.
		self.start -= (self.start % self.block)

		try:
			self.fd.seek(self.start)
		except:
			self.fd.read(self.start)

		# Set the total_scanned and scan_length values for plugins and status display messages
		self.binwalk.total_scanned = 0
		if self.length:
			self.binwalk.scan_length = self.length
		else:
			self.binwalk.scan_length = common.file_size(self.fd.name) - self.start
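The line self.start -= (self.start % self.block) snaps the starting offset down to a block boundary so that later lookups into the entropy dictionary line up. A small worked illustration, using an assumed 1024-byte block:

block = 1024              # assumed block size for illustration only
start = 2500              # arbitrary starting offset
start -= (start % block)  # snap down to the containing block boundary
assert start == 2048      # 2500 falls inside the block that begins at 2048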
Example #5
	def __init__(self, file_name=None, fd=None, binwalk=None, offset=0, length=None, block=DEFAULT_BLOCK_SIZE, plugins=None, file_results=[], compcheck=False):
		'''
		Class constructor.

		@file_name    - The path to the file to analyze.
		@fd           - A file object to analyze data from.
		@binwalk      - An instance of the Binwalk class.
		@offset       - The offset into the data to begin analysis.
		@length       - The number of bytes to analyze.
		@block        - The size of the data blocks to analyze.
		@plugins      - Instance of the Plugins class.
		@file_results - Scan results to overlay on the entropy plot graph.
		@compcheck    - Set to True to enable entropy compression detection.

		Returns None.
		'''
		self.fd = fd
		self.start = offset
		self.length = length
		self.block = block
		self.binwalk = binwalk
		self.plugins = plugins
		self.total_read = 0
		self.fd_open = False
		self.file_results = file_results
		self.do_chisq = compcheck

		if file_name is None and self.fd is None:
			raise Exception("Entropy.__init__ requires at least the file_name or fd options")

		if self.fd is None:
			self.fd = open(file_name, 'rb')
			self.fd_open = True

		if not self.length:
			self.length = None

		if not self.start:
			self.start = 0

		if not self.block:
			self.block = self.DEFAULT_BLOCK_SIZE
			
		# Some file descriptors aren't seekable (stdin, for example)
		try:
			self.fd.seek(self.start)
		except:
			self.fd.read(self.start)

		if self.binwalk:
			# Set the total_scanned and scan_length values for plugins and status display messages
			self.binwalk.total_scanned = 0
			if self.length:
				self.binwalk.scan_length = self.length
			else:
				self.binwalk.scan_length = common.file_size(self.fd.name) - self.start
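Both this constructor and the strings analyzer above fall back to reading when the descriptor cannot seek (stdin, pipes). A standalone sketch of that pattern with the exception types narrowed instead of a bare except; the helper name is illustrative only.

import io

def advance_to(fd, offset):
    """Position fd at offset, consuming bytes if the stream is not seekable."""
    try:
        fd.seek(offset)
    except (OSError, io.UnsupportedOperation):
        fd.read(offset)  # swallow offset bytes instead of seeking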
Example #6
	def extract(self, offset, description, file_name, size, name=None):
		'''
		Extract an embedded file from the target file, if it matches an extract rule.
		Called automatically by Binwalk.scan().

		@offset      - Offset inside the target file to begin the extraction.
		@description - Description of the embedded file to extract, as returned by libmagic.
		@file_name   - Path to the target file.
		@size        - Number of bytes to extract.
		@name        - Name to save the file as.

		Returns the name of the extracted file (blank string if nothing was extracted).
		'''
		cleanup_extracted_fname = True

		rule = self._match(description)
		if rule is not None:
			fname = self._dd(file_name, offset, size, rule['extension'], output_file_name=name)
			if rule['cmd']:

				# Many extraction utilities will extract the file to a new file, just without
				# the file extension (i.e., myfile.7z => myfile). If the presumed resulting
				# file name already exists before executing the extract command, do not attempt 
				# to clean it up even if its resulting file size is 0.
				if self.remove_after_execute:
					extracted_fname = os.path.splitext(fname)[0]
					if os.path.exists(extracted_fname):
						cleanup_extracted_fname = False

				# Execute the specified command against the extracted file
				self._execute(rule['cmd'], fname)

				# Only clean up files if remove_after_execute was specified				
				if self.remove_after_execute:

					# Remove the original file that we extracted
					try:
						os.unlink(fname)
					except:
						pass

					# If the command worked, assume it removed the file extension from the extracted file

					# If the extracted file exists and is empty, remove it
					if cleanup_extracted_fname and os.path.exists(extracted_fname) and file_size(extracted_fname) == 0:
						try:
							os.unlink(extracted_fname)
						except:
							pass
		else:
			fname = ''

		return fname
Example #7
    def upload_to_blobstore(self):
        """Upload publish_folder's <dataset_name>-<job_id>.zip to landing blobstore."""

        # don't upload captured data if we're in --notransfer mode
        if self.option('notransfer'):
            logger.warning(
                'Not uploading data to landing per --notransfer option')
            return

        # upload capture file to landing blobstore
        self.events.start('upload', 'step')
        resource = self.config(self.project.blobstore_landing)
        bs_landing = BlobStore()
        bs_landing.connect(resource)
        bs_landing.put(self.zip_file_name, just_file_name(self.zip_file_name))
        bs_landing.disconnect()

        # finish
        self.events.stop('upload', 0, file_size(self.zip_file_name))
Example #8
    def compress_work_folder(self):
        """Compress all files in work_folder to single file in publish_folder."""

        # setup
        self.events.start('compress', 'step')
        self.capture_file_name = f'{self.dataset_name}#{self.job_id:09}'
        self.zip_file_name = f'{self.publish_folder}/{self.capture_file_name}'

        # copy capture_state files to work folder to be included in capture zip package as well
        copy_file_if_exists(f'{self.state_folder}/last_job.log',
                            self.work_folder)

        # compress (make_archive() appends a .zip file extension to zip_file_name)
        self.zip_file_name = shutil.make_archive(self.zip_file_name,
                                                 format='zip',
                                                 root_dir=self.work_folder)

        # finish
        self.events.stop('compress', 0, file_size(self.zip_file_name))
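shutil.make_archive() returns the full path of the archive it creates, with the format's extension appended, which is why zip_file_name is reassigned above before being passed to file_size(). A minimal standalone sketch of the same pattern; the paths are placeholders.

import os
import shutil

base = '/tmp/publish/dataset-000000001'  # placeholder base name, no extension
archive = shutil.make_archive(base, format='zip', root_dir='/tmp/work')
print(archive)                   # e.g. /tmp/publish/dataset-000000001.zip
print(os.path.getsize(archive))  # size of the new archive in bytes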
Example #9
    def extract(self, offset, description, file_name, size, name=None):
        '''
		Extract an embedded file from the target file, if it matches an extract rule.
		Called automatically by Binwalk.scan().

		@offset      - Offset inside the target file to begin the extraction.
		@description - Description of the embedded file to extract, as returned by libmagic.
		@file_name   - Path to the target file.
		@size        - Number of bytes to extract.
		@name        - Name to save the file as.

		Returns the name of the extracted file (blank string if nothing was extracted).
		'''
        fname = ''
        cleanup_extracted_fname = True
        original_dir = os.getcwd()

        if not os.path.exists(self.extract_path):
            os.mkdir(self.extract_path)

        file_path = os.path.realpath(file_name)

        if os.path.isfile(file_path):
            os.chdir(self.extract_path)

            # Loop through each extraction rule until one succeeds
            for rule in self._match(description):
                # Copy out the data to disk
                fname = self._dd(file_path,
                                 offset,
                                 size,
                                 rule['extension'],
                                 output_file_name=name)

                # If there was a command specified for this rule, try to execute it.
                # If execution fails, the next rule will be attempted.
                if rule['cmd']:

                    # Many extraction utilities will extract the file to a new file, just without
                    # the file extension (i.e., myfile.7z -> myfile). If the presumed resulting
                    # file name already exists before executing the extract command, do not attempt
                    # to clean it up even if its resulting file size is 0.
                    if self.remove_after_execute:
                        extracted_fname = os.path.splitext(fname)[0]
                        if os.path.exists(extracted_fname):
                            cleanup_extracted_fname = False

                    # Execute the specified command against the extracted file
                    extract_ok = self._execute(rule['cmd'], fname)

                    # Only clean up files if remove_after_execute was specified
                    if self.remove_after_execute:

                        # Remove the original file that we extracted
                        try:
                            os.unlink(fname)
                        except:
                            pass

                        # If the command worked, assume it removed the file extension from the extracted file

                        # If the extracted file exists and is empty, remove it
                        if cleanup_extracted_fname and os.path.exists(
                                extracted_fname) and file_size(
                                    extracted_fname) == 0:
                            try:
                                os.unlink(extracted_fname)
                            except:
                                pass

                    # If the command executed OK, don't try any more rules
                    if extract_ok:
                        break
                # If there was no command to execute, just use the first rule
                else:
                    break

            os.chdir(original_dir)

        # If a file was extracted, return the full path to that file
        if fname:
            fname = os.path.join(self.extract_path, fname)

        return fname
Example #10
	def single_scan(self, target_file='', fd=None, offset=0, length=0, show_invalid_results=False, callback=None, plugins_whitelist=[], plugins_blacklist=[]):
		'''
		Performs a binwalk scan on one target file or file descriptor.

		@target_file 	      - File to scan.
		@fd                   - File descriptor to scan.
		@offset      	      - Starting offset at which to start the scan.
		@length      	      - Number of bytes to scan. Specify -1 for streams.
		@show_invalid_results - Set to True to display invalid results.
		@callback    	      - Callback function to be invoked when matches are found.
		@plugins_whitelist    - A list of plugin names to load. If not empty, only these plugins will be loaded.
		@plugins_blacklist    - A list of plugin names to not load.

		The callback function is passed two arguments: a list of result dictionaries containing the scan results
		(one result per dict), and the offset at which those results were identified. Example callback function:

			def my_callback(offset, results):
				print "Found %d results at offset %d:" % (len(results), offset)
				for result in results:
					print "\t%s" % result['description']

			binwalk.Binwalk(callback=my_callback).scan("firmware.bin")

		Upon completion, the scan method returns a sorted list of tuples containing a list of results dictionaries
		and the offsets at which those results were identified:

			scan_results = [
					(0, [{description : "LZMA compressed data..."}]),
					(112, [{description : "gzip compressed data..."}])
			]

		See SmartSignature.parse for a more detailed description of the results dictionary structure.
		'''
		scan_results = {}
		fsize = 0
		jump_offset = 0
		i_opened_fd = False
		i_loaded_plugins = False
		plugret = PLUGIN_CONTINUE
		plugret_start = PLUGIN_CONTINUE
		self.total_read = 0
		self.total_scanned = 0
		self.scan_length = length
		self.filter.show_invalid_results = show_invalid_results
		self.start_offset = offset

		# Check to make sure either a target file or a file descriptor was supplied
		if not target_file and fd is None:
			raise Exception("Must supply Binwalk.single_scan with a valid file path or file object")

		# Load the default signatures if self.load_signatures has not already been invoked
		if self.magic is None:
			self.load_signatures()

		# Need the total size of the target file, even if we aren't scanning the whole thing
		if target_file:
			fsize = file_size(target_file)
			
		# Open the target file and seek to the specified start offset
		if fd is None:
			fd = open(target_file)
			i_opened_fd = True
	
		# Seek to the starting offset. This is invalid for some file-like objects such as stdin,
		# so if an exception is thrown try reading offset bytes from the file object.	
		try:	
			fd.seek(offset)
		except:
			fd.read(offset)
		
		# If no length was specified, make the length the size of the target file minus the starting offset
		if self.scan_length == 0:
			self.scan_length = fsize - offset

		# If the Plugins class has not already been instantiated, do that now.
		if self.plugins is None:
			self.plugins = Plugins(self, blacklist=plugins_blacklist, whitelist=plugins_whitelist)
			i_loaded_plugins = True
		
			if self.load_plugins:
				self.plugins._load_plugins()

		# Invoke any pre-scan plugins
		plugret_start = self.plugins._pre_scan_callbacks(fd)
		
		# Main loop, scan through all the data
		while not ((plugret | plugret_start) & PLUGIN_TERMINATE):
			i = 0

			# Read in the next block of data from the target file and make sure it's valid
			(data, dlen) = self._read_block(fd)
			if data is None or dlen == 0:
				break

			# The total number of bytes scanned could be bigger than the total number
			# of bytes read from the file if the previous signature result specified a 
			# jump offset that was beyond the end of the then current data block.
			#
			# If this is the case, we need to index into this data block appropriately in order to 
			# resume the scan from the appropriate offset, and adjust dlen accordingly.
			if jump_offset > 0:
				total_check = self.total_scanned + dlen

				if jump_offset >= total_check:
					i = -1
					
					# Try to seek to the jump offset; this won't work if fd == sys.stdin
					try:
						fd.seek(jump_offset)
						self.total_read = jump_offset
						self.total_scanned = jump_offset - dlen
						jump_offset = 0
					except:
						pass
				elif jump_offset < total_check:
					# Index into this block appropriately
					i = jump_offset - self.total_scanned
					jump_offset = 0

			# Scan through each block of data looking for signatures
			if i >= 0 and i < dlen:

				# Scan this data block for a list of offsets which are candidates for possible valid signatures
				for candidate in self.parser.find_signature_candidates(data[i:dlen]):

					# If a signature specified a jump offset beyond this candidate signature offset, ignore it
					if (i + candidate + self.total_scanned) < jump_offset:
						continue

					# Reset these values on each loop	
					smart = {}
					results = []
					results_offset = -1

					# Pass the data to libmagic, and split out multiple results into a list
					for magic_result in self.parser.split(self.magic.buffer(data[i+candidate:i+candidate+self.MAX_SIGNATURE_SIZE])):

						i_set_results_offset = False

						# Some file names are not NULL byte terminated, but rather their length is
						# specified in a size field. To ensure these are not marked as invalid due to
						# non-printable characters existing in the file name, parse the filename(s) and
						# trim them to the specified filename length, if one was specified.
						magic_result = self.smart._parse_raw_strings(magic_result)

						# Make sure this is a valid result before further processing
						if not self.filter.invalid(magic_result):
							# The smart filter parser returns a dictionary of keyword values and the signature description.
							smart = self.smart.parse(magic_result)
	
							# Validate the jump value and check if the response description should be displayed
							if smart['jump'] > -1 and self._should_display(smart):
								# If multiple results are returned and one of them has smart['jump'] set to a non-zero value,
								# the calculated results offset will be wrong since i will have been incremented. Only set the
								# results_offset value when the first match is encountered.
								if results_offset < 0:
									results_offset = offset + i + candidate + smart['adjust'] + self.total_scanned
									i_set_results_offset = True

								# Double check to make sure the smart['adjust'] value is sane. 
								# If it makes results_offset negative, then it is not sane.
								if results_offset >= 0:
									smart['offset'] = results_offset

									# Invoke any scan plugins 
									if not (plugret_start & PLUGIN_STOP_PLUGINS):
										plugret = self.plugins._scan_callbacks(smart)
										results_offset = smart['offset']
										if (plugret & PLUGIN_TERMINATE):
											break

									# Extract the result, if it matches one of the extract rules and is not a delayed extract.
									if self.extractor.enabled and not (self.extractor.delayed and smart['delay']) and not ((plugret | plugret_start) & PLUGIN_NO_EXTRACT):
										# If the signature did not specify a size, extract to the end of the file.
										if not smart['size']:
											smart['size'] = fsize-results_offset
										
										smart['extract'] = self.extractor.extract(	results_offset, 
																smart['description'], 
																target_file, 
																smart['size'], 
																name=smart['name'])

									if not ((plugret | plugret_start) & PLUGIN_NO_DISPLAY):
										# This appears to be a valid result, so append it to the results list.
										results.append(smart)
									elif i_set_results_offset:
										results_offset = -1

					# Did we find any valid results?
					if results_offset >= 0:
						scan_results[results_offset] = results
					
						if callback is not None:
							callback(results_offset, results)
			
						# If a relative jump offset was specified, update the absolute jump_offset variable
						if smart.has_key('jump') and smart['jump'] > 0:
							jump_offset = results_offset + smart['jump']

			# Track the total number of bytes scanned
			self.total_scanned += dlen
			# The starting offset only affects the reported offset for results
			# in the first block of data. Zero it out after the first block has
			# been processed.
			offset = 0

		# Sort the results before returning them
		scan_items = scan_results.items()
		scan_items.sort()

		# Do delayed extraction, if specified.
		if self.extractor.enabled and self.extractor.delayed:
			scan_items = self.extractor.delayed_extract(scan_items, target_file, fsize)

		# Invoke any post-scan plugins
		#if not (plugret_start & PLUGIN_STOP_PLUGINS):
		self.plugins._post_scan_callbacks(fd)

		# Be sure to delete the Plugins instance so that there isn't a lingering reference to
		# this Binwalk class instance (lingering handles to this Binwalk instance cause the
		# __del__ destructor to not be called).
		if i_loaded_plugins:
			del self.plugins
			self.plugins = None

		if i_opened_fd:
			fd.close()

		return scan_items
Example #11
def prepare_release():
    env = common.prepare_env()  # get env vars

    dirs = [
        os.path.join("..", "artifact"),  # temp dir for binary
        os.path.join("..", "build"),  # temp dir for other stuff
        os.path.join("..", "deploy")  # dir for archive
    ]
    for dirname in dirs:
        if not os.path.isdir(dirname):
            os.makedirs(dirname)

    # make dirs for each os
    # for dirname in ["linux", "macos", "windows"]:
    for dirname in ["linux"]:
        if not os.path.isdir(os.path.join("..", "deploy", dirname)):
            os.mkdir(os.path.join("..", "deploy", dirname))

    # sanity check permissions for working_dirs.json
    dirpath = "."
    for dirname in ["resources", "user", "meta", "manifests"]:
        dirpath = os.path.join(dirpath, dirname)
        if os.path.isdir(dirpath):
            os.chmod(dirpath, 0o755)

    # nuke git files
    for git in [
            os.path.join(".", ".gitattrubutes"),
            os.path.join(".", ".gitignore")
    ]:
        if os.path.isfile(git):
            os.remove(git)

    # nuke travis file if it exists
    for travis in [
            os.path.join(".", ".travis.yml"),
            os.path.join(".", ".travis.off")
    ]:
        if os.path.isfile(travis):
            os.remove(travis)

    # nuke test suite if it exists
    if os.path.isdir(os.path.join(".", "tests")):
        distutils.dir_util.remove_tree(os.path.join(".", "tests"))

    BUILD_FILENAME = ""
    ZIP_FILENAME = ""

    # list executables
    BUILD_FILENAME = (os.path.join("."))
    if BUILD_FILENAME == "":
        BUILD_FILENAME = (os.path.join("..", "artifact"))

    if isinstance(BUILD_FILENAME, str):
        BUILD_FILENAME = list(BUILD_FILENAME)

    BUILD_FILENAMES = BUILD_FILENAME

    print(BUILD_FILENAMES)

    if len(BUILD_FILENAMES) > 0:
        # clean the git slate
        git_clean()

        # mv dirs from source code
        dirs = [
            os.path.join(".", ".git"),
            os.path.join(".", ".github"),
            os.path.join(".", ".gitattributes"),
            os.path.join(".", ".gitignore"),
            os.path.join(".", "html"),
            os.path.join(".", "resources"),
            os.path.join(".", "schemas"),
            os.path.join(".", "CODE_OF_CONDUCT.md")
        ]
        for dirname in dirs:
            if os.path.exists(dirname):
                move(dirname, os.path.join("..", "build", dirname))

        # .zip if windows
        # .tar.gz otherwise
        ZIP_FILENAME = os.path.join("..", "deploy", env["REPO_NAME"])
        make_archive(ZIP_FILENAME, "zip")
        ZIP_FILENAME += ".zip"

        # mv dirs back
        for thisDir in dirs:
            if os.path.exists(os.path.join("..", "build", thisDir)):
                move(os.path.join("..", "build", thisDir),
                     os.path.join(".", thisDir))

    if not ZIP_FILENAME == "":
        print(f"Zip Filename:   {ZIP_FILENAME}")
        print("Zip Filesize:   " + common.file_size(ZIP_FILENAME))
    else:
        print(f"No Zip to prepare: {ZIP_FILENAME}")

    print(f"Git tag:        {env['GITHUB_TAG']}")

    if (ZIP_FILENAME == ""):
        exit(1)
Example #12
def prepare_release():
    env = common.prepare_env()  # get env vars

    dirs = [
        os.path.join("..", "artifact"),  # temp dir for binary
        os.path.join("..", "build"),  # temp dir for other stuff
        os.path.join("..", "deploy")  # dir for archive
    ]
    for dirname in dirs:
        if not os.path.isdir(dirname):
            os.makedirs(dirname)

    # make dirs for each os
    for dirname in ["linux", "macos", "windows"]:
        if not os.path.isdir(os.path.join("..", "deploy", dirname)):
            os.mkdir(os.path.join("..", "deploy", dirname))

    # sanity check permissions for working_dirs.json
    dirpath = "."
    for dirname in ["resources", "user", "meta", "manifests"]:
        dirpath = os.path.join(dirpath, dirname)
        if os.path.isdir(dirpath):
            os.chmod(dirpath, 0o755)

    # nuke git files
    for git in [
            os.path.join(".", ".gitattrubutes"),
            os.path.join(".", ".gitignore")
    ]:
        if os.path.isfile(git):
            os.remove(git)

    # nuke travis file if it exists
    for travis in [
            os.path.join(".", ".travis.yml"),
            os.path.join(".", ".travis.off")
    ]:
        if os.path.isfile(travis):
            os.remove(travis)

    # nuke test suite if it exists
    if os.path.isdir(os.path.join(".", "tests")):
        distutils.dir_util.remove_tree(os.path.join(".", "tests"))

    BUILD_FILENAME = ""
    ZIP_FILENAME = ""

    # list executables
    BUILD_FILENAME = common.find_binary(os.path.join("."))
    if BUILD_FILENAME == "":
        BUILD_FILENAME = common.find_binary(os.path.join("..", "artifact"))

    if isinstance(BUILD_FILENAME, str):
        BUILD_FILENAME = list(BUILD_FILENAME)

    BUILD_FILENAMES = BUILD_FILENAME

    print(BUILD_FILENAMES)

    if len(BUILD_FILENAMES) > 0:
        for BUILD_FILENAME in BUILD_FILENAMES:
            if not BUILD_FILENAME == "":
                if "artifact" not in BUILD_FILENAME:
                    # move the binary to temp folder
                    move(os.path.join(".", BUILD_FILENAME),
                         os.path.join("..", "artifact", BUILD_FILENAME))

        # clean the git slate
        git_clean()

        # mv dirs from source code
        dirs = [
            os.path.join(".", ".git"),
            os.path.join(".", ".github"),
            os.path.join(".", ".gitattributes"),
            os.path.join(".", ".gitignore"),
            os.path.join(".", "html"),
            os.path.join(".", "resources", "ci")
        ]
        for dirname in dirs:
            if os.path.isdir(dirname):
                move(dirname, os.path.join("..", "build", dirname))

        for BUILD_FILENAME in BUILD_FILENAMES:
            if "artifact" not in BUILD_FILENAME:
                if os.path.isfile(
                        os.path.join("..", "artifact", BUILD_FILENAME)):
                    # move the binary back
                    move(os.path.join("..", "artifact", BUILD_FILENAME),
                         os.path.join(".", BUILD_FILENAME))
                    # Make Linux/Mac binary executable
                    if "linux" in env["OS_NAME"] or \
                        "ubuntu" in env["OS_NAME"] or \
                        "mac" in env["OS_NAME"] or \
                            "osx" in env["OS_NAME"]:
                        os.chmod(os.path.join(".", BUILD_FILENAME), 0o755)

        # .zip if windows
        # .tar.gz otherwise
        if len(BUILD_FILENAMES) > 1:
            ZIP_FILENAME = os.path.join("..", "deploy", env["REPO_NAME"])
        else:
            ZIP_FILENAME = os.path.join("..", "deploy",
                                        os.path.splitext(BUILD_FILENAME)[0])
        if env["OS_NAME"] == "windows":
            make_archive(ZIP_FILENAME, "zip")
            ZIP_FILENAME += ".zip"
        else:
            make_archive(ZIP_FILENAME, "gztar")
            ZIP_FILENAME += ".tar.gz"

        # mv dirs back
        for thisDir in dirs:
            if os.path.isdir(os.path.join("..", "build", thisDir)):
                move(os.path.join("..", "build", thisDir),
                     os.path.join(".", thisDir))

    for BUILD_FILENAME in BUILD_FILENAMES:
        if not BUILD_FILENAME == "":
            print(f"Build Filename: {BUILD_FILENAME}")
            print("Build Filesize: " + common.file_size(BUILD_FILENAME))
        else:
            print(f"No Build to prepare: {BUILD_FILENAME}")

    if not ZIP_FILENAME == "":
        print(f"Zip Filename:   {ZIP_FILENAME}")
        print("Zip Filesize:   " + common.file_size(ZIP_FILENAME))
    else:
        print(f"No Zip to prepare: {ZIP_FILENAME}")

    print(f"Git tag:        {env['GITHUB_TAG']}")

    if (len(BUILD_FILENAMES) == 0) or (ZIP_FILENAME == ""):
        exit(1)
Example #13
	def extract(self, offset, description, file_name, size, name=None):
		'''
		Extract an embedded file from the target file, if it matches an extract rule.
		Called automatically by Binwalk.scan().

		@offset      - Offset inside the target file to begin the extraction.
		@description - Description of the embedded file to extract, as returned by libmagic.
		@file_name   - Path to the target file.
		@size        - Number of bytes to extract.
		@name        - Name to save the file as.

		Returns the name of the extracted file (blank string if nothing was extracted).
		'''
		fname = ''
		cleanup_extracted_fname = True
		original_dir = os.getcwd()

		if not os.path.exists(self.extract_path):
			os.mkdir(self.extract_path)

		file_path = os.path.realpath(file_name)
		
		if os.path.isfile(file_path):
			os.chdir(self.extract_path)

			# Loop through each extraction rule until one succeeds
			for rule in self._match(description):
				# Copy out the data to disk
				fname = self._dd(file_path, offset, size, rule['extension'], output_file_name=name)

				# If there was a command specified for this rule, try to execute it.
				# If execution fails, the next rule will be attempted.
				if rule['cmd']:

					# Many extraction utilities will extract the file to a new file, just without
					# the file extension (i.e., myfile.7z -> myfile). If the presumed resulting
					# file name already exists before executing the extract command, do not attempt 
					# to clean it up even if its resulting file size is 0.
					if self.remove_after_execute:
						extracted_fname = os.path.splitext(fname)[0]
						if os.path.exists(extracted_fname):
							cleanup_extracted_fname = False
	
					# Execute the specified command against the extracted file
					extract_ok = self._execute(rule['cmd'], fname)

					# Only clean up files if remove_after_execute was specified				
					if self.remove_after_execute:

						# Remove the original file that we extracted
						try:
							os.unlink(fname)
						except:
							pass

						# If the command worked, assume it removed the file extension from the extracted file

						# If the extracted file exists and is empty, remove it
						if cleanup_extracted_fname and os.path.exists(extracted_fname) and file_size(extracted_fname) == 0:
							try:
								os.unlink(extracted_fname)
							except:
								pass
					
					# If the command executed OK, don't try any more rules
					if extract_ok:
						break
				# If there was no command to execute, just use the first rule
				else:
					break

			os.chdir(original_dir)

		# If a file was extracted, return the full path to that file	
		if fname:
			fname = os.path.join(self.extract_path, fname)

		return fname
Example #14
	def scan(self, target_file, offset=0, length=0, align=DEFAULT_BYTE_ALIGNMENT, show_invalid_results=False, callback=None):
		'''
		Performs a Binwalk scan on the target file.

		@target_file 			- File to scan.
		@offset      			- Starting offset at which to start the scan.
		@length      			- Number of bytes to scan.
		@align       			- Look for signatures every align bytes.
		@show_invalid_results		- Set to True to display invalid results.
		@callback    			- Callback function to be invoked when matches are found.

		The callback function is passed two arguments: a list of result dictionaries containing the scan results
		(one result per dict), and the offset at which those results were identified. Example callback function:

			def my_callback(offset, results):
				print "Found %d results at offset %d:" % (len(results), offset)
				for result in results:
					print "\t%s" % result['description']

			binwalk.Binwalk(callback=my_callback).scan("firmware.bin")

		Upon completion, the scan method returns a sorted list of tuples containing a list of results dictionaries
		and the offsets at which those results were identified:

			scan_items = [
					(0, [{description : "LZMA compressed data..."}]),
					(112, [{description : "gzip compressed data..."}])
			]

		See SmartSignature.parse for a more detailed description of the results dictionary structure.
		'''
		scan_results = {}
		self.total_read = 0
		self.total_scanned = 0
		self.scan_length = length
		self.filter.show_invalid_results = show_invalid_results

		# Load the default signatures if self.load_signatures has not already been invoked
		if self.magic is None:
			self.load_signatures()

		# Get a local copy of the signature sets generated by self.parser.build_signature_set.
		# This is accessed heavily throughout the scan, and there is less overhead for accessing local variables in Python.
		signature_set = self.parser.build_signature_set()

		# Need the total size of the target file, even if we aren't scanning the whole thing
		fsize = file_size(target_file)

		# Open the target file and seek to the specified start offset
		fd = open(target_file)
		fd.seek(offset)
		
		# If no length was specified, make the length the size of the target file minus the starting offset
		if self.scan_length == 0:
			self.scan_length = fsize - offset
		# Sanity check on the byte alignment; default to 1
		if align <= 0:
			align = 1

		# Main loop, scan through all the data
		while True:
			i = 0

			# Read in the next block of data from the target file and make sure it's valid
			(data, dlen) = self._read_block(fd)
			if data is None or dlen == 0:
				break

			# The total number of bytes scanned could be bigger than the total number
			# of bytes read from the file under the following circumstances:
			#
			#	o The previous dlen was not a multiple of align
			#	o A previous result specified a jump offset that was beyond the end of the
			#	  then current data block
			#
			# If this is the case, we need to index into this data block appropriately in order to 
			# resume the scan from the appropriate offset, and adjust dlen accordingly.
			bufindex = self.total_scanned - self.total_read
			if bufindex > 0:
				# If the total_scanned > total_read, then the total_scanned offset is in a subsequent block.
				# Set i to bufindex, which will cause i to be greater than dlen and this block will be skipped.
				i = bufindex
			elif bufindex < 0:
				# If the total_scanned offset is less than total_read, then the total_scanned offset is
				# somewhere inside this block. Set i to index into the block appropriately.
				i = dlen + bufindex
			else:
				# If the total_scanned offset ends at the end of this block, don't scan any of this block
				i = dlen

			# Scan through each block of data looking for signatures
			while i < dlen:
				smart = {}
				results = []
				results_offset = -1
				pre_filter_ok = False
				smart_jump_done = False

				# Pre-filter data by checking to see if the parser thinks this might be a valid match.
				# This eliminates unnecessary calls into libmagic, which are very expensive.
				#
				# Ideally, this should be done in the MagicParser class, but function calls are expensive.
				# Doing it here greatly decreases the scan time.
				if self.smart.pre_filter:
					for (sig_offset, sigset) in signature_set:
						if data[i+sig_offset:i+sig_offset+self.parser.MATCH_INDEX_SIZE] in sigset:
							pre_filter_ok = True
							break
				else:
					pre_filter_ok = True

				if pre_filter_ok:
					# Pass the data to libmagic, and split out multiple results into a list
					for magic_result in self.parser.split(self.magic.buffer(data[i:i+self.MAX_SIGNATURE_SIZE])):

						# Some file names are not NULL byte terminated, but rather their length is
						# specified in a size field. To ensure these are not marked as invalid due to
						# non-printable characters existing in the file name, parse the filename(s) and
						# trim them to the specified filename length, if one was specified.
						magic_result = self.smart._parse_raw_strings(magic_result)

						# Make sure this is a valid result before further processing
						if not self.filter.invalid(magic_result):
							# The smart filter parser returns a dictionary of keyword values and the signature description.
							smart = self.smart.parse(magic_result)
	
							# Validate the jump value and check if the response description should be displayed
							if smart['jump'] > -1 and self._should_display(smart['description']):
								# If multiple results are returned and one of them has smart['jump'] set to a non-zero value,
								# the calculated results offset will be wrong since i will have been incremented. Only set the
								# results_offset value when the first match is encountered.
								if results_offset < 0:
									results_offset = offset + smart['adjust'] + self.total_scanned

								# Double check to make sure the smart['adjust'] value is sane. 
								# If it makes results_offset negative, then it is not sane.
								if results_offset >= 0:
									# Extract the result, if it matches one of the extract rules and is not a delayed extract.
									if self.extractor.enabled and not (self.extractor.delayed and smart['delay']):
										# If the signature did not specify a size, extract to the end of the file.
										if smart['size'] == 0:
											smart['size'] = fsize-results_offset

										smart['extract'] = self.extractor.extract(	results_offset, 
																smart['description'], 
																target_file, 
																smart['size'], 
																name=smart['name'])

									# This appears to be a valid result, so append it to the results list.
									results.append(smart)

							# Jump to the offset specified by jump. Only do this once, so that if multiple results
							# are returned each of which specify a jump offset, only the first will be honored.
							if smart['jump'] > 0 and not smart_jump_done:
								# Once a jump offset has been honored, we need to start scanning every byte since the
								# jump offset may have thrown off the original alignment. In terms of speed this is fine,
								# since the jump offset usually saves more time anyway. If this is not what the user
								# wanted/intended, disabling pre filtering will disable jump offset processing completely.
								align = self.DEFAULT_BYTE_ALIGNMENT
								smart_jump_done = True
								i += (smart['jump'] - align)
								self.total_scanned += (smart['jump'] - align)

				# Did we find any valid results?
				if results_offset >= 0:
					scan_results[results_offset] = results
					
					if callback is not None:
						callback(results_offset, results)

				# Track the number of bytes scanned in this block, and the total number of bytes scanned.	
				i += align
				self.total_scanned += align

		# Sort the results before returning them
		scan_items = scan_results.items()
		scan_items.sort()

		# Do delayed extraction, if specified.
		if self.extractor.enabled and self.extractor.delayed:
			scan_items = self.extractor.delayed_extract(scan_items, target_file, fsize)

		return scan_items
Example #15
    def single_scan(self,
                    target_file='',
                    fd=None,
                    offset=0,
                    length=0,
                    show_invalid_results=False,
                    callback=None,
                    plugins_whitelist=[],
                    plugins_blacklist=[]):
        '''
		Performs a binwalk scan on one target file or file descriptor.

		@target_file 	      - File to scan.
		@fd                   - File descriptor to scan.
		@offset      	      - Starting offset at which to start the scan.
		@length      	      - Number of bytes to scan. Specify -1 for streams.
		@show_invalid_results - Set to True to display invalid results.
		@callback    	      - Callback function to be invoked when matches are found.
		@plugins_whitelist    - A list of plugin names to load. If not empty, only these plugins will be loaded.
		@plugins_blacklist    - A list of plugin names to not load.

		The callback function is passed two arguments: a list of result dictionaries containing the scan results
		(one result per dict), and the offset at which those results were identified. Example callback function:

			def my_callback(offset, results):
				print "Found %d results at offset %d:" % (len(results), offset)
				for result in results:
					print "\t%s" % result['description']

			binwalk.Binwalk(callback=my_callback).scan("firmware.bin")

		Upon completion, the scan method returns a sorted list of tuples containing a list of results dictionaries
		and the offsets at which those results were identified:

			scan_results = [
					(0, [{description : "LZMA compressed data..."}]),
					(112, [{description : "gzip compressed data..."}])
			]

		See SmartSignature.parse for a more detailed description of the results dictionary structure.
		'''
        scan_results = {}
        fsize = 0
        jump_offset = 0
        i_opened_fd = False
        i_loaded_plugins = False
        plugret = PLUGIN_CONTINUE
        plugret_start = PLUGIN_CONTINUE
        self.total_read = 0
        self.total_scanned = 0
        self.scan_length = length
        self.filter.show_invalid_results = show_invalid_results

        # Check to make sure either a target file or a file descriptor was supplied
        if not target_file and fd is None:
            raise Exception(
                "Must supply Binwalk.single_scan with a valid file path or file object"
            )

        # Load the default signatures if self.load_signatures has not already been invoked
        if self.magic is None:
            self.load_signatures()

        # Need the total size of the target file, even if we aren't scanning the whole thing
        if target_file:
            fsize = file_size(target_file)

        # Open the target file and seek to the specified start offset
        if fd is None:
            fd = open(target_file)
            i_opened_fd = True

        # Seek to the starting offset. This is invalid for some file-like objects such as stdin,
        # so if an exception is thrown try reading offset bytes from the file object.
        try:
            fd.seek(offset)
        except:
            fd.read(offset)

        # If no length was specified, make the length the size of the target file minus the starting offset
        if self.scan_length == 0:
            self.scan_length = fsize - offset

        # If the Plugins class has not already been instantiated, do that now.
        if self.plugins is None:
            self.plugins = Plugins(self,
                                   blacklist=plugins_blacklist,
                                   whitelist=plugins_whitelist)
            i_loaded_plugins = True

            if self.load_plugins:
                self.plugins._load_plugins()

        # Invoke any pre-scan plugins
        plugret_start = self.plugins._pre_scan_callbacks(fd)

        # Main loop, scan through all the data
        while not ((plugret | plugret_start) & PLUGIN_TERMINATE):
            i = 0

            # Read in the next block of data from the target file and make sure it's valid
            (data, dlen) = self._read_block(fd)
            if data is None or dlen == 0:
                break

            # The total number of bytes scanned could be bigger than the total number
            # of bytes read from the file if the previous signature result specified a
            # jump offset that was beyond the end of the then current data block.
            #
            # If this is the case, we need to index into this data block appropriately in order to
            # resume the scan from the appropriate offset, and adjust dlen accordingly.
            if jump_offset > 0:
                total_check = self.total_scanned + dlen

                if jump_offset >= total_check:
                    i = -1

                    # Try to seek to the jump offset; this won't work if fd == sys.stdin
                    try:
                        fd.seek(jump_offset)
                        self.total_read = jump_offset
                        self.total_scanned = jump_offset - dlen
                        jump_offset = 0
                    except:
                        pass
                elif jump_offset < total_check:
                    # Index into this block appropriately
                    i = jump_offset - self.total_scanned
                    jump_offset = 0

            # Scan through each block of data looking for signatures
            if i >= 0 and i < dlen:

                # Scan this data block for a list of offsets which are candidates for possible valid signatures
                for candidate in self.parser.find_signature_candidates(
                        data[i:dlen]):

                    # If a signature specified a jump offset beyond this candidate signature offset, ignore it
                    if (i + candidate + self.total_scanned) < jump_offset:
                        continue

                    # Reset these values on each loop
                    smart = {}
                    results = []
                    results_offset = -1

                    # Pass the data to libmagic, and split out multiple results into a list
                    for magic_result in self.parser.split(
                            self.magic.buffer(
                                data[i + candidate:i + candidate +
                                     self.MAX_SIGNATURE_SIZE])):

                        i_set_results_offset = False

                        # Some file names are not NULL byte terminated, but rather their length is
                        # specified in a size field. To ensure these are not marked as invalid due to
                        # non-printable characters existing in the file name, parse the filename(s) and
                        # trim them to the specified filename length, if one was specified.
                        magic_result = self.smart._parse_raw_strings(
                            magic_result)

                        # Make sure this is a valid result before further processing
                        if not self.filter.invalid(magic_result):
                            # The smart filter parser returns a dictionary of keyword values and the signature description.
                            smart = self.smart.parse(magic_result)

                            # Validate the jump value and check if the response description should be displayed
                            if smart['jump'] > -1 and self._should_display(
                                    smart['description']):
                                # If multiple results are returned and one of them has smart['jump'] set to a non-zero value,
                                # the calculated results offset will be wrong since i will have been incremented. Only set the
                                # results_offset value when the first match is encountered.
                                if results_offset < 0:
                                    results_offset = offset + i + candidate + smart[
                                        'adjust'] + self.total_scanned
                                    i_set_results_offset = True

                                # Double check to make sure the smart['adjust'] value is sane.
                                # If it makes results_offset negative, then it is not sane.
                                if results_offset >= 0:
                                    smart['offset'] = results_offset

                                    # Invoke any scan plugins
                                    if not (plugret_start
                                            & PLUGIN_STOP_PLUGINS):
                                        plugret = self.plugins._scan_callbacks(
                                            smart)
                                        results_offset = smart['offset']
                                        if (plugret & PLUGIN_TERMINATE):
                                            break

                                    # Extract the result, if it matches one of the extract rules and is not a delayed extract.
                                    if self.extractor.enabled and not (
                                            self.extractor.delayed
                                            and smart['delay']) and not (
                                                (plugret | plugret_start)
                                                & PLUGIN_NO_EXTRACT):
                                        # If the signature did not specify a size, extract to the end of the file.
                                        if not smart['size']:
                                            smart[
                                                'size'] = fsize - results_offset

                                        smart[
                                            'extract'] = self.extractor.extract(
                                                results_offset,
                                                smart['description'],
                                                target_file,
                                                smart['size'],
                                                name=smart['name'])

                                    if not ((plugret | plugret_start)
                                            & PLUGIN_NO_DISPLAY):
                                        # This appears to be a valid result, so append it to the results list.
                                        results.append(smart)
                                    elif i_set_results_offset:
                                        results_offset = -1

                    # Did we find any valid results?
                    if results_offset >= 0:
                        scan_results[results_offset] = results

                        if callback is not None:
                            callback(results_offset, results)

                        # If a relative jump offset was specified, update the absolute jump_offset variable
                        if smart.has_key('jump') and smart['jump'] > 0:
                            jump_offset = results_offset + smart['jump']

            # Track the total number of bytes scanned
            self.total_scanned += dlen

        # Sort the results before returning them
        scan_items = scan_results.items()
        scan_items.sort()

        # Do delayed extraction, if specified.
        if self.extractor.enabled and self.extractor.delayed:
            scan_items = self.extractor.delayed_extract(
                scan_items, target_file, fsize)

        # Invoke any post-scan plugins
        #if not (plugret_start & PLUGIN_STOP_PLUGINS):
        self.plugins._post_scan_callbacks(fd)

        # Be sure to delete the Plugins instance so that there isn't a lingering reference to
        # this Binwalk class instance (lingering handles to this Binwalk instance cause the
        # __del__ destructor to not be called).
        if i_loaded_plugins:
            del self.plugins
            self.plugins = None

        if i_opened_fd:
            fd.close()

        return scan_items
Example #16
    def __init__(self,
                 file_name=None,
                 fd=None,
                 binwalk=None,
                 offset=0,
                 length=None,
                 block=DEFAULT_BLOCK_SIZE,
                 plugins=None,
                 file_results=[],
                 compcheck=False):
        '''
		Class constructor.

		@file_name    - The path to the file to analyze.
		@fd           - A file object to analyze data from.
		@binwalk      - An instance of the Binwalk class.
		@offset       - The offset into the data to begin analysis.
		@length       - The number of bytes to analyze.
		@block        - The size of the data blocks to analyze.
		@plugins      - Instance of the Plugins class.
		@file_results - Scan results to overlay on the entropy plot graph.
		@compcheck    - Set to True to enable entropy compression detection.

		Returns None.
		'''
        self.fd = fd
        self.start = offset
        self.length = length
        self.block = block
        self.binwalk = binwalk
        self.plugins = plugins
        self.total_read = 0
        self.fd_open = False
        self.file_results = file_results
        self.do_chisq = compcheck

        if file_name is None and self.fd is None:
            raise Exception(
                "Entropy.__init__ requires at least the file_name or fd options"
            )

        if self.fd is None:
            self.fd = open(file_name, 'rb')
            self.fd_open = True

        if not self.length:
            self.length = None

        if not self.start:
            self.start = 0

        if not self.block:
            self.block = self.DEFAULT_BLOCK_SIZE

        # Some file descriptors aren't seekable (stdin, for example)
        try:
            self.fd.seek(self.start)
        except:
            self.fd.read(self.start)

        if self.binwalk:
            # Set the total_scanned and scan_length values for plugins and status display messages
            self.binwalk.total_scanned = 0
            if self.length:
                self.binwalk.scan_length = self.length
            else:
                self.binwalk.scan_length = common.file_size(
                    self.fd.name) - self.start
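
# A minimal, self-contained sketch of the per-block entropy measurement this
# constructor sets up: the standard Shannon estimate in bits per byte. The
# class's exact implementation may differ; the sample data here is illustrative.
import math
import os

def block_entropy(data):
    """Return the Shannon entropy of a block, in bits per byte (0.0 - 8.0)."""
    if not data:
        return 0.0
    counts = [0] * 256
    for byte in bytearray(data):
        counts[byte] += 1
    entropy = 0.0
    for count in counts:
        if count:
            p = float(count) / len(data)
            entropy -= p * math.log(p, 2)
    return entropy

print("low  (all zeros): %.4f" % block_entropy(b"\x00" * 1024))
print("high (random):    %.4f" % block_entropy(os.urandom(1024)))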
Example #17
0
    def process_table(self,
                      db,
                      db_engine,
                      schema_name,
                      table_name,
                      table_object,
                      table_history,
                      current_timestamp,
                      current_sequence=0):
        """Process a specific table."""

        # skip default table and ignored tables
        if table_name == 'default':
            return

        # TODO: Allow ignore and drop table conditions to be passed to archive (log table state) and stage (to drop table and table references)
        elif table_object.ignore_table:
            logger.info(f'Skipping table: {table_name} (ignore_table=1)')
            return
        elif table_object.drop_table:
            logger.info(f'Skipping table: {table_name} (drop_table=1)')
            return

        # initialize table history's last time stamp to first timestamp if not set yet
        if not table_history.last_timestamp:
            # default first timestamp to 1900-01-01 if project has no first timestamp
            if not table_object.first_timestamp:
                table_object.first_timestamp = '1900-01-01'
            table_history.last_timestamp = iso_to_datetime(
                table_object.first_timestamp)

        # skip table if last timestamp > current timestamp, e.g. tables pre-configured for the future
        if table_history.last_timestamp > current_timestamp:
            explanation = f'first/last timestamp {table_history.last_timestamp} > current timestamp {current_timestamp}'
            logger.info(f'Skipping table: {table_name} ({explanation})')
            return

        # if we're here then we have a legit last timestamp value to use for CDC
        last_timestamp = table_history.last_timestamp

        # initialize table's last_sequence to first_sequence if not set yet
        if not table_history.last_sequence:
            if not table_object.first_sequence:
                table_object.first_sequence = 0
            table_history.last_sequence = table_object.first_sequence

        self.events.start(table_name, 'table')
        # logger.info(f'Processing {table_name} ...')

        # create a fresh cursor for each table
        cursor = db.conn.cursor()

        # save table object for stage
        table_file_name = f'{self.work_folder}/{table_name}.table'
        save_jsonpickle(table_file_name, table_object)

        # discover table schema
        table_schema = db_engine.select_table_schema(schema_name, table_name)

        # handle non-existent tables
        if table_schema is None:
            if table_object.optional_table:
                logger.info(
                    f'Optional table not found; skipped ({table_name})')
            else:
                logger.warning(f'Table not found; skipped ({table_name})')
            return

        # remove ignored columns from table schema
        if table_object.ignore_columns:
            # find columns to ignore (remove) based on ignore column names/glob-style patterns
            ignore_columns = []
            for column_name in table_schema.columns:
                for pattern in split(table_object.ignore_columns):
                    if is_glob_match(column_name, pattern):
                        ignore_columns.append(column_name)

            # delete ignored columns from our table schema
            for column_name in ignore_columns:
                logger.info(f'Ignore_column: {table_name}.{column_name}')
                table_schema.columns.pop(column_name)

        # save table schema for stage to use
        schema_table_name = f'{self.work_folder}/{table_name}.schema'
        save_jsonpickle(schema_table_name, table_schema)

        # save table pk for stage to use
        pk_columns = db_engine.select_table_pk(schema_name, table_name)
        if not pk_columns and table_object.primary_key:
            pk_columns = table_object.primary_key
        save_text(f'{self.work_folder}/{table_name}.pk', pk_columns)

        # normalize cdc setting
        table_object.cdc = table_object.cdc.lower()
        if table_object.cdc == 'none':
            table_object.cdc = ''

        # clear unknown cdc settings
        if table_object.cdc and table_object.cdc not in (
                'filehash', 'rowhash', 'rowversion', 'sequence', 'timestamp'):
            logger.warning(
                f'Warning: Unknown CDC setting; CDC setting cleared ({table_name}.cdc={table_object.cdc})'
            )
            table_object.cdc = ''

        # clear cdc setting when no pk_columns are present
        # NOTE: filehash cdc does not require pk_columns.
        if table_object.cdc and table_object.cdc != 'filehash' and not pk_columns:
            logger.warning(
                f'Warning: CDC enabled but no PK; CDC setting cleared ({table_name}.cdc={table_object.cdc})'
            )
            table_object.cdc = ''

        # if no cdc, then clear cdc related attributes
        if not table_object.cdc:
            table_object.filehash = ''
            table_object.rowhash = ''
            table_object.rowversion = ''
            table_object.sequence = ''
            table_object.timestamp = ''

        # update table object properties for cdc select build
        column_names = list(table_schema.columns.keys())
        table_object.schema_name = schema_name
        table_object.table_name = table_name
        table_object.column_names = column_names
        select_cdc = cdc_select.SelectCDC(db_engine, table_object)
        sql = select_cdc.select(self.job_id, current_timestamp, last_timestamp)

        # save generated SQL to work folder for documentation purposes
        sql_file_name = f'{self.work_folder}/{table_name}.sql'
        save_text(sql_file_name, sql)

        # run sql here vs via db_engine.capture_select
        # cursor = db_engine.capture_select(schema_name, table_name, column_names, last_timestamp, current_timestamp)
        cursor.execute(sql)

        # capture rows in fixed-size batches so record counts of any size can be handled
        # Note: Batching on the capture side allows stage to insert multiple batches in parallel.

        if self.project.batch_size:
            batch_size = int(self.project.batch_size)
            # logger.info(f'Using project specific batch size: {self.project.batch_size}')
        else:
            batch_size = 250_000

        batch_number = 0
        row_count = 0
        data_size = 0
        while True:
            batch_number += 1
            rows = cursor.fetchmany(batch_size)
            if not rows:
                break

            logger.info(
                f'Table({table_name}): batch={batch_number} using batch size {batch_size:,}'
            )
            self.progress_message(
                f'extracting({table_name}.{batch_number:04}) ...')

            # flatten rows to list of column values
            json_rows = [list(row) for row in rows]
            output_file = f'{self.work_folder}/{table_name}#{batch_number:04}.json'
            save_jsonpickle(output_file, json_rows)

            # track metrics
            row_count += len(json_rows)
            data_size += file_size(output_file)

        # update table history with new last timestamp and sequence values
        table_history.last_timestamp = current_timestamp
        table_history.last_sequence = current_sequence

        # track total row count and file size across all of a table's batched json files
        self.events.stop(table_name, row_count, data_size)

        # save interim metrics for diagnostics
        self.events.save()

        self.job_row_count += row_count
        self.job_data_size += data_size

        # explicitly close cursor when finished
        cursor.close()
        return
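
# A minimal, self-contained sketch of the fetchmany() batching pattern used
# above, with sqlite3 and a throwaway table standing in for the real database
# and a deliberately tiny batch size; one JSON file is written per batch.
import json
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE t (id INTEGER, name TEXT)")
conn.executemany("INSERT INTO t VALUES (?, ?)", [(i, "row%d" % i) for i in range(10)])

cursor = conn.cursor()
cursor.execute("SELECT id, name FROM t")

batch_size = 4
batch_number = 0
while True:
    batch_number += 1
    rows = cursor.fetchmany(batch_size)
    if not rows:
        break
    # flatten rows to lists of column values, one output file per batch
    json_rows = [list(row) for row in rows]
    with open("t#%04d.json" % batch_number, "w") as fp:
        json.dump(json_rows, fp)
cursor.close()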
Example #18
0
                                    os.path.splitext(BUILD_FILENAME)[0])
    if env["OS_NAME"] == "windows":
        make_archive(ZIP_FILENAME, "zip")
        ZIP_FILENAME += ".zip"
    else:
        make_archive(ZIP_FILENAME, "gztar")
        ZIP_FILENAME += ".tar.gz"

    # mv dirs back
    for dir in dirs:
        if os.path.isdir(os.path.join("..", "build", dir)):
            move(os.path.join("..", "build", dir), os.path.join(".", dir))

for BUILD_FILENAME in BUILD_FILENAMES:
    if not BUILD_FILENAME == "":
        print("Build Filename: " + BUILD_FILENAME)
        print("Build Filesize: " + common.file_size(BUILD_FILENAME))
    else:
        print("No Build to prepare: " + BUILD_FILENAME)

if not ZIP_FILENAME == "":
    print("Zip Filename:   " + ZIP_FILENAME)
    print("Zip Filesize:   " + common.file_size(ZIP_FILENAME))
else:
    print("No Zip to prepare: " + ZIP_FILENAME)

print("Git tag:        " + env["GITHUB_TAG"])

if (len(BUILD_FILENAMES) == 0) or (ZIP_FILENAME == ""):
    exit(1)
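
# A minimal, self-contained sketch of the shutil.make_archive() pattern used
# above: the format string selects the archive type, and the function returns
# the full path of the archive it created (so the extension does not have to
# be appended by hand). Paths and file names here are illustrative.
import os
import tempfile
from shutil import make_archive

src_dir = tempfile.mkdtemp()
with open(os.path.join(src_dir, "hello.txt"), "w") as fp:
    fp.write("hello\n")

base_name = os.path.join(tempfile.gettempdir(), "example-artifact")
archive_path = make_archive(base_name, "gztar", root_dir=src_dir)  # "zip" on Windows
print("Created: " + archive_path)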
Example #19
0
    def scan(self,
             target_file,
             offset=0,
             length=0,
             align=DEFAULT_BYTE_ALIGNMENT,
             show_invalid_results=False,
             callback=None):
        '''
		Performs a Binwalk scan on the target file.

		@target_file 			- File to scan.
		@offset      			- Starting offset at which to start the scan.
		@length      			- Number of bytes to scan.
		@align       			- Look for signatures every align bytes.
		@show_invalid_results		- Set to True to display invalid results.
		@callback    			- Callback function to be invoked when matches are found.

		The callback function is passed two arguments: a list of result dictionaries containing the scan results
		(one result per dict), and the offset at which those results were identified. Example callback function:

			def my_callback(offset, results):
				print "Found %d results at offset %d:" % (len(results), offset)
				for result in results:
					print "\t%s" % result['description']

			binwalk.Binwalk(callback=my_callback).scan("firmware.bin")

		Upon completion, the scan method returns a sorted list of tuples containing a list of results dictionaries
		and the offsets at which those results were identified:

			scan_items = [
					(0, [{description : "LZMA compressed data..."}]),
					(112, [{description : "gzip compressed data..."}])
			]

		See SmartSignature.parse for a more detailed description of the results dictionary structure.
		'''
        scan_results = {}
        self.total_read = 0
        self.total_scanned = 0
        self.scan_length = length
        self.filter.show_invalid_results = show_invalid_results

        # Load the default signatures if self.load_signatures has not already been invoked
        if self.magic is None:
            self.load_signatures()

        # Get a local copy of the signature sets generated by self.parser.build_signature_set.
        # This is accessed heavily throughout the scan, and there is less overhead for accessing local variables in Python.
        signature_set = self.parser.build_signature_set()

        # Need the total size of the target file, even if we aren't scanning the whole thing
        fsize = file_size(target_file)

        # Open the target file in binary mode and seek to the specified start offset
        fd = open(target_file, 'rb')
        fd.seek(offset)

        # If no length was specified, make the length the size of the target file minus the starting offset
        if self.scan_length == 0:
            self.scan_length = fsize - offset
        # Sanity check on the byte alignment; default to 1
        if align <= 0:
            align = 1

        # Main loop, scan through all the data
        while True:
            i = 0

            # Read in the next block of data from the target file and make sure it's valid
            (data, dlen) = self._read_block(fd)
            if data is None or dlen == 0:
                break

            # The total number of bytes scanned could be bigger than the total number
            # of bytes read from the file under the following circumstances:
            #
            #	o The previous dlen was not a multiple of align
            #	o A previous result specified a jump offset that was beyond the end of the
            #	  then current data block
            #
            # If this is the case, we need to index into this data block appropriately in order to
            # resume the scan from the appropriate offset, and adjust dlen accordingly.
            bufindex = self.total_scanned - self.total_read
            if bufindex > 0:
                # If the total_scanned > total_read, then the total_scanned offset is in a subsequent block.
                # Set i to bufindex, which will cause i to be greater than dlen and this block will be skipped.
                i = bufindex
            elif bufindex < 0:
                # If the total_scanned offset is less than total_read, then the total_scanned offset is
                # somewhere inside this block. Set i to index into the block appropriately.
                i = dlen + bufindex
            else:
                # If the total_scanned offset ends at the end of this block, don't scan any of this block
                i = dlen

            # Scan through each block of data looking for signatures
            while i < dlen:
                smart = {}
                results = []
                results_offset = -1
                pre_filter_ok = False
                smart_jump_done = False

                # Pre-filter data by checking to see if the parser thinks this might be a valid match.
                # This eliminates unnecessary calls into libmagic, which are very expensive.
                #
                # Ideally, this should be done in the MagicParser class, but function calls are expensive.
                # Doing it here greatly decreases the scan time.
                if self.smart.pre_filter:
                    for (sig_offset, sigset) in signature_set:
                        if data[i + sig_offset:i + sig_offset +
                                self.parser.MATCH_INDEX_SIZE] in sigset:
                            pre_filter_ok = True
                            break
                else:
                    pre_filter_ok = True

                if pre_filter_ok:
                    # Pass the data to libmagic, and split out multiple results into a list
                    for magic_result in self.parser.split(
                            self.magic.buffer(data[i:i + self.MAX_SIGNATURE_SIZE])):

                        # Some file names are not NULL byte terminated, but rather their length is
                        # specified in a size field. To ensure these are not marked as invalid due to
                        # non-printable characters existing in the file name, parse the filename(s) and
                        # trim them to the specified filename length, if one was specified.
                        magic_result = self.smart._parse_raw_strings(
                            magic_result)

                        # Make sure this is a valid result before further processing
                        if not self.filter.invalid(magic_result):
                            # The smart filter parser returns a dictionary of keyword values and the signature description.
                            smart = self.smart.parse(magic_result)

                            # Validate the jump value and check if the response description should be displayed
                            if smart['jump'] > -1 and self._should_display(
                                    smart['description']):
                                # If multiple results are returned and one of them has smart['jump'] set to a non-zero value,
                                # the calculated results offset will be wrong since i will have been incremented. Only set the
                                # results_offset value when the first match is encountered.
                                if results_offset < 0:
                                    results_offset = offset + smart['adjust'] + self.total_scanned

                                # Double check to make sure the smart['adjust'] value is sane.
                                # If it makes results_offset negative, then it is not sane.
                                if results_offset >= 0:
                                    # Extract the result, if it matches one of the extract rules and is not a delayed extract.
                                    if self.extractor.enabled and not (
                                            self.extractor.delayed
                                            and smart['delay']):
                                        # If the signature did not specify a size, extract to the end of the file.
                                        if smart['size'] == 0:
                                            smart['size'] = fsize - results_offset

                                        smart['extract'] = self.extractor.extract(
                                            results_offset,
                                            smart['description'],
                                            target_file,
                                            smart['size'],
                                            name=smart['name'])

                                    # This appears to be a valid result, so append it to the results list.
                                    results.append(smart)

                            # Jump to the offset specified by jump. Only do this once, so that if multiple results
                            # are returned each of which specify a jump offset, only the first will be honored.
                            if smart['jump'] > 0 and not smart_jump_done:
                                # Once a jump offset has been honored, we need to start scanning every byte since the
                                # jump offset may have thrown off the original alignment. In terms of speed this is fine,
                                # since the jump offset usually saves more time anyway. If this is not what the user
                                # wanted/intended, disabling pre filtering will disable jump offset processing completely.
                                align = self.DEFAULT_BYTE_ALIGNMENT
                                smart_jump_done = True
                                i += (smart['jump'] - align)
                                self.total_scanned += (smart['jump'] - align)

                # Did we find any valid results?
                if results_offset >= 0:
                    scan_results[results_offset] = results

                    if callback is not None:
                        callback(results_offset, results)

                # Track the number of bytes scanned in this block, and the total number of bytes scanned.
                i += align
                self.total_scanned += align

        # Sort the results before returning them
        scan_items = scan_results.items()
        scan_items.sort()

        # Do delayed extraction, if specified.
        if self.extractor.enabled and self.extractor.delayed:
            scan_items = self.extractor.delayed_extract(
                scan_items, target_file, fsize)

        return scan_items
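
# A minimal, self-contained sketch of the pre-filter technique described in
# the comments above: compare a short slice of the data block against a set
# of known signature prefixes before doing any expensive deeper matching.
# The signatures and MATCH_INDEX_SIZE value here are illustrative only.
MATCH_INDEX_SIZE = 4
SIGNATURE_SET = [
    (0, set([b"\x1f\x8b\x08\x00"])),   # common gzip header prefix
    (0, set([b"PK\x03\x04"])),         # zip local file header
]

def pre_filter_ok(data, i):
    """Return True if the block at index i starts with a known signature prefix."""
    for (sig_offset, sigset) in SIGNATURE_SET:
        if data[i + sig_offset:i + sig_offset + MATCH_INDEX_SIZE] in sigset:
            return True
    return False

print(pre_filter_ok(b"PK\x03\x04 rest of archive", 0))       # True
print(pre_filter_ok(b"\x00\x00\x00\x00 not a signature", 0))  # False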