def read_text_file(self, path):
    """Read a text file, trying a fixed list of text encodings in order

    Args:
        path: (string) file path

    Returns:
        (string) the decoded content of the file, using the first encoding
            in the list that decodes it without a ValueError
        None if it failed to read the file
    """
    # NOTE(review): "latin-1" (alias "iso-8859-1") can decode any byte sequence, so
    # the encodings listed after it are presumably never reached -- confirm the
    # intended order before relying on the later entries.
    candidate_encodings = ("utf-8", "latin-1", "iso-8859-1", "utf-16", "utf-32", "cp500")

    for codec_name in candidate_encodings:
        try:
            with open(path, 'r', encoding=codec_name) as text_file:
                return text_file.read()
        except ValueError:
            # decoding errors are ValueError subclasses: try the next encoding
            continue
        except (OSError, IOError) as error:
            Output.print_error("Critical error while reading file " + path + "\n" + str(error))
            return None
        except Exception as error:
            Output.print_error("Exception while opening file " + path + "\n" + str(error))
            return None

    # every encoding in the list was rejected
    return None
def download_file(url, download_directory):
    """Download a remote file into the given directory

    Args:
        url: (string) address of the remote file
        download_directory: (string) directory in which the file is saved

    Returns:
        (string) absolute path of the file that was just downloaded

    Raises:
        DownloadError if the file could not be retrieved or written
    """
    Output.print_information("Downloading " + url + " ...")

    parsed_url = urlparse(url)

    # Name the file after the last non-empty path segment. Trailing slashes are
    # ignored so that ".../dir/" does not yield an empty name; when the path has no
    # usable segment at all, fall back to the host name.
    file_name = parsed_url.path.rstrip("/").split("/")[-1] or parsed_url.netloc
    download_path = abspath(join(download_directory, file_name))

    try:
        # Open the connection before creating the local file so that a failed
        # request does not leave an empty file behind; the response is closed when
        # the block exits.
        with urlopen(url) as response:
            with open(download_path, 'wb') as file_object:
                file_object.write(response.read())
        return download_path

    except HTTPError as expn:
        raise DownloadError("HTTP error code " + str(expn.code) + " while retrieving " \
            + url + "\n" + str(expn.reason)) from expn
    except URLError as expn:
        raise DownloadError("HTTP URL error while retrieving " + url + "\n"
                            + str(expn.reason)) from expn
    except Exception as expn:
        raise DownloadError("Unable to retrieve " + url + "\n" + str(expn)) from expn
def read_binary_file(self, path):
    """Load the raw bytes of a file

    Args:
        path: (string) file path

    Returns:
        (bytes) the complete content of the file
        None if it failed to read the file (the error is printed)
    """
    try:
        with open(path, 'rb') as binary_file:
            return binary_file.read()
    except (OSError, IOError) as error:
        Output.print_error("Critical error while reading file " + path + "\n" + str(error))
    except Exception as error:
        Output.print_error("Couldn't open binary file " + path + "\n" + str(error))

    # only reached after one of the errors above has been reported
    return None
def extract_by_library(library, archive_path, display_path, output_directory):
    """Decompress a single-file archive into the output directory with the given library

    The decompressed file is named after the archive with its last extension
    removed.

    Args:
        library: (module) module whose open(path, "rb") returns a readable,
            context-managed file object for the archive
        archive_path: (string) physical path of file on the hardware
        display_path: (string) file path that should be displayed to the user
        output_directory: (string)

    Returns:
        None

    Raises:
        ExtractError
    """
    library_name = library.__name__
    Output.print_information("Extracting " + library_name + " archive " \
        + display_path + " ...")

    try:
        with library.open(archive_path, "rb") as compressed:
            payload = compressed.read()

        # drop everything from the last dot onwards, if the name has a dot at all
        target_name = basename(archive_path)
        stem, dot, _ = target_name.rpartition(".")
        if dot:
            target_name = stem

        target_path = abspath(join(output_directory, target_name))
        with open(target_path, "wb") as target_file:
            target_file.write(payload)

    except Exception as error:
        failure = "Failed to extract " + library_name + " archive " \
            + display_path + "\n" + str(error)
        raise ExtractError(failure)
def cleanup_tmp_folder(self):
    """Clean up temporary folder

    Removes every directory listed in self.tmp_directories and stops tracking the
    ones that are gone, both on this instance and in FileLister.all_temp_dirs.
    A directory that cannot be removed stays tracked and a warning is printed.

    Args:
        None

    Returns:
        None
    """
    # collect removals separately: do not modify tmp_directories during the loop
    removed = set()

    for tmp_dir in self.tmp_directories:
        if exists(tmp_dir):
            try:
                shutil.rmtree(tmp_dir)
            except Exception:
                # Catch Exception rather than everything, so that Ctrl-C and
                # SystemExit are not swallowed while cleaning up.
                # directories that cannot be searched cause problems: fix the
                # permissions of the tree and try once more
                try:
                    FileLister.set_tree_perms(tmp_dir)
                    shutil.rmtree(tmp_dir)
                except Exception as expn:
                    Output.print_warning(
                        "Temp directory %s was not removed (%s)" % (tmp_dir, str(expn)))
                    continue
        removed.add(tmp_dir)

    FileLister.all_temp_dirs -= removed
    self.tmp_directories -= removed

    Output.print_information(
        "Temp dir count is %s %s" % (len(self.tmp_directories), len(FileLister.all_temp_dirs)))
def skip_package(self, package_name, package_root):
    """Tell whether a package can be skipped because its crypto file already exists

    Args:
        package_name: (string) package name
        package_root: (string) the directory where package is located. If package
            is not a local one, this is None

    Returns:
        (bool) True if skipping is enabled and "<package_name>.crypto" is found in
            the directory the output would be written to, False otherwise
    """
    if not self.skip_existing:
        return False

    # the crypto file lives next to the package only when that mode is on and the
    # package has a root directory
    use_package_directory = package_root and self.output_in_package_directory
    target_directory = package_root if use_package_directory else self.output_directory

    crypto_file_path = join(target_directory, package_name + ".crypto")
    if not isfile(crypto_file_path):
        return False

    skip_message = "Found a crypto file for package " \
        + package_name + " at " + crypto_file_path + ". Will skip scanning this package."
    Output.print_information(skip_message)
    Logger.log(skip_message)
    return True
def read_file(self, path):
    """Reads a file at the given path to return its content and language

    The language is guessed from the file extension. Files of unknown type are
    read as text and downgraded to binary when they cannot be decoded or contain
    non-text characters. The per-package size counters on self are updated with
    the size of whatever was read.

    Args:
        path: (string) file path

    Returns:
        tuple (file content, language)
        file content is either a str or bytes array depending on whether or not
        it is binary. It is None if the file could not be read.
    """
    file_extension = os.path.splitext(path)[1]
    file_extension = file_extension.split(".")[-1].lower()
    language = Language.guess_language(file_extension)

    content = None
    read_as_binary = False

    if language == Language.Unknown:
        # if we couldn't guess the type of the file from its extension, try to open it
        # as plain text, and if that failed, treat it as binary, but if that succeeded,
        # check the characters in the file to ensure it is a text file.
        content = self.read_text_file(path)
        if content is None or self.has_nontext_characters(content):
            read_as_binary = True
        else:
            language = Language.PlainText
    elif language.is_text:
        content = self.read_text_file(path)
        if content is None:
            Output.print_error("Couldn't decode the text file " + \
                path + " using any of Unicode, Latin, ISO-8859, or EBCDIC encodings." + \
                " Will treat as binary.")
            read_as_binary = True
    else:
        read_as_binary = True

    if read_as_binary:
        content = self.read_binary_file(path)
        language = Language.Binary

    if content is not None:
        if language == Language.Binary:
            self.package_binary_bytes += len(content)
        else:
            # NOTE: content is a str here, so len() counts decoded characters
            self.package_text_bytes += len(content)
            self.package_lines_of_text += len(content.split("\n"))

    return content, language
def list_file(self, file_path, tmp_root_path="", current_path=""):
    """List a single file as package

    An archive is extracted to a new temporary directory and listed through
    list_directory; any other file becomes a package with a single entry.

    Args:
        file_path: (string) file path
        tmp_root_path: (string) if file is in a tmp directory, this is the address
            of that directory, otherwise null.
        current_path: (string) current address within the temporary directory. If
            we are not in a tmp directory, this is also null. This is used to
            compute the display path.

    Returns:
        (list) a list containing one file-list for this file. The list is empty
            if the package is skipped or the file has a .crypto extension.
    """
    archive_type = FileLister.archive_type(file_path)
    package_name = basename(file_path)

    # a file inside a temporary directory has no package root of its own
    package_root = None if tmp_root_path else abspath(dirname(file_path))

    if self.skip_package(package_name, package_root):
        return []

    # if this is itself a crypto file, don't list it as a package. Test for the
    # ".crypto" suffix, so that a path that is just "crypto" (no extension) is not
    # mistaken for one.
    if file_path.endswith(".crypto"):
        Output.print_information("\nThe file " + file_path + " has a .crypto extention. " \
            + "This is reserved for the output of this program. Will not list this file " \
            + "as a package.")
        return []

    if tmp_root_path:
        display_path = join(current_path, relpath(file_path, tmp_root_path))
    elif archive_type:
        display_path = abspath(file_path)
    else:
        # plain files outside a tmp directory are displayed exactly as given
        display_path = file_path

    if archive_type:
        tmp_dir = self.create_tmp_directory(package_name)
        FileLister.extract_archive(archive_type, file_path, display_path, tmp_dir)
        return self.list_directory(tmp_dir, package_name, tmp_root_path=tmp_dir, \
            current_path=display_path, _package_root=package_root)

    return [{
        "package_name": package_name,
        "package_root": package_root,
        "file_list": [{"display_path": display_path, "physical_path": file_path}]
    }]
def get_directory_filelist(self, path, tmp_root_path, current_path):
    """Walk a directory and list its files, descending into extracted archives

    Symbolic links are skipped with a warning. Each archive is extracted into a
    new temporary directory which is then listed recursively; an archive that
    fails to extract is reported and left out.

    Args:
        path: (string) path of the directory
        tmp_root_path: (string) if the directory is inside of a tmp directory, this
            is the address of that directory, otherwise null.
        current_path: (string) current address within the temporary directory. If
            we are not in a tmp directory, this is also null. This is used to
            compute the display path.

    Returns:
        (list) a list of files, where each file is a dict with two keys
            "display_path" and "physical_path". "display_path" is the path that's
            shown to the user and "physical_path" is where file can be accessed.
    """
    def shown_path(physical_path):
        # inside a tmp directory, show the path relative to it under current_path
        if tmp_root_path:
            return join(current_path, relpath(physical_path, tmp_root_path))
        return physical_path

    collected = []

    for dirpath, _, filenames in walk(path, followlinks=False):
        for filename in filenames:
            full_path = abspath(join(dirpath, filename))

            if islink(full_path):
                Output.print_warning("Skipping symbolic link: " + full_path)
                continue

            archive_type = FileLister.archive_type(full_path)

            if not archive_type:
                collected.append({
                    "display_path": shown_path(full_path),
                    "physical_path": full_path
                })
                continue

            tmp_dir = self.create_tmp_directory(full_path)
            display_path = shown_path(full_path)

            try:
                FileLister.extract_archive(archive_type, full_path, display_path, tmp_dir)
            except ExtractError as error:
                Output.print_error(str(error))
                continue

            nested_files = self.get_directory_filelist(
                tmp_dir, tmp_root_path=tmp_dir, current_path=display_path)
            collected.extend(nested_files)

    return collected
def write_crypto_file(self, json_data, output_directory, package_name):
    """Writes the crypto data to a file at the output_directory

    The data is first written to "<name>.crypto.partial" and then moved onto
    "<name>.crypto", so an existing crypto file is only replaced once writing
    completely succeeded. When self.output_existing is "rename", a numbered name
    ("<name>.1.crypto", "<name>.2.crypto", ...) is used instead of replacing an
    existing file.

    Args:
        json_data: (dict)
        output_directory: (string)
        package_name: (string)

    Returns:
        None

    Raises:
        FileWriteException if the file could not be written or moved into place
    """
    base_path = os.path.join(output_directory, package_name)

    if self.output_existing == "rename":
        # find the first name that is not taken yet
        duplicate_number = 1
        candidate = base_path
        while os.path.exists(candidate + ".crypto"):
            candidate = base_path + "." + str(duplicate_number)
            duplicate_number += 1
        crypto_file_path = candidate + ".crypto"
    else:
        crypto_file_path = base_path + ".crypto"

    Output.print_information("\nWriting output in " + crypto_file_path + " ...\n")

    partial_file_path = crypto_file_path + ".partial"

    # serialize before touching the disk so a serialization failure leaves no file
    if self.pretty:
        json_string = json.dumps(json_data, sort_keys=True, indent=2)
    else:
        json_string = json.dumps(json_data)

    try:
        with open(partial_file_path, 'w') as file_object:
            file_object.write(json_string)

        # move the file from .crypto.partial to .crypto at the very last step.
        # os.replace overwrites an existing destination in a single step, so there
        # is no moment at which the crypto file is missing.
        os.replace(partial_file_path, crypto_file_path)
    except (OSError, IOError) as expn:
        raise FileWriteException("Failed to write result in the crypto file " \
            + partial_file_path + "\n" + str(expn)) from expn
def extract_tar(tar_file_path, display_path, output_directory):
    """Extract a tar archive

    Where the running Python provides tarfile extraction filters, members that
    tarfile.data_filter rejects (for example paths or links pointing outside of
    the output directory, or device files) are skipped with a warning instead of
    being extracted.

    Args:
        tar_file_path: (string) physical path of file on the hardware
        display_path: (string) file path that should be displayed to the user
        output_directory: (string)

    Returns:
        None

    Raises:
        ExtractError
    """
    Output.print_information("Extracting tar archive " + display_path + " ...")

    def skip_unsafe_members(member, destination):
        # Returning None from an extraction filter makes extractall skip the member,
        # so one unsafe entry does not abort the extraction of the whole archive.
        try:
            return tarfile.data_filter(member, destination)
        except tarfile.FilterError as expn:
            Output.print_warning("Skipping unsafe member of tar archive " \
                + display_path + ": " + str(expn))
            return None

    try:
        with tarfile.open(tar_file_path) as tar_file:
            if hasattr(tarfile, "data_filter"):
                tar_file.extractall(output_directory, filter=skip_unsafe_members)
            else:
                # older Python without extraction filters: extract as-is
                tar_file.extractall(output_directory)
    except Exception as expn:
        raise ExtractError("Failed to extract tar archive " + display_path + "\n" \
            + str(expn)) from expn
def extract_zip(zip_file_path, display_path, output_directory):
    """Unpack every member of a zip file into the output directory

    Args:
        zip_file_path: (string) physical path of file on the hardware
        display_path: (string) file path that should be displayed to the user
        output_directory: (string)

    Returns:
        None

    Raises:
        ExtractError
    """
    progress = "Extracting zip archive " + display_path + " ..."
    Output.print_information(progress)

    try:
        with zipfile.ZipFile(zip_file_path) as archive:
            archive.extractall(output_directory)
    except Exception as error:
        failure = "Failed to extract zip archive " + display_path + "\n" + str(error)
        raise ExtractError(failure)
def extract_rpm_archive(archive_path, display_path, output_directory):
    """Unpack an RPM archive into the output directory using extract_rpm

    Args:
        archive_path: (string) physical path of file on the hardware
        display_path: (string) file path that should be displayed to the user
        output_directory: (string)

    Returns:
        None

    Raises:
        ExtractError
    """
    progress = "Extracting RPM archive " + display_path + " ..."
    Output.print_information(progress)

    try:
        extract_rpm(archive_path, output_directory)
    except Exception as error:
        failure = "Failed to extract RPM archive " + display_path + "\n\n" + str(error)
        raise ExtractError(failure)
import traceback
from cryptodetector import CryptoDetector, Output, Options, Logger, FileLister
from cryptodetector.exceptions import CryptoDetectorError

if __name__ == '__main__':
    # Entry point: read the options, run the scan, and on any failure report the
    # error, write the log files (when logging is enabled) and remove all
    # temporary files.
    log_output_directory = None
    failure_message = None

    try:
        options = Options(CryptoDetector.VERSION).read_all_options()

        # log files are written next to the output when logging is enabled
        if "log" in options and options["log"]:
            log_output_directory = options["output"]

        CryptoDetector(options).scan()
        print("done")

    except CryptoDetectorError as error:
        failure_message = str(error)

    except KeyboardInterrupt:
        # interrupted by the user: clean up and let the interrupt propagate
        FileLister.cleanup_all_tmp_files()
        raise

    except Exception:
        failure_message = "Unhandled exception.\n\n" + str(traceback.format_exc())

    if failure_message is not None:
        Output.print_error(failure_message)
        if log_output_directory:
            Logger.write_log_files(log_output_directory)
        FileLister.cleanup_all_tmp_files()
def scan(self):
    """Main function to initiate the scanning job

    Lists the files of every package in self.packages, reads each file, runs the
    active methods on it, and (unless output is skipped or this is a quick scan)
    writes one crypto file per package. Scan options and per-package statistics
    are sent to the Logger.

    Args:
        None

    Returns:
        None if there are no packages to scan.
        Otherwise self.quick_scan_result for a quick scan (a dict mapping package
        name to a bool telling whether any match was found), or
        self.full_scan_result for a full scan (a dict mapping package name to the
        data returned by crypto_output.get_crypto_data()).

    Raises:
        FailedFileRead if the content of a file could not be read
    """
    if not self.packages:
        return

    # record the tool version, the platform and the options of this job in the log
    Logger.log("Crypto Detector " + CryptoDetector.VERSION + " (c) Wind River Systems")
    Logger.log(("{} "*6).format(platform.system(), platform.node(), platform.release(), \
        platform.version(), platform.machine(), platform.processor()))
    Logger.log("Python " + str(platform.python_version()))
    Logger.log("")
    Logger.log("")
    Logger.log(
        "Starting a new crypto scanning job with the following options:")
    Logger.log("source_files_only: " + str(self.source_files_only))
    Logger.log("stop_after: " + str(self.stop_after))
    Logger.log("output_existing: " + str(self.output_existing))
    Logger.log("ignore_evidence_types: " + str(Method.ignore_evidence_types))

    if self.method_keyword_active:
        Logger.log("keyword method is active keyword list version " \
            + str(self.method_keyword_kwlist_version) + " and ignore_case set to " \
            + str(self.method_keyword_ignore_case))

    if self.method_api_active:
        Logger.log("API method is active with api list version " \
            + str(self.method_api_kwlist_version))

    # totals over all packages, reported at the end of the job
    package_count = 0
    total_execution_time = 0
    total_text_bytes = 0
    total_binary_bytes = 0
    total_file_count = 0
    total_lines_of_text = 0

    for package_path in self.packages:
        package_filelist = self.file_lister.get_package_filelist(
            package_path)

        for package in package_filelist:
            package_name = package["package_name"]
            package_root = package["package_root"]
            file_list = package["file_list"]
            sha1_list = []
            package_count += 1
            match_count = 0
            # display path -> SHA-1 hex digest of the files that had matches
            checksums = {}
            crypto_output = CryptoOutput()
            self.current_package = package_name
            self.package_text_bytes = 0
            self.package_binary_bytes = 0
            self.package_lines_of_text = 0

            crypto_output.set_package_name(package_name)

            Output.print_information("Scanning package " + package_name + "\n")

            start_time = time.time()
            stats = {}
            # NOTE: these three counters were already reset a few lines above
            self.package_text_bytes = 0
            self.package_binary_bytes = 0
            self.package_lines_of_text = 0

            # a package starts out as "no match" in the quick scan result
            if self.quick and package_name not in self.quick_scan_result:
                self.quick_scan_result[package_name] = False

            for file_path in file_list:
                content, language = self.read_file(
                    file_path["physical_path"])

                if content is None:
                    raise FailedFileRead("Failed to open the file '" + file_path["display_path"] \
                        + "' to read its contents. Please run the scan with --log and open the log" \
                        + " file for details of this error.")

                # hash the raw bytes; text content is encoded as UTF-8 first
                if isinstance(content, str):
                    encoded_content = codecs.encode(content, "utf-8")
                else:
                    encoded_content = content

                hexdigest = hashlib.sha1(encoded_content).hexdigest()
                sha1_list.append(hexdigest)

                found_matches = False

                for method_id in self.active_methods:
                    method = self.active_methods[method_id]

                    if not method.supports_scanning_file(language):
                        continue

                    if self.source_files_only and not language.is_source_code:
                        continue

                    Output.print_information("[" + method.method_id \
                        + "] Scanning file " + file_path["display_path"])

                    if self.quick:
                        # a quick scan only needs to know whether anything matches,
                        # so the remaining methods are not tried after a match
                        if method.quick_search(content, language):
                            found_matches = True
                            break
                    else:
                        result = method.search(content, language)

                        if not result:
                            continue
                        else:
                            found_matches = True

                        if file_path["display_path"] not in checksums:
                            checksums[
                                file_path["display_path"]] = hexdigest

                        for match in result:
                            match["detection_method"] = method_id
                            match = self.validate_match_fields(
                                method_id, match)
                            crypto_output.add_hit(
                                file_path=file_path["display_path"],
                                file_sha1=checksums[
                                    file_path["display_path"]],
                                file_language=language,
                                hit=match)
                            match_count += 1

                if self.quick:
                    # one matching file is enough to flag the package
                    if found_matches:
                        self.quick_scan_result[package_name] = True
                        break
                else:
                    self.full_scan_result[
                        package_name] = crypto_output.get_crypto_data()

                if self.stop_after and found_matches:
                    # NOTE: the countdown is stored on self and is not reset in this
                    # method, so what is left of it carries over to later packages
                    if self.stop_after == 1:
                        break
                    else:
                        self.stop_after -= 1

            crypto_output.set_verif_code(sha1_list)

            stats["execution_time"] = time.time() - start_time
            stats["file_count"] = len(file_list)
            stats["package_text_bytes"] = self.package_text_bytes
            stats["package_binary_bytes"] = self.package_binary_bytes
            stats["package_lines_of_text"] = self.package_lines_of_text

            if package_root != None and self.output_in_package_directory:
                output_directory = package_root
            else:
                output_directory = self.output_directory

            # write the output to a file
            if not self.skip_output and not self.quick:
                self.write_crypto_file(crypto_output.get_crypto_data(),
                                       output_directory, package_name)

            number_of_matches = "Did not find any matches"
            if match_count == 1:
                number_of_matches = "Found only one match"
            elif match_count > 1:
                number_of_matches = "Found " + str(
                    match_count) + " matches"

            Logger.log("")
            Logger.log("Finished scanning package " + package_name + " in " \
                + str(round(stats["execution_time"], 2)) + " seconds.")
            Logger.log("There were " + str(stats["file_count"]) + " files consisting of " \
                + str(stats["package_lines_of_text"]) + " lines of text in " \
                + CryptoDetector.human_readable_filesize(stats["package_text_bytes"]) \
                + " of text data and " \
                + CryptoDetector.human_readable_filesize(stats["package_binary_bytes"]) \
                + " of binary data.")
            Logger.log(number_of_matches + " in " + package_name)

            total_execution_time += stats["execution_time"]
            total_file_count += stats["file_count"]
            total_text_bytes += stats["package_text_bytes"]
            total_binary_bytes += stats["package_binary_bytes"]
            total_lines_of_text += stats["package_lines_of_text"]

        Output.print_information("\nCleaning up temporary files ...")
        self.file_lister.cleanup_tmp_folder()

    # write quick scan output to stdout and some output file
    if self.quick and not self.skip_output:
        output_message = ""
        if not any([
                self.quick_scan_result[package]
                for package in self.quick_scan_result
        ]):
            output_message += "\n\nDid not find any matches in any of the packages.\n\n"
        else:
            output_message += "\n\nFound matches in the following packages:\n\n"
            for package in self.quick_scan_result:
                if self.quick_scan_result[package]:
                    output_message += " " + package + "\n"
            output_message += "\n\n"

        print(output_message)

        # write quick search output to a file
        quick_output_filename = os.path.join(self.output_directory, \
            "quick-scan-result.txt")
        with open(quick_output_filename, "w") as output_file:
            output_file.write(output_message)

    # print stats
    Output.print_information("\nTook " \
        + str(round(total_execution_time, 2)) + " seconds to scan " \
        + str(total_file_count) + " files in " \
        + str(package_count) + " package" \
        + ("s" if package_count > 1 else "") + ", processing " \
        + str(total_lines_of_text) \
        + " lines of text in " \
        + CryptoDetector.human_readable_filesize(total_text_bytes) \
        + " of text data, in addition to scanning " \
        + CryptoDetector.human_readable_filesize(total_binary_bytes) \
        + " of binary data.")

    if self.log:
        Logger.write_log_files(self.output_directory)

    if self.quick:
        return self.quick_scan_result
    else:
        return self.full_scan_result
def read_config_file(self, path):
    """Read configuration file and update self.options

    When no path is given, "cryptodetector.conf" is looked up in the current
    working directory and then in the home directory; if neither exists the
    options are left untouched.

    Args:
        path: (string) path of the config file

    Returns:
        None

    Raises:
        InvalidConfigException if the file does not exist, cannot be parsed, or
            has missing sections or invalid values
    """
    # delimiters expects a sequence of delimiter strings, hence the one-element tuple
    config = configparser.ConfigParser(allow_no_value=True, delimiters=('=',))
    # keep option names exactly as written instead of lower-casing them
    config.optionxform = str

    if path:
        path_conf = os.path.abspath(path)
        Output.print_information("Reading configuration file " + path_conf, True)

        if not os.path.isfile(path_conf):
            raise InvalidConfigException(
                "The specified config file doesn't exist.")
    else:
        home_directory = os.path.expanduser("~")
        cwd_conf = os.path.abspath(
            os.path.join(os.getcwd(), "cryptodetector.conf"))
        home_conf = os.path.abspath(
            os.path.join(home_directory, "cryptodetector.conf"))

        # Does config file exist in current working directory, or else in home folder?
        for candidate in (cwd_conf, home_conf):
            if os.path.isfile(candidate):
                Output.print_information(
                    "Reading configuration file " + candidate, True)
                self.options["config_file"] = candidate
                path_conf = candidate
                break
        else:
            Output.print_information("Didn't find any configuration file. Expect all " \
                + "parameters from the command line.", True)
            return

    try:
        config.read(path_conf)
    except configparser.Error as expn:
        # report a malformed file as the documented exception type
        raise InvalidConfigException("Invalid configuration file " + path_conf \
            + "\n" + str(expn)) from expn

    for section in ["settings", "methods"]:
        if section not in config.sections():
            raise InvalidConfigException("Invalid configuration file. [" \
                + section + "] section " + "is required.")

    # the type of the current value of an option decides how it is parsed
    for option in self.options:
        if isinstance(self.options[option], list):
            option_value = Options.read_array_option(config, option)
        elif isinstance(self.options[option], bool):
            option_value = Options.read_boolean_option(
                config, "settings", option)
        else:
            option_value = Options.read_string_option(
                config, "settings", option)

        if option_value is not None:
            self.options[option] = option_value

    # these two sections are plain lists: one item per line of the section
    for option in ["methods", "packages"]:
        if config.has_section(option):
            self.options[option] = [
                item for item, _ in config.items(option)
            ]

    if self.options["output_existing"] not in [
            "rename", "overwrite", "skip"
    ]:
        raise InvalidConfigException("Invalid config file. In section [settings] " \
            + "output_existing had invalid value '" + str(self.options["output_existing"]) \
            + "'. Its value must be one of three choices: " \
            + "'rename', 'overwrite', and 'skip'.")

    if not self.options["methods"]:
        raise InvalidConfigException("Invalid configuration file. There should be one " \
            + "or more items under the [methods] section.")

    methods = self.options["methods"]
    Options.validate_methods(methods)

    # options of a method are read from its own "method:<name>" sections
    for method in methods:
        method_class = Options.available_methods()[method]

        if not hasattr(method_class, "options"):
            continue

        for option in method_class.options:
            if isinstance(method_class.options[option], list):
                option_value = Options.read_array_option(config, "method:" \
                    + method + ":" + option)
            elif isinstance(method_class.options[option], bool):
                option_value = Options.read_boolean_option(
                    config, "method:" + method, option)
            else:
                option_value = Options.read_string_option(
                    config, "method:" + method, option)

            if option_value is not None:
                method_class.options[option] = option_value
                self.options[method + "_" + option] = option_value
def __init__(self, options, skip_output=False):
    """Validate the options and set up the file lister and the scanning methods

    Args:
        options: (dict) must contain the keys "output", "quick",
            "output_in_package_directory", "output_existing", "pretty", "log",
            "source_files_only", "ignore_evidence_types", "verbose",
            "suppress_warnings", "stop_after", "packages" and "methods"
        skip_output: (bool) whether we should skip writing or printing out the
            output (used only for unit testing)

    Returns:
        None

    Raises:
        InvalidOptionsException
    """
    try:
        for option in ["output", "quick", "output_in_package_directory", "output_existing", \
            "pretty", "log", "source_files_only"]:
            setattr(self, option, options[option])

        self.output_directory = self.output
        Method.ignore_evidence_types = options["ignore_evidence_types"]
        Output.verbose = options["verbose"]
        Output.suppress_warnings = options["suppress_warnings"]
        stop_after = options["stop_after"]
        packages = options["packages"]
        methods = options["methods"]

    except KeyError as expn:
        raise InvalidOptionsException("Missing required option: \n" + str(expn)) from expn

    self.packages = packages
    self.quick_scan_result = {}
    self.full_scan_result = {}
    self.skip_output = skip_output
    self.current_package = None
    self.stop_after = None
    # defaults, so these flags exist even if a method class is not registered
    self.method_keyword_active = False
    self.method_api_active = False

    if stop_after:
        try:
            self.stop_after = int(stop_after)
        except (TypeError, ValueError, OverflowError) as expn:
            # str() so that a non-string value cannot break building the message
            raise InvalidOptionsException("Invalid stop_after value: '" \
                + str(stop_after) + "'.") from expn

        if self.stop_after < 1:
            raise InvalidOptionsException(
                "stop_after should be a positive integer.")

    if not os.path.isdir(self.output_directory):
        raise InvalidOptionsException("The specified output directory doesn't exist: " \
            + self.output_directory)

    if not packages:
        # nothing to scan: the file lister and the methods are not set up
        Output.print_information("\nNo package specified. Nothing to scan.\n", \
            ignore_verbose=True)
        return

    if self.output_existing not in ["rename", "overwrite", "skip"]:
        raise InvalidOptionsException("output_existing had invalid value '" \
            + self.output_existing + "'. Its value must be one of three choices: " \
            + "'rename', 'overwrite', and 'skip'.")

    self.file_lister = FileLister(packages, (self.output_existing == "skip"), \
        self.output_directory, self.output_in_package_directory)

    method_classes = {
        mc.method_id: mc
        for mc in MethodFactory.method_classes
    }
    method_instances = {}

    for method in method_classes:
        # instantiate method
        method_instances[method] = method_classes[method]()

        # read options
        method_options = getattr(method_instances[method], "options", None)

        if method == "keyword":
            self.method_keyword_active = (method in methods)
            self.method_keyword_ignore_case = method_options["ignore_case"]
            self.method_keyword_kwlist_version = method_options[
                "keyword_list_version"]
        elif method == "api":
            self.method_api_active = (method in methods)
            self.method_api_kwlist_version = method_options[
                "keyword_list_version"]

    # active methods are the ones we use to scan the code
    self.active_methods = {}
    for method in methods:
        if method not in method_classes:
            raise InvalidOptionsException("Invalid method " + method)
        self.active_methods[method] = method_instances[method]