from typing import Sequence, Tuple, TypeVar

# Imports reconstructed from usage: RadioList and AnyFormattedText ship with
# prompt_toolkit; Event is assumed to be the project's own observer helper.
from prompt_toolkit.formatted_text import AnyFormattedText
from prompt_toolkit.widgets import RadioList as _RadioList

from events import Event  # assumed location of the project's Event helper

_T = TypeVar("_T")


class RadioList(_RadioList):
    """RadioList extension that tracks check changes and exposes navigation."""

    def __init__(self, values: Sequence[Tuple[_T, AnyFormattedText]]) -> None:
        super().__init__(values)
        self.handlers = []
        self.check_event = Event()

    def up(self) -> None:
        self._selected_index = max(0, self._selected_index - 1)

    def down(self) -> None:
        self._selected_index = min(len(self.values) - 1,
                                   self._selected_index + 1)

    def get_selected_index(self) -> int:
        return self._selected_index

    def get_selected_item(self) -> Tuple[_T, AnyFormattedText]:
        return self.values[self.get_selected_index()]

    def get_selected_value(self) -> _T:
        return self.get_selected_item()[0]

    def get_checked_index(self) -> int:
        for idx, value in enumerate(self.values):
            if value[0] == self.current_value:
                return idx
        return -1

    def get_checked_value(self) -> _T:
        return self.current_value

    def get_checked_item(self) -> Tuple[_T, AnyFormattedText]:
        return self.values[self.get_checked_index()]

    def set_checked_index(self, index: int) -> None:
        self._selected_index = index
        self.current_value = self.values[self._selected_index][0]

    def set_selected_index(self, index: int) -> None:
        self._selected_index = index

    def _handle_enter(self) -> None:
        # Remember the previously checked item so listeners receive both the
        # old and the new value when the check changes.
        old_value = None
        for value in self.values:
            if value[0] == self.current_value:
                old_value = value
        new_value = self.values[self._selected_index]
        super()._handle_enter()
        self.check_event.fire(old_value, new_value)
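
if __name__ == "__main__":
    # Minimal usage sketch (illustrative values only; in the real application
    # the widget is embedded in a prompt_toolkit layout and driven by key
    # bindings).
    radio = RadioList([("opt-a", "Option A"), ("opt-b", "Option B")])
    radio.down()  # highlight moves from "opt-a" to "opt-b"
    radio.set_checked_index(radio.get_selected_index())
    assert radio.get_checked_value() == "opt-b"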
# Imports reconstructed from usage; the module paths are assumptions based on
# the project layout (ORM models plus local database helpers).
from database import DATABASE_PATH, create_database, database_exists
from events import EventHandler
from models import Checksum, Event, Server


class Integrity:
    """
    Class that handles the integrity check process

    If the current server does not exist in the database, a new record
    will be added
    """

    def __init__(self, config):
        """
        Integrity constructor

        :param config: Configuration object
        :type config: config.Config
        """
        self.config = config
        self.server = None
        self.server_is_new = False  # If set to true, no events will be fired
        self.events = []
        self.on_events_detected = EventHandler()

    def load_database(self):
        """
        Loads and initializes the database if necessary

        :return: None
        """
        if not database_exists():
            print("[+] No database found, creating database '{}'".format(
                DATABASE_PATH))
            create_database()

        if self._server_exists():
            self._load_server()
        else:
            print("[+] First run detected for server '{}', "
                  "setting up tracker.".format(self.config.server_name))
            print("[?] Note: No changes can be detected this session")
            self._add_server()

    def _server_exists(self):
        """
        Check if this server is already being tracked

        :return: True if the server already exists
        :rtype: bool
        """
        return Server.exists(name=self.config.server_name)

    def _add_server(self):
        """
        Add the current server to the table of servers

        The server_is_new flag will also be set for this session

        :return: None
        """
        self.server = Server.create(name=self.config.server_name)
        self.server_is_new = True

    def _load_server(self):
        """
        Load the server from the database

        :return: None
        """
        self.server = Server.get(name=self.config.server_name)

    def identify(self, output):
        """
        Identify added, modified and removed files by comparing the server
        output against the known checksum records

        :param output: Server output as (path, checksum) pairs
        :type output: list
        :return: None
        """
        for path, checksum in output:
            # Find the checksum record
            checksum_record = self.server.get_related_checksum(path, checksum)

            # New (unknown) file detected
            if checksum_record is None:
                checksum_record = Checksum.create(path=path,
                                                  checksum=checksum,
                                                  server=self.server)
                self._handle_file_added(checksum_record)
                continue

            # Check if the file was modified
            if checksum_record.checksum != checksum:
                self._handle_file_modified(checksum_record, checksum)
                checksum_record.checksum = checksum

        # Loop through all known records and flag the ones that no longer
        # appear in the server output as removed
        for checksum_record in self.server.checksums:
            if not any(o for o in output if o[0] == checksum_record.path):
                self._handle_file_removed(checksum_record)
                checksum_record.delete()

        # On events detected
        if self.events:
            self.on_events_detected.fire(
                self._get_events_as_anonymous_obj_list())

    def _get_events_as_anonymous_obj_list(self):
        """
        Convert the events list to a list of anonymous objects

        :return: List of anonymous objects
        :rtype: list[object]
        """
        return [e.to_anonymous_object() for e in self.events]

    def _handle_file_added(self, checksum_record):
        """
        An unknown new file was detected, log the event

        :param checksum_record: Checksum record which the event will be
                                related to
        :type checksum_record: models.Checksum
        :return: None
        """
        if not self.server_is_new:
            description = "A new file was detected at '{path}'".format(
                path=checksum_record.path)
            event = Event.create(event=Event.FILE_ADDED,
                                 description=description,
                                 checksum=checksum_record)
            self.events.append(event)

    def _handle_file_modified(self, checksum_record, checksum):
        """
        A known file was modified, log the event

        :param checksum_record: Checksum record which the event will be
                                related to
        :param checksum: The new checksum that was detected
        :type checksum_record: models.Checksum
        :type checksum: str
        :return: None
        """
        description = "File modification was detected at '{path}'".format(
            path=checksum_record.path)
        event = Event.create(event=Event.FILE_MODIFIED,
                             description=description,
                             checksum=checksum_record)
        self.events.append(event)

    def _handle_file_removed(self, checksum_record):
        """
        A known file was removed, log the event

        :param checksum_record: Checksum record which the event will be
                                related to
        :type checksum_record: models.Checksum
        :return: None
        """
        description = "File removal was detected at '{path}'".format(
            path=checksum_record.path)
        event = Event.create(event=Event.FILE_REMOVED,
                             description=description,
                             checksum=checksum_record)
        self.events.append(event)

    def print_statistics(self):
        """
        Print the statistics of all events that were found

        :return: None
        """
        print("[+] Integrity statistics")
        print(" |-- Files added: {}".format(self._get_addition_event_count()))
        print(" |-- Files removed: {}".format(self._get_removal_event_count()))
        print(" `-- Files modified: {}".format(
            self._get_modified_event_count()))

    def _get_addition_event_count(self):
        """
        Get the amount of events related to file addition

        :return: Amount of addition events
        :rtype: int
        """
        return len([e for e in self.events if e.event == Event.FILE_ADDED])

    def _get_removal_event_count(self):
        """
        Get the amount of events related to file removal

        :return: Amount of removal events
        :rtype: int
        """
        return len([e for e in self.events if e.event == Event.FILE_REMOVED])

    def _get_modified_event_count(self):
        """
        Get the amount of events related to file modification

        :return: Amount of modification events
        :rtype: int
        """
        return len([e for e in self.events if e.event == Event.FILE_MODIFIED])
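
if __name__ == "__main__":
    # Minimal usage sketch. The Config constructor arguments and the
    # (path, checksum) pairs below are illustrative assumptions; real output
    # comes from the monitored server.
    from config import Config  # module per the constructor docstring

    config = Config(server_name="web-01")
    integrity = Integrity(config)
    integrity.load_database()
    integrity.identify([("index.php", "abc123"), ("login.php", "def456")])
    integrity.print_statistics()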
import hashlib
from collections import Counter, defaultdict
from functools import lru_cache, partial
from io import BytesIO
from multiprocessing.pool import ThreadPool
from os import walk
from os.path import isfile, join, relpath
from shutil import copytree
from traceback import format_tb
from typing import Iterator, List, Set, Tuple

import requests
from git import GitCommandError, Repo

# Assumed project-local imports: the version string, the exception type and
# the Event/OrderedDefaultDict/chunks/temporary_directory helpers live
# elsewhere in this project.
from . import __version__
from .exceptions import GitFingerprintException
from .helpers import Event, OrderedDefaultDict, chunks, temporary_directory


class Scanner:
    """
    Core scanner class

    Handles iteration and keeps track of all files
    """
    MODE_TAG = "tag"
    MODE_COMMIT = "commit"
    MODE_BRANCH = "branch"

    LOG_LEVEL_INFO = "*"
    LOG_LEVEL_ERROR = "!"
    LOG_LEVEL_WARNING = "-"
    LOG_LEVEL_SUCCESS = "+"

    DEFAULT_USER_AGENT = f"GitFingerprint/{__version__}"

    def __init__(self, url: str = "", mode: str = MODE_BRANCH,
                 webroot: str = "", git_repo_path: str = "",
                 max_remote_threads: int = 10, max_local_threads: int = 5,
                 verify_ssl: bool = True, debug: bool = False,
                 session: requests.Session = None, user_agent: str = None):
        """
        Scanner constructor

        :param url: Base url of the remote host
        :param mode: Scan mode (tag, commit, branch)
        :param webroot: Local webroot in the git path
        :param git_repo_path: Local git repository path
        :param max_remote_threads: Maximum amount of threads to use while
                                   connecting to the remote host
        :param max_local_threads: Maximum amount of threads to use while
                                  hashing local files
        :param verify_ssl: Should SSL certificates be verified?
        :param debug: Enable debug logging
        :param session: Optional requests object, used to fetch data
        :param user_agent: Optional user agent header sent with each request
        """
        self.__url = url.rstrip("/") + "/"
        self._mode = mode
        self.__root = webroot.lstrip("\\/")
        self._files_local = OrderedDefaultDict(dict)
        self.__files_remote = OrderedDefaultDict(dict)
        self.__max_remote_threads = max_remote_threads
        self._max_local_threads = max_local_threads
        self.__session = session or requests
        self.__verify_ssl = verify_ssl
        self.__repo = Repo(path=git_repo_path)
        self.__repo.heads.master.checkout(force=True)
        self.__base = join(self.git_path, self.__root)
        self.__request_count = 0
        self._hashing_algorithm = "sha256"
        self.__hashing_interrupted = False
        self.__debug = debug
        self.__user_agent = user_agent or self.DEFAULT_USER_AGENT
        self.on_log = Event()
        self.on_progress = Event()

    @property
    def hashing_algorithm(self):
        """
        Get the currently used hashing algorithm

        :return: String
        """
        return self._hashing_algorithm

    @hashing_algorithm.setter
    def hashing_algorithm(self, algorithm: str):
        """
        Set the hashing algorithm to use

        :param algorithm: String
        :return: None
        """
        if algorithm not in hashlib.algorithms_guaranteed:
            raise GitFingerprintException(
                f"Invalid hashing algorithm '{algorithm}', accepted "
                f"algorithms: {', '.join(hashlib.algorithms_guaranteed)}.")
        self._hashing_algorithm = algorithm

    @property
    def cache(self):
        """
        Get the local cache

        :return: Dict with the hashing algorithm and all local files
        """
        return {
            "hashing_algorithm": self._hashing_algorithm,
            "files": list(self._files_local.items())
        }

    @cache.setter
    def cache(self, cache):
        """
        Restore local files from a previous session, after validating that
        the cache was built with the same hashing algorithm

        :return: None
        """
        if cache.get("hashing_algorithm") != self._hashing_algorithm:
            raise GitFingerprintException(
                f"Refused to overwrite cache, hashing algorithm "
                f"'{cache.get('hashing_algorithm')}' does not match hashing "
                f"algorithm '{self._hashing_algorithm}'.")

        for head, files in cache.get("files", []):
            self._files_local[head] = files

    @property
    @lru_cache()
    def results(self):
        """
        Calculate all results and sort them

        :return: Results sorted by the amount of matching hashes and the
                 total amount of successful hits
        """
        results = defaultdict(Counter)
        remote = {}

        for file, details in self.__files_remote.items():
            if details.get("status") != 404:
                remote[file] = details

        total_hits = len(remote)
        for head, files in self.__get_hashes_per_head():
            # Percentage of this head's files that were hit on the remote
            # host, clamped to 100 (the "or 1" guards an empty file list).
            results[head]["hits"] = total_hits / ((len(files) / 100) or 1)
            results[head]["hits"] = min(results[head]["hits"], 100)
            results[head]["hashes"] = 0

            for file, details in remote.items():
                if file in files and details.get("hash") == files[file]:
                    results[head]["hashes"] += 1

            results[head]["hashes"] = results[head]["hashes"] / (
                (total_hits / 100) or 1)

        return sorted(results.items(),
                      key=lambda x: (x[1]["hashes"], x[1]["hits"]),
                      reverse=True)

    @property
    @lru_cache()
    def all_heads(self):
        """
        Get a list of all local heads

        :return: Set of unique heads
        """
        return set(self._files_local.keys())

    @property
    @lru_cache()
    def all_files(self) -> Set[str]:
        """
        Unique set of all files

        :return: Set of all unique file names
        """
        return set(self.__all_files())

    def __all_files(self):
        """
        Get all files used in the tree structure

        :return: Iterator of strings
        """
        for head, files in self._files_local.items():
            for file in files.keys():
                yield file

    def __get_hashes_per_head(self):
        """
        Get all hashes per head. Prevents us from having to copy deep
        structures into memory; this method updates the dictionary as the
        head moves from commit to commit.

        :return: Iterator
        """
        tracker = {}
        for head, files in self._files_local.items():
            for file, file_hash in files.items():
                if file_hash is not None:
                    tracker[file] = file_hash
                if file_hash is None and file in tracker:
                    del tracker[file]
            yield head, tracker

    @property
    @lru_cache()
    def mode(self) -> str:
        """
        Mode getter, resolves partially typed modes (e.g. "t") to their full
        MODE_* constant by matching on the first letter

        :return: String
        """
        for key in dir(self):
            if key.startswith("MODE_"):
                if getattr(self, key)[0].lower() == self._mode[0].lower():
                    return getattr(self, key)

        raise GitFingerprintException(f"Invalid scan mode: {self._mode}.")

    @property
    def git_path(self) -> str:
        """
        Getter for the absolute git path

        :return: String
        """
        return self.__repo.working_dir

    def log(self, message, level: str = LOG_LEVEL_INFO):
        """
        Shorthand for firing the log event

        :param message: Message to log
        :param level: Log level indicator (one of the LOG_LEVEL_* constants)
        :return: None
        """
        self.on_log.fire(message, level)

    def scan_local(self):
        """
        Build the local file tree

        :return: None
        """
        self.log(f"Building {self.mode} tree with hashing "
                 f"algorithm {self.hashing_algorithm}..")

        if self.mode == self.MODE_BRANCH:
            self.__build_file_tree(self.__branch_iterator())

        if self.mode == self.MODE_TAG:
            self.__build_file_tree(self.__tag_iterator())

        if self.mode == self.MODE_COMMIT:
            self.__build_commit_tree()

    def scan_remote(self):
        """
        Start scanning the remote host and return the results

        :return: None
        """
        self.log(f"Scanning remote host ({len(self.all_files)} files "
                 f"over {len(self._files_local)} {self.mode}s)..")

        pool = ThreadPool(self.__max_remote_threads)
        pool.map_async(self.request, self.all_files)
        pool.close()
        pool.join()

    def __build_commit_tree(self):
        def on_error(e):
            print(str(e) + "\n" + "".join(format_tb(e.__traceback__)))
            exit(1)

        cache = next(reversed(self._files_local), None)
        total = self.__repo.head.commit.count()
        commits = []

        for commit in self.__repo.iter_commits(reverse=True):
            if commit.hexsha not in self._files_local:
                commits.append(commit.hexsha)

        commits = chunks(commits, self._max_local_threads)
        commit_chunk_size = len(next(iter(commits), []))
        builder = partial(self.__build_commit_tree_thread, total)

        if cache:
            self.log(f"Starting at cached commit: {cache}",
                     self.LOG_LEVEL_INFO)

        pool = ThreadPool(processes=self._max_local_threads + 1)

        try:
            self.log(
                f"Setting up {self._max_local_threads} temporary repositories "
                f"and assigning {commit_chunk_size} commits per thread (total "
                f"commits: {total}).")
            pool.map_async(builder, commits, error_callback=on_error)
            pool.close()
            pool.join()
        except KeyboardInterrupt:
            self.log("Interrupt detected, halting threads..",
                     self.LOG_LEVEL_WARNING)
            self.__hashing_interrupted = True
            pool.close()  # Let the workers notice the interrupt flag and stop
            pool.join()
            raise
        finally:
            self.log("Cleaning up temporary repositories..")

        self.log("Finished building commit tree..")

    @temporary_directory
    def __build_commit_tree_thread(self, temp: str, total: int,
                                   commits: List[str]):
        """
        Build a part of the commit tree in a thread

        :param temp: Temporary directory (injected by @temporary_directory)
        :param total: Total amount of commits in the repository
        :param commits: Chunk of commit hashes to process
        :return: None
        """
        if self.__debug:
            self.log(f"Setting up temporary repository in: {temp}")

        copytree(join(self.git_path, ".git"), join(temp, ".git"))
        temp_repo = Repo(temp)
        temp_repo.git.checkout("master", "-f")

        for index, commit in enumerate(commits):
            if self.__hashing_interrupted:
                break

            if index % 5 == 0:
                self.on_progress.fire(current=len(self._files_local) + 1,
                                      total=total)

            try:
                temp_repo.git.checkout(commit, "-f")
                files = temp_repo.head.commit.stats.files
                # Ensures it gets skipped next time round
                self._files_local[commit] = {}
            except (GitCommandError, OSError, ValueError):
                break

            for file in files:
                if not file.startswith(self.__root):
                    continue

                path = join(temp, file)
                file = relpath(file, self.__root)

                if isfile(path):
                    with open(path, "rb") as buff:
                        self._files_local[commit][file] = self.hash(buff)
                else:
                    self._files_local[commit][file] = None

    def __branch_iterator(self):
        """
        Walk over all branches and check them out one by one

        :return: Iterator of (head, file, hash) tuples
        """
        for branch in self.__repo.git.branch("-a").splitlines(keepends=False):
            if " -> " in branch:
                continue

            branch = branch.strip("* ")
            self.__repo.git.checkout(branch)
            yield from self.__walk_local_files(branch)

    def __tag_iterator(self):
        """
        Walk over all tags individually

        :return: Iterator of (head, file, hash) tuples
        """
        for tag in self.__repo.tags:
            self.log(f"Scanning tag '{tag.name}'..")
            self.__repo.git.checkout(tag.name)
            yield from self.__walk_local_files(tag.name)

    def __walk_local_files(self, head) -> Iterator[Tuple[str, str, str]]:
        """
        Walk over all files under the webroot and yield their head, relative
        path and hash

        :return: Iterator of (head, file, hash) tuples
        """
        for path, _, files in walk(self.__base):
            for file in files:
                with open(join(path, file), "rb") as buff:
                    yield (head,
                           relpath(join(path, file), self.__base),
                           self.hash(buff))

    def __build_file_tree(self, iterator):
        """
        Generic 'build file tree' method

        :param iterator: Iterator of (head, file, hash) tuples
        :return: None
        """
        for head, file, file_hash in iterator:
            self._files_local[head][file] = file_hash

    def request(self, file):
        """
        Make a (threaded) request to the server

        :param file: File we're going to request
        """
        resp = self.__session.get(url=self.__url + file,
                                  allow_redirects=False,
                                  verify=self.__verify_ssl,
                                  headers={"User-Agent": self.__user_agent})

        self.__files_remote[file]["hash"] = self.hash(BytesIO(resp.content))
        self.__files_remote[file]["status"] = resp.status_code
        self.__request_count += 1

        if self.__request_count % 10 == 0:
            self.on_progress.fire(count=self.__request_count)

    def hash(self, file, block_size=65536) -> str:
        """
        Hash a file efficiently using the algorithm specified

        :param file: File like object to hash
        :param block_size: Buffer size
        :return: Hash as a string
        """
        algo = getattr(hashlib, self._hashing_algorithm)()

        for block in iter(lambda: file.read(block_size), b""):
            algo.update(block)

        return algo.hexdigest()
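
if __name__ == "__main__":
    # Minimal usage sketch (hypothetical URL and repository path; a real run
    # needs a local clone of the target application's git repository).
    scanner = Scanner(url="https://example.com",
                      mode=Scanner.MODE_TAG,
                      git_repo_path="/tmp/target-repo")
    scanner.scan_local()   # hash every file for every tag in the local clone
    scanner.scan_remote()  # fetch and hash the same paths from the server

    for head, counters in scanner.results:
        print(head, dict(counters))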