def check_archives(self):
    """Walk the store's mount directory and verify every ECSBX archive.

    Each file under ``self.mount_dir`` is sorted into one of four lists:
    partial copies (``.ecsbx.part``), archives that passed the checks,
    archives that need repair, and unrelated files.  An archive whose
    block check or hash check reports an error is logged as ignored and
    is left out of every list.  The repair list is also stored (as a
    copy) on ``self.to_be_repaired``.

    Returns:
        ``None`` when the store is not active, otherwise the tuple
        ``(partial, full_okay, full_failed, unrelated)`` of path lists.
    """
    if not self.active:
        return None

    print_w_time("ECBSX store archive checking intitiated, store :", self.name)

    partial = []
    full_okay = []
    full_failed = []
    unrelated = []

    for d, _, files in os.walk(self.mount_dir):
        for f in files:
            full_path = os.path.join(d, f)

            if full_path.endswith(".ecsbx.part"):
                printin(1, "Ignoring partial copy " + full_path)
                partial.append(full_path)
                continue

            if not full_path.endswith(".ecsbx"):
                printin(1, "Ignoring unrelated file " + full_path)
                unrelated.append(full_path)
                continue

            printin(1, "Checking " + full_path)
            res = blkar.check_file(full_path)
            error = res["error"]
            if error is not None:
                printin(2, error)
                printin(2, "Ignoring file")
                continue

            repair = False
            failed_block_count = res["stats"]["numberOfBlocksFailedCheck"]
            if failed_block_count > 0:
                printin(2, failed_block_count, "blocks failed check")
                repair = True
            elif failed_block_count == 0:
                # Blocks are fine, so also compare the recorded hash with
                # the hash of the stored data.
                res = blkar.check_file_hash_only(full_path)
                error = res["error"]
                if error is not None:
                    printin(3, error)
                    printin(3, "Ignoring file")
                    # An archive that could not be hash-checked is ignored,
                    # matching the block-check error path above, rather
                    # than being reported as okay.
                    continue

                stats = res["stats"]
                if "recordedHash" in stats:
                    recorded_hash = stats["recordedHash"]
                    data_hash = stats["hashOfStoredData"]
                    if recorded_hash != data_hash:
                        printin(3, "Hash check failed")
                        repair = True
                else:
                    printin(3, "No hash recorded")

            if repair:
                full_failed.append(full_path)
                printin(2, "Added to repair list")
            else:
                full_okay.append(full_path)

    self.to_be_repaired = full_failed.copy()

    return partial, full_okay, full_failed, unrelated
def display(self):
    """Print the queued warnings, then call ``self.clear()``.

    Each message is printed with its timestamp formatted as
    ``YYYY-mm-dd HH:MM:SS``.  "No warnings" is printed instead when the
    message list is empty.
    """
    pending = self.__messages
    if pending != []:
        print_w_time("Warnings :")
        for entry in pending:
            # Third field of each entry is not needed for display
            stamp, text, _ = entry
            printin(1, stamp.strftime("%Y-%m-%d %H:%M:%S"), "-", text)
    else:
        print_w_time("No warnings")

    self.clear()
def print_system_info():
    """Print program, OS, machine, Python and blkar version details."""

    def report(label, value):
        # Every entry is printed as "<label> : <value>" at indent level 1
        printin(1, label + " : " + value)

    print_w_time("System information")
    report(sys_info["acronym"] + " version", sys_info["daams_version"])
    report("OS name", platform.system())
    report("OS release", platform.release())
    report("Machine type", platform.machine())
    report("Python version", platform.python_version())
    report("blkar version", sys_info["blkar_version"])
def self_check_hard_fail(self):
    """Check that the CPU temperature can be read, shutting down if not.

    Calls ``shutdown_error()`` when ``self.get_temperature()`` returns
    ``None``; otherwise prints the reading.
    """
    print_w_time("CPU monitor self check with hard fail")

    temp = self.get_temperature()
    # Identity check: ``None`` is the failure marker, and a reading of 0
    # must not be mistaken for it.
    if temp is None:
        printin(1, "Failed to get temperature")
        shutdown_error()
    else:
        printin(1, "Okay")
        printin(2, "Temperature :", temp)
def check_dependencies():
    """Check that blkar and hddtemp are available, shutting down if not.

    The value returned by ``blkar.check_for_installation()`` is recorded in
    ``sys_info["blkar_version"]``.  Any exception raised during the checks
    is reported and followed by ``shutdown_error()``.
    """
    print_w_time("Checking system dependencies")

    try:
        sys_info["blkar_version"] = blkar.check_for_installation()

        if shutil.which("hddtemp") is None:
            raise Exception("hddtemp not detected")
    except Exception as e:
        # An exception raised without arguments has an empty ``args``
        # tuple; fall back to its repr instead of failing with IndexError.
        printin(1, e.args[0] if e.args else repr(e))
        shutdown_error()

    printin(1, "Okay")
def mount(self):
    """Mount ``self.part`` on ``self.mount_dir`` unless already mounted.

    The partition counts as mounted when its name appears in any line
    returned by ``self.mount_output_lines()``.  The outcome of the
    ``mount`` command is reported from its return code.
    """
    print_w_time("Mounting partition", self.part, "to", self.mount_dir)

    matching = [
        entry for entry in self.mount_output_lines()
        if self.part in str(entry)
    ]
    if matching != []:
        printin(1, "Skipped, partition already mounted")
        return

    result = subprocess.run(["mount", self.part, self.mount_dir],
                            capture_output=True)
    outcome = "Okay" if result.returncode == 0 else "Failed to mount"
    printin(1, outcome)
def unmount(self):
    """Unmount ``self.mount_dir`` if the partition is currently mounted.

    The partition counts as mounted when its name appears in any line
    returned by ``self.mount_output_lines()``.  The outcome of the
    ``umount`` command is reported from its return code.
    """
    print_w_time("Unmounting directory", self.mount_dir)

    matching = [
        entry for entry in self.mount_output_lines()
        if self.part in str(entry)
    ]
    if matching == []:
        printin(1, "Skipped, disk is not mounted")
        return

    result = subprocess.run(["umount", self.mount_dir], capture_output=True)
    outcome = "Okay" if result.returncode == 0 else "Failed to unmount"
    printin(1, outcome)
def health_check(self):
    """Check the store's mount point, then run the parent health check.

    When ``self.check_if_accessible()`` is false, a warning is pushed to
    the warning board and the store is unmounted.  The parent class's
    ``health_check`` runs in either case.
    """
    print_w_time("ECSBX store status update, store :", self.name)

    printin(1, "Checking if mount point is still accessible")
    accessible = self.check_if_accessible()
    if not accessible:
        # NOTE(review): the log says "marking as inactive" but no flag is
        # changed in this method - presumably check_if_accessible() does
        # that; confirm.
        printin(2, "Not accessible, marking as inactive")
        warning = "ECSBX store " + self.name + " is no longer active"
        self.warning_board.push(warning)
        printin(2, "Persistant warning registered")
        self.unmount()

    printin(1, "Checking disk health")
    super().health_check()
def load_file(self, file_path):
    """Load, validate and store the YAML configuration at ``file_path``.

    The parsed document is passed to ``check_config`` and then the
    optional sections ``ecsbx_stores``, ``cpu_health``, ``disk_health``
    and ``delay_before_sched_sec`` are copied onto this object.

    A path that is a directory, a missing file, or a YAML parse error is
    reported and followed by ``shutdown_error()``.

    Args:
        file_path: Path of the configuration file to read.
    """
    print_w_time("Loading configuration file")

    try:
        with open(file_path) as f:
            config = yaml.safe_load(f.read())

        check_config(config)

        self.__config = config

        if "ecsbx_stores" in config:
            # Build a real list: a bare ``map`` object is exhausted after a
            # single pass, so any second read of the stores would silently
            # see nothing.
            self.__ecsbx_stores = [
                ECSBXStoreConfig(name=d["name"],
                                 partition=d["partition"],
                                 mount_dir=d["mount_dir"],
                                 smart_enabled=d["smart_enabled"])
                for d in config["ecsbx_stores"]
            ]

        if "cpu_health" in config:
            cpu_health = config["cpu_health"]
            self.__cpu_health = CPUHealthConfig(
                warn_temperature=cpu_health["warn_temperature"],
                shutdown_temperature=cpu_health["shutdown_temperature"])

        if "disk_health" in config:
            disk_health = config["disk_health"]
            self.__disk_health = DiskHealthConfig(
                warn_temperature=disk_health["warn_temperature"],
                shutdown_temperature=disk_health["shutdown_temperature"])

        if "delay_before_sched_sec" in config:
            self.__delay_before_sched_sec = config["delay_before_sched_sec"]
    except IsADirectoryError:
        printin(
            1,
            "Configuration file " + '"' + file_path + '"' + " is a directory")
        shutdown_error()
    except FileNotFoundError:
        printin(
            1,
            "Configuration file " + '"' + file_path + '"' + " does not exist")
        shutdown_error()
    except yaml.YAMLError as e:
        printin(
            1,
            "Failed to parse configuration file " + '"' + file_path + '"')
        printin(1, "Error :")
        print(indent_str(2, str(e)))
        shutdown_error()
def repair_archives(self):
    """Run ``blkar.repair_file`` on every archive queued for repair.

    Does nothing when the store is not active.  Each repair result is
    printed, and ``self.to_be_repaired`` is reset to an empty list once
    the queued archives have been processed.
    """
    if not self.active:
        return

    print_w_time("ECBSX store archive repair intitiated, store :", self.name)

    queue = self.to_be_repaired
    if queue == []:
        printin(1, "No archives to be repaired")
        return

    for archive in queue:
        printin(1, "Repairing " + archive)
        outcome = blkar.repair_file(archive)
        print(outcome)

    self.to_be_repaired = []
def check_config(config):
    """Validate the parsed configuration, shutting down on any problem.

    Runs the individual section checks in order.  A ``KeyError`` is
    reported as a missing key and any other exception is reported by its
    message; both are followed by ``shutdown_error()``.

    Args:
        config: The parsed configuration document.
    """
    print_w_time("Checking configuration file")

    try:
        check_root_keys(config)
        check_ecsbx_stores(config)
        check_cpu_health(config)
        check_disk_health(config)
        check_delay_before_sched_sec(config)
    except KeyError as e:
        # Message previously read "misisng"
        printin(1, "Key", str(e), "missing")
        shutdown_error()
    except Exception as e:
        printin(1, str(e))
        shutdown_error()
def health_check(self):
    """Compare the CPU temperature against the warn/shutdown thresholds.

    At or above the shutdown threshold ``OSShutdownRequest`` is raised.
    At or above the warning threshold a one-off warning is pushed to the
    warning board.  When the temperature cannot be read the failure is
    logged and the check is skipped.

    Raises:
        OSShutdownRequest: The temperature reached the shutdown threshold.
    """
    print_w_time("CPU health check")

    temp = self.get_temperature()
    if temp is None:
        # ``None >= threshold`` would raise TypeError and take the whole
        # monitor down, so report the failed reading and skip this round.
        printin(1, "Failed to get temperature")
        return

    printin(1, "Temperature :", temp)

    if temp >= self.__shutdown_temperature:
        printin(2, "CPU temperature has reached shutdown threshold")
        printin(2, "Shutting down OS")
        raise OSShutdownRequest
    elif temp >= self.__warn_temperature:
        printin(2, "CPU temperature has reached warning threshold")
        printin(2, "One-off warning registered")
        self.__warning_board.push("CPU temperature at " + str(temp) +
                                  ", please check for ventilation status",
                                  persist=False)
def health_check(self):
    """Compare the disk temperature against the warn/shutdown thresholds.

    Skipped when SMART monitoring is not enabled.  At or above the
    shutdown threshold ``OSShutdownRequest`` is raised.  At or above the
    warning threshold a one-off warning is pushed to the warning board.
    When the temperature cannot be read the failure is logged and the
    check is skipped.

    Raises:
        OSShutdownRequest: The temperature reached the shutdown threshold.
    """
    print_w_time("Disk health check, disk :", self.disk)

    if not self.smart_enabled:
        printin(1, "SMART monitoring not enabled, check skipped")
        return

    temp = self.get_temperature()
    if temp is None:
        # ``None >= threshold`` would raise TypeError and take the whole
        # monitor down, so report the failed reading and skip this round.
        printin(1, "Failed to get temperature")
        return

    printin(1, "Temperature :", temp)

    if temp >= self.shutdown_temperature:
        printin(2, "Disk temperature has reached shutdown threshold")
        printin(2, "Shutting down OS")
        raise OSShutdownRequest
    elif temp >= self.warn_temperature:
        printin(2, "Disk temperature has reached warning threshold")
        printin(2, "One-off warning registered")
        # NOTE(review): the thresholds are read from public attributes but
        # the board through a name-mangled private one - confirm this
        # class actually defines ``__warning_board``.
        self.__warning_board.push("Disk temperature at " + str(temp) +
                                  ", please check for ventilation status",
                                  persist=False)
def self_check_hard_fail(self):
    """Check that the disk's SMART readings are available.

    Skipped when SMART monitoring is not enabled.  Otherwise both the
    temperature and the reallocated sector count are read, and
    ``shutdown_error()`` is called if either comes back as ``None``.
    """
    print_w_time("Disk controller self check with hard fail, disk :",
                 self.disk)

    if not self.smart_enabled:
        printin(1, "SMART monitoring not enabled, check skipped")
        return

    temp = self.get_temperature()
    reallocated_sector_count = self.get_reallocated_sector_count()

    # Identity checks: ``None`` is the failure marker, and a legitimate
    # reading of 0 must not be mistaken for it.
    if temp is None:
        printin(1, "Failed to get temperature")
        shutdown_error()
    elif reallocated_sector_count is None:
        printin(1, "Failed to get reallocated sector count")
        shutdown_error()
    else:
        printin(1, "Okay")
        printin(2, "Temperature :", temp)
        printin(2, "Reallocated sector count :", reallocated_sector_count)
def print_start_up_message():
    """Print a timestamped line announcing that the program is starting."""
    program_name = sys_info["full_name"]
    print_w_time(program_name, "starting")
def check_system_rights():
    """Require an effective UID of 0, shutting down otherwise."""
    print_w_time("Checking system rights")

    running_as_root = os.geteuid() == 0
    if not running_as_root:
        printin(1, sys_info["acronym"], "requires root access")
        shutdown_error()

    printin(1, "Okay")
def shutdown_error():
    """Announce the shutdown and exit the process with status 1.

    Raises:
        SystemExit: Always, with exit code 1.
    """
    # Imported locally so this function does not depend on a module-level
    # import being present.
    import sys

    print_w_time("Shutting down", sys_info["acronym"])
    # ``sys.exit`` rather than the ``exit`` builtin: the latter is injected
    # by the ``site`` module for interactive use and is missing when
    # Python runs without it.
    sys.exit(1)
def shutdown_normal():
    """Announce the shutdown and exit the process with status 0.

    Raises:
        SystemExit: Always, with exit code 0.
    """
    # Imported locally so this function does not depend on a module-level
    # import being present.
    import sys

    print_w_time("Shutting down", sys_info["acronym"])
    # ``sys.exit`` rather than the ``exit`` builtin: the latter is injected
    # by the ``site`` module for interactive use and is missing when
    # Python runs without it.
    sys.exit(0)
def main():
    """Parse the command line, run the start-up checks, then schedule tasks.

    With ``--check-only`` the program exits after the initial checks.
    Otherwise it waits for the configured delay and runs the scheduler.
    On ``KeyboardInterrupt`` or ``OSShutdownRequest`` every store is
    unmounted before the program shuts down.
    """
    acronym = sys_info["acronym"]
    full_name = sys_info["full_name"]
    version = sys_info["daams_version"]

    parser = argparse.ArgumentParser(prog=acronym)
    parser.add_argument("--config",
                        metavar="CONFIG",
                        default="daams.config",
                        help="configuration file to use")
    # (flag, help text, text printed by the "version" action)
    version_flags = (
        ("--acronym", "show acronym", acronym),
        ("--full-name", "show full name", full_name),
        ("--version", "show acrynym and version number",
         acronym + " " + version),
        ("--version-long", "show full name and version number",
         full_name + " " + version),
    )
    for flag, help_text, shown in version_flags:
        parser.add_argument(flag,
                            action="version",
                            help=help_text,
                            version=shown)
    parser.add_argument("--check-only",
                        action="store_true",
                        help="complete all initial checks then exit")
    args = parser.parse_args()

    system_diagnostics.print_start_up_message()
    system_diagnostics.check_system_rights()
    system_diagnostics.check_dependencies()
    system_diagnostics.print_system_info()

    config = Config()
    config.load_file(args.config)

    warning_board = WarningBoard()

    cpu_monitor = CPUMonitor(config.cpu_health(), warning_board)
    cpu_monitor.self_check_hard_fail()

    ecsbx_stores = []
    for store_config in config.ecsbx_stores():
        ecsbx_stores.append(
            ECSBXStore(store_config, config.disk_health(), warning_board))

    for store in ecsbx_stores:
        store.self_check_hard_fail()

    if args.check_only:
        print("All initial checks completed")
        shutdown_normal()

    def unmount_all():
        # Shared clean-up for both exit paths below
        for store in ecsbx_stores:
            store.unmount()

    try:
        delay = config.delay_before_sched_sec()
        print_w_time("Waiting for", str(delay),
                     "seconds before scheduling tasks")
        time.sleep(delay)

        print_w_time("Scheduling tasks")
        scheduler = sched.scheduler(time.time, time.sleep)
        init_tasks(ecsbx_stores)
        schedule_tasks(scheduler, cpu_monitor, ecsbx_stores, warning_board)
        scheduler.run()
    except KeyboardInterrupt:
        print()
        unmount_all()
        shutdown_normal()
    except OSShutdownRequest:
        unmount_all()
        shutdown_error()
def shutdown_os():
    """Announce the OS shutdown and run ``shutdown -h now``."""
    print_w_time("Shutting down OS")
    halt_command = ["shutdown", "-h", "now"]
    subprocess.run(halt_command)