def get_znode_tree_from_qconf(cluster_name, path, nodes, current_id='1', parent_id='0'):
    """Recursively collect zookeeper nodes via the QConf agent as ztree data.

    Args:
        cluster_name: zookeeper cluster name known to the QConf agent.
        path: znode path to start from.
        nodes: output list; entries {"id", "pId", "name", "path"} are
            appended in pre-order.
        current_id: ztree id for this node; children get this id plus a
            zero-padded 2-digit index.
        parent_id: ztree id of this node's parent.
    """
    from lib.zyqconf import qconf_py

    # Node name is only the last path component ("/" keeps itself as name).
    name = path if path == "/" else path.rsplit('/', 1)[-1]
    nodes.append({
        "id": current_id,
        "pId": parent_id,
        "name": name,
        "path": path
    })
    children = []
    try:
        children = qconf_py.get_batch_keys(path, cluster_name)
    except qconf_py.Error as exc:
        # Workaround: qconf's get_batch_keys rejects the root path "/";
        # fall back to querying zookeeper directly in that case.
        # getattr keeps this working on Python 3, where exceptions no
        # longer carry a .message attribute (str(exc) is the fallback).
        if getattr(exc, 'message', str(exc)) == "Error parameter!":
            zoo_client = ZookeeperService.get_zoo_client(cluster_name)
            children = zoo_client.get_children(path)
        else:
            log.warning('Node does not exists on QConf agent: %s', path)
    for idx, child in enumerate(children):
        # Zero-pad the index so sibling ids never collide in wide trees:
        # parent "1" -> children "100", "101", ... "199".
        idx = '{0:02d}'.format(idx)
        child_id = "{0}{1}".format(current_id, idx)
        child_path = os.path.join(path, str(child))
        get_znode_tree_from_qconf(cluster_name, child_path, nodes, child_id, current_id)
def get_znode_tree(zoo_client, path, nodes, current_id='1', parent_id='0'):
    """Recursively collect zookeeper nodes, formatted as ztree data.

    Args:
        zoo_client: zookeeper client exposing get_children(path).
        path: znode path to start from.
        nodes: output list; entries {"id", "pId", "name", "path"} are
            appended in pre-order.
        current_id: ztree id for this node; children get this id plus a
            zero-padded 2-digit index.
        parent_id: ztree id of this node's parent.
    """
    # Node name is only the last path component ("/" keeps itself as name).
    name = path if path == "/" else path.rsplit('/', 1)[-1]
    nodes.append({
        "id": current_id,
        "pId": parent_id,
        "name": name,
        "path": path
    })
    try:
        children = zoo_client.get_children(path)
    except NoNodeError:
        log.warning('Node does not exists on zookeeper: %s', path)
    else:
        for idx, child in enumerate(children):
            # Zero-pad the index so sibling ids never collide in wide
            # trees: parent "1" -> children "100", "101", ...
            idx = '{0:02d}'.format(idx)
            child_id = "{0}{1}".format(current_id, idx)
            # Join with an explicit "/" rather than os.path.join so the
            # znode separator stays "/" on every platform.
            child_path = path + child if path == "/" else path + "/" + child
            get_znode_tree(zoo_client, child_path, nodes, child_id, current_id)
def get_znode_tree_from_qconf(cluster_name, path, nodes, current_id='1', parent_id='0'):
    """Recursively collect zookeeper nodes via the QConf agent as ztree data.

    Args:
        cluster_name: zookeeper cluster name known to the QConf agent.
        path: znode path to start from.
        nodes: output list; entries {"id", "pId", "name", "path"} are
            appended in pre-order.
        current_id: ztree id for this node; children get this id plus a
            zero-padded 2-digit index.
        parent_id: ztree id of this node's parent.
    """
    from lib.zyqconf import qconf_py

    # Node name is only the last path component ("/" keeps itself as name).
    name = path if path == "/" else path.rsplit('/', 1)[-1]
    nodes.append({
        "id": current_id,
        "pId": parent_id,
        "name": name,
        "path": path
    })
    children = []
    try:
        children = qconf_py.get_batch_keys(path, cluster_name)
    except qconf_py.Error as exc:
        # Workaround: qconf's get_batch_keys errors out on the root path
        # ("/"); fall back to querying zookeeper directly in that case.
        # getattr keeps this working on Python 3, where exceptions no
        # longer carry a .message attribute (str(exc) is the fallback).
        if getattr(exc, 'message', str(exc)) == "Error parameter!":
            zoo_client = ZookeeperService.get_zoo_client(cluster_name)
            children = zoo_client.get_children(path)
        else:
            log.warning('Node does not exists on QConf Agent, path: %s', path)
    for idx, child in enumerate(children):
        child_path = os.path.join(path, str(child))
        # Parent id "1" -> children "101", "102", ... — index is
        # zero-padded to two digits so ids never collide in wide trees.
        child_id = "{0}{1:02d}".format(current_id, idx)
        get_znode_tree_from_qconf(cluster_name, child_path, nodes, child_id, current_id)
def log_format(instance, func_name=None, params=None, error_info=None):
    """Emit a standardized warning/error log record for a handler call.

    Args:
        instance: class instance (a Handler in this codebase); must expose
            `request.uri` and `_start_time`.
        func_name: name of the method whose call failed or returned empty.
        params: str, parameters worth mentioning in the log; defaults to
            the request URI.
        error_info: error details; when given, an error-level record is
            written instead of a warning.
    """
    # NOTE(review): type(x) is a class for every object, so this guard is
    # always true — presumably an isinstance check was intended; kept
    # as-is to avoid a behavior change.
    if inspect.isclass(type(instance)):
        module_name = instance.__module__
        class_name = instance.__class__.__name__
        if not params:
            params = instance.request.uri
        # Elapsed handler time in milliseconds, 2 decimal places.
        end_time = time.time()
        spend_time = round((end_time - instance._start_time) * 1000, 2)
        if error_info:
            # Fixed misspelling "faild" -> "failed" in the log text.
            log.error('%s.%s failed spend_time:%sms params:(%s) error info:%s',
                      module_name, class_name, spend_time, params, error_info)
            return
        if func_name:
            log.warning('%s.%s call %s failed spend_time:%sms params:(%s)',
                        module_name, class_name, func_name, spend_time, params)
        else:
            log.warning('%s.%s failed spend_time:%sms params:(%s)',
                        module_name, class_name, spend_time, params)
def _exec(self):
    """Handler exit hook: validate arguments, build the response, send it.

    Argument errors answer with status 1000; a lost MySQL connection
    triggers a reconnect plus a reload of the current page; any other
    failure is reported as an internal error (status 10001).
    """
    try:
        self._args_set()
    except ValueError as err:
        log_format(self, error_info=err)
        msg = "参数错误,请检查参数后再请求! {}".format(err)
        return self.send_obj({"status": 1000, "msg": msg})
    try:
        res = self.response()
    except OperationalError as err:
        # MySQL connection dropped: reconnect, then reload the page.
        log.warning("Mysql OperationalError: %s", err)
        reconnect()
        return self.redirect(self.request.uri)
    except Exception:
        import traceback
        detail = traceback.format_exc()
        log.error("Internal Error: %s", detail)
        return self.send_obj({"status": 10001, "msg": str(detail)})
    if self._finished:
        return
    return self.send_obj(res)
def close(self, commit=True):
    """Mark a clean shutdown in the journal and close the datastore.

    Args:
        commit: when False the final commit is skipped (logged as a
            warning); the store is closed either way.
    """
    # TODO: route the close through the request queue instead
    self.execute("UPDATE lootnika SET self_status='shutdown successfully'")
    if not commit:
        log.warning("Closing Datastore without commit")
    else:
        self.execute('--commit--')
    self.execute('--close--')
def run(self):
    """Watchdog loop: track module threads, confirm startup, detect crashes.

    Polls self.allThreads against the expected self.myThreads, flips
    self.isVerified when every module is up, logs the startup banner once,
    and calls self.crash() on slow startup or when a module dies later.
    """
    n = 1
    crash = False  # after a crash, only keep refreshing thread statuses
    while True:
        self.resources_usage()
        self.threads_names()
        # for i in get_threads():
        #     sout.print(f'{i} {i.isAlive()}', 'green')
        # print('------')
        for i in self.myThreads:
            if i in self.allThreads:
                # message(i+ ' Run',clrSun)
                self.myThreads[i] = True
            else:
                self.myThreads[i] = False
        # mark which modules have started; verified only when all are up
        self.isVerified = True
        for i in self.myThreads:
            if not self.myThreads[i]:
                self.isVerified = False
        if not crash:
            # wait for all modules to start
            if not self.started:
                if self.isVerified:
                    if sys.argv[0].lower().endswith('.exe'):
                        log.info(
                            f"Lootnika started - Executable version: {__version__}_{platform}"
                        )
                    else:
                        log.info(
                            f"Lootnika started - Source version: {__version__}_{platform}"
                        )
                    log.info(
                        f"Welcome to http://localhost:{cfg['rest']['port']}/admin"
                    )
                    ds.execute("UPDATE lootnika SET self_status='working'")
                    self.started = True
                    self.rate = 2  # startup done: polling can slow down now
                else:
                    n += 1
                    if n == 20:
                        # startup time limit exceeded
                        crash = self.crash(
                            'One of the modules does not work correctly')
                    elif n == 10:
                        log.warning("detected slow Lootnika startup")
        # otherwise keep watching the running modules
        else:
            if not self.isVerified and not self.exit:
                crash = self.crash(
                    "One of the modules does not work correctly")
        time.sleep(self.rate)
def shutdown_me(signum, frame, appServerSvc=None):
    """Signal handler (Ctrl-C): stop modules in order and terminate.

    Args:
        signum, frame: standard signal-handler arguments (not used).
        appServerSvc: optional Windows service wrapper; when given, its
            daemon is told to exit before the process dies.
    """
    log.warning('Stopping...')
    if appServerSvc:
        appServerSvc.daemon.exit()
    log.info("Shutdown is successful")
    # Hard exit: skips atexit handlers and any lingering thread joins.
    os._exit(0)
def shutdown_me(signum=1, frame=1):
    """Stop modules in the proper order: REST server, Scheduler, Datastore.

    Idempotent — a second call returns immediately via selfControl.exit.
    Each stage is entered once (tracked by n) and the loop waits for
    selfControl.myThreads to report the corresponding thread gone.
    """
    log.warning(
        f'Lootnika stopping on {cfg["rest"]["host"]}:{cfg["rest"]["port"]}')
    if selfControl.exit:
        return
    selfControl.exit = True
    selfControl.rate = 0.3
    n = 0
    try:
        while True:
            time.sleep(0.3)
            # all monitored threads are gone — nothing left to stop
            if not bool(selfControl.myThreads):
                break
            if selfControl.myThreads['RestServer']:
                if n < 1:
                    log.debug("Stopping REST server")
                    try:
                        # wildcard bind addresses are not connectable;
                        # use loopback to deliver the stop request
                        if cfg["rest"]["host"] in ['::1', '0.0.0.0']:
                            host = '127.0.0.1'
                        else:
                            host = cfg["rest"]["host"]
                        cnx = httpClient.HTTPConnection(host,
                                                        cfg["rest"]["port"],
                                                        timeout=12)
                        cnx.request(method="GET", url='/a=stop?stop')
                        cnx.getresponse()
                    except Exception:
                        pass
                    n = 1
                continue
            elif selfControl.myThreads['Scheduler']:
                if n < 2:
                    log.debug("Stopping Scheduler thread")
                    scheduler.cmd = 'stop'
                    n = 2
            elif selfControl.myThreads['Datastore']:
                if n < 3:
                    log.debug("Stopping Datastore thread")
                    ds.close()
                    n = 3
            else:
                break
    except Exception as e:
        log.error(f'Shutdown failed: {traceback.format_exc()}')
    finally:
        selfControl.stop = True
        log.info("Lootnika stopped")
        if not stillWork:
            os._exit(1)
def _get_recursively(zoo_client, path, nodes): """get zookeeper nodes recursively """ try: data, _ = zoo_client.get(path) except NoNodeError as exc: log.warning("No node exists in path: %s", path) else: nodes.append({"path": path, "data": data}) for child in zoo_client.get_children(path): child_path = os.path.join(path, child) _get_recursively(zoo_client, child_path, nodes)
def _work_manager(self, taskName: str = '', lastTask: str = '', cmd=False):
    """
    Wrapper around the task executor (Picker). Works like a timer so a run
    can be deferred until the scheduled time, which is why the scheduler
    status check also lives here.
    NOTE: currently the scheduler itself checks the start time and starts
    the task immediately.
    """
    self._update_startTime()
    if not (self.status == 'ready' or self.status == 'wait'):
        log.warning(
            f'Previous task is still running. Next start will be at {self.startTime}'
        )
        return
    self.status = 'work'  # it must only run while status is 'ready'
    if self.taskCycles > 0:
        self.taskCycles -= 1
        # if self.taskCycles==0:
        self.startTime = None
    # an explicit taskName is only possible when cmd=True
    if taskName != '':
        self._start_task(taskName)
    else:
        if not cmd:
            log.info('New tasks cycle')
        else:
            log.info('Start all tasks')
        for taskName in self.taskList:
            # on cancel, do not continue the cycle
            if self.status == 'cancel':
                # from here the worker itself keeps watch, even when paused
                self.curTask = ''
                self.status = 'ready'
                return
            else:
                self._start_task(taskName)
    self.curTask = ''
    # 'wait' when more cycles remain, 'ready' otherwise
    if self.taskCycles > 0:
        self.status = 'wait'
    else:
        self.status = 'ready'
    if cmd:
        log.info('All tasks completed')
    else:
        if self.startTime is None:
            log.info('Tasks cycle done. Task replays are over')
        else:
            log.info(f'Tasks cycle done. Left: {self.taskCycles}')
def __init__(self, db: str):
    """Spawn the Datastore worker thread.

    Args:
        db: sqlite database path; ':memory:' selects a volatile,
            in-memory store (logged as a warning).
    """
    super(Datastore, self).__init__()
    log.debug(f"Starting Datastore thread")
    self.db = db
    self.name = 'Datastore'
    self.status = {}
    self.requestQ = Queue()
    self.isRestored = False
    self.isReady = False
    self.isMemory = db == ':memory:'
    if self.isMemory:
        log.warning('Using inMemory Datastore')
    self.start()
def log_inspector():
    """Background loop: scan configured log files for template expressions.

    For every cfg['tasks']['logTask'] entry, reads the whole log file and,
    when a template string is found, raises a toast and (optionally via an
    event script) sends an 'error' notification. Exits only on an
    unexpected top-level failure.
    """
    log.debug("log_inspector started")
    selfName = 'log_inspector'
    while True:
        try:
            for taskName, task in cfg['tasks']['logTask'].items():
                log.info(f"Check log {taskName}")
                logFile = task['file']
                templates = task['tmpl']
                try:
                    # TODO open if file is changed
                    with open(logFile, encoding='utf-8') as f:
                        cnt = f.read()
                        for tmplName in templates:
                            tmpl = templater.get_tmpl(selfName, tmplName)
                            if tmpl in cnt:
                                ev = f"Found log expression {taskName}: {tmplName}"
                                log.warning(ev)
                                body = templater.tmpl_fill(selfName, 'error').replace(
                                    '{{taskName}}', taskName, -1)
                                event = 'error'
                                new_toast('log_inspector', event)
                                # event script may veto or rewrite the notification
                                if 'eventScript' in task:
                                    allowSend, body = execute_event_script(
                                        log, task['eventScript'], taskName, event, body)
                                else:
                                    allowSend = True
                                if allowSend:
                                    send_notify(taskName, event, body)
                except FileNotFoundError:
                    log.error(f"Not found log file {taskName}")
                except Exception as e:
                    log.error(f"Fail to parse log file {taskName}: {e}")
            sleep(intervalCheckMin * 2)
        except Exception:
            e = traceback.format_exc()
            log.critical(str(e))
            break
def _start_task(self, taskName: str):
    """Execute one task synchronously and record its outcome.

    Builds the per-task logger, task store, factory and picker, runs the
    picker, then logs the sync counters and checkpoints completion.
    Failures are logged (with a full traceback at DEBUG level) and never
    propagate to the caller.
    """
    self.curTask = taskName
    log.info(f'Start task {taskName}')
    try:
        lg = create_task_logger(taskName, console)
        ts = TaskStore(taskName, lg,
                       self.taskList[taskName]['overwriteTaskstore'])
        taskId = self._mark_task_start(taskName)
        # counters layout:
        # [total, seen, new, differ, delete, task error, export error, last doc id]
        self.syncCount[taskId] = [-1, 0, 0, 0, 0, 0, 0, '']
        cf = self.taskList[taskName]
        fc = Factory(taskName, lg, cfg['exporters'][cf['exporter']],
                     self.syncCount[taskId])
        picker = self.Picker(taskId, taskName, cf, lg, ts, fc,
                             self.syncCount[taskId])
        picker.run()
        tab = '\n' + '\t' * 5
        lg.info(f"Task done"
                f"{tab}Total objects: {self.syncCount[taskId][0]}"
                f"{tab}Seen: {self.syncCount[taskId][1]}"
                f"{tab}New: {self.syncCount[taskId][2]}"
                f"{tab}Differ: {self.syncCount[taskId][3]}"
                f"{tab}Deleted: {self.syncCount[taskId][4]}"
                f"{tab}Task errors: {self.syncCount[taskId][5]}"
                f"{tab}Export errors: {self.syncCount[taskId][6]}")
        if self.syncCount[taskId][5] != 0:
            lg.warning('Task done with some errors. Check logs')
        if self.syncCount[taskId][6] != 0:
            log.warning(
                'Task had errors with sending documents. '
                f'Documents that were not sent are saved in a folder {picker.factory.failPath}'
            )
        self.check_point(taskId, 'complete')
    except Exception as e:
        # log.level == 10 is logging.DEBUG: include the full traceback
        if log.level == 10:
            e = traceback.format_exc()
        log.error(f"Fail with task {taskName}: {e}")
def get_znode_tree(zoo_client, path, nodes, current_id='1', parent_id='0'):
    """Recursively collect zookeeper nodes, formatted as ztree data.

    Args:
        zoo_client: zookeeper client exposing get_children(path).
        path: znode path to start from.
        nodes: output list; entries {"id", "pId", "name", "path"} are
            appended in pre-order.
        current_id: ztree id for this node; children get this id plus a
            zero-padded 2-digit index.
        parent_id: ztree id of this node's parent.
    """
    # Node name is only the last path component ("/" keeps itself as name).
    name = path if path == "/" else path.rsplit('/', 1)[-1]
    nodes.append({
        "id": current_id,
        "pId": parent_id,
        "name": name,
        "path": path
    })
    try:
        children = zoo_client.get_children(path)
    except NoNodeError:
        log.warning('Node does not exists on zookeeper: %s', path)
    else:
        for idx, child in enumerate(children):
            # Zero-pad the index so sibling ids never collide in wide
            # trees: parent "1" -> children "100", "101", ...
            idx = '{0:02d}'.format(idx)
            child_id = "{0}{1}".format(current_id, idx)
            # Build the child path with an explicit "/": os.path.join
            # would use the OS separator ("\\" on Windows), which is
            # invalid for znode paths, and this also matches the sibling
            # implementation of this helper.
            child_path = path + child if path == "/" else path + "/" + child
            get_znode_tree(zoo_client, child_path, nodes, child_id, current_id)
def _exec(self):
    '''Handler exit hook: validate arguments, produce and send a response.

    Argument errors reply with status 1000; a dropped MySQL connection
    triggers a reconnect and a page reload; any other exception becomes
    an internal-error reply (status 10001).
    '''
    try:
        self._args_set()
    except ValueError as bad_arg:
        log_format(self, error_info=bad_arg)
        return self.send_obj({
            'status': 1000,
            'msg': '参数错误,请检查参数后再请求! {}'.format(bad_arg),
        })
    try:
        res = self.response()
    except OperationalError as db_err:
        # MySQL connection error — reconnect, then reload the requested page
        log.warning('Mysql OperationalError: %s', db_err)
        reconnect()
        return self.redirect(self.request.uri)
    except Exception:
        import traceback
        trace = traceback.format_exc()
        log.error('Internal Error: %s', trace)
        return self.send_obj({'status': 10001, 'msg': str(trace)})
    if not self._finished:
        return self.send_obj(res)
def log_format(instance, func_name=None, params=None, error_info=None):
    """Emit a standardized warning/error log record for a handler call.

    Args:
        instance: class instance (a Handler in this codebase); must expose
            `request.uri` and `_start_time`.
        func_name: name of the method whose call failed or returned empty.
        params: str, parameters worth mentioning in the log; defaults to
            the request URI.
        error_info: error details; when given, an error-level record is
            written instead of a warning.
    """
    # NOTE(review): type(x) is a class for every object, so this guard is
    # always true — presumably an isinstance check was intended; kept
    # as-is to avoid a behavior change.
    if inspect.isclass(type(instance)):
        module_name = instance.__module__
        class_name = instance.__class__.__name__
        if not params:
            params = instance.request.uri
        # Elapsed handler time in milliseconds, 2 decimal places.
        end_time = time.time()
        spend_time = round((end_time - instance._start_time) * 1000, 2)
        if error_info:
            # Fixed misspelling "faild" -> "failed" in the log text.
            log.error(
                "%s.%s failed spend_time:%sms params:(%s) error info:%s",
                module_name,
                class_name,
                spend_time,
                params,
                error_info,
            )
            return
        if func_name:
            log.warning(
                "%s.%s call %s failed spend_time:%sms params:(%s)",
                module_name,
                class_name,
                func_name,
                spend_time,
                params,
            )
        else:
            log.warning("%s.%s failed spend_time:%sms params:(%s)",
                        module_name, class_name, spend_time, params)
def _create_db(self) -> sqlite3.Connection:
    """Open (or create) the local datastore database.

    Creates the journal schema on first use, detects an unclean previous
    shutdown via the stored self_status (setting self.isRestored so the
    caller can force a resync), and stamps the new run as
    'starting lootnika'.

    :return: open sqlite3.Connection
    """
    try:
        cnx = sqlite3.connect(self.db)
    except Exception as e:
        log.critical(f"Can't open local datastore {self.db}: {e}")
        raise Exception(e)
    # on start, always check how the previous run ended; a failing
    # SELECT means the schema does not exist yet
    fail = False
    try:
        cur = cnx.cursor()
        cur.execute('SELECT self_status FROM lootnika')
    except Exception:
        if self.db != ':memory:':
            log.warning(f'Creating new tasks journal scheme')
        fail = True
    if fail:
        try:
            # trigger keeps the tasks journal capped at 100 rows
            cur.executescript("""
                CREATE TABLE lootnika (self_status VARCHAR);
                CREATE TABLE tasks (
                    id INTEGER PRIMARY KEY AUTOINCREMENT UNIQUE NOT NULL,
                    name VARCHAR NOT NULL,
                    start_time DATETIME,
                    end_time DATETIME,
                    status VARCHAR,
                    count_total INTEGER DEFAULT (0),
                    count_seen INTEGER DEFAULT (0),
                    count_new INTEGER DEFAULT (0),
                    count_differ INTEGER DEFAULT (0),
                    count_delete INTEGER DEFAULT (0),
                    count_task_error INTEGER DEFAULT (0),
                    count_export_error INTEGER DEFAULT (0),
                    last_doc_id VARCHAR);
                CREATE TRIGGER delete_till_100 INSERT ON tasks
                    WHEN (SELECT count(*) FROM tasks)>100
                BEGIN
                    DELETE FROM tasks WHERE tasks.id IN (
                        SELECT id FROM tasks ORDER BY id LIMIT (SELECT count(*) - 100 FROM tasks)
                    );
                END;
                CREATE TABLE "sphinxbuilder" (
                    "id" INTEGER,
                    "owner" TEXT,
                    "name" TEXT,
                    "path" TEXT,
                    "hash" TEXT,
                    PRIMARY KEY("id")
                );
                """)
            cnx.commit()
        except Exception as e:
            log.error(
                f'Unable to create datastore scheme in lootnika_tasks_journal.db: {e}'
            )
    cur.execute('SELECT self_status FROM lootnika')
    rec = cur.fetchone()
    if rec is None:
        # brand new store: seed the status row
        cur.execute(
            "INSERT INTO lootnika('self_status') VALUES ('starting lootnika')"
        )
    elif rec and rec[0] != 'shutdown successfully':
        # previous run did not close cleanly — flag for forced resync
        log.warning(
            f'The previous shutdown was unexpected. Last lootnika status: {rec[0]}.'
        )
        self.isRestored = True
    cur.execute("UPDATE lootnika SET self_status='starting lootnika'")
    cnx.commit()
    cur.close()
    return cnx
def check_rst(ds: Datastore) -> dict:
    """Compare documentation sources on disk with hashes stored in the
    'sphinxbuilder' table and detect whether any .rst file changed.

    Scans docs/rst/ for lootnika itself plus every exporter and picker
    plugin, hashing each file with CityHash64.

    Args:
        ds: datastore used to read the previously recorded state.

    Returns:
        dict: fresh state {owner: {'path', 'type', 'rst': {rst_path:
        {'file', 'hash'}}}}; a warning is logged when a rebuild
        (<make-doc>) is needed.

    Raises:
        Exception: wraps any underlying failure together with its
        traceback.
    """
    # TODO needs refactoring — the exporter/picker/lootnika scans are
    # near-duplicates of each other
    log.debug("Check documentation sources")
    changed = False
    try:
        # previous state, keyed owner -> rst path -> {'file', 'hash'}
        rows = ds.select('SELECT * FROM sphinxbuilder', )
        oldRst = {
            'lootnika': {
                'path': "docs/rst/",
                'type': 'lootnika',
                'rst': {}
            }
        }
        for row in rows:
            if row[1] not in oldRst:
                oldRst[row[1]] = {'rst': {}}
            oldRst[row[1]]['rst'][row[3]] = {'file': row[2], 'hash': row[4]}
        newRst = {
            'lootnika': {
                'path': "docs/rst/",
                'type': 'lootnika',
                'rst': {}
            }
        }
        for exporter in os.listdir(f'{homeDir}exporters'):
            path = f"exporters/{exporter}/docs/rst/"
            ls = os.listdir(f"{homeDir}{path}")
            if ls == []:
                log.warning(f"No documentation sources found for {exporter}")
                continue
            if exporter not in oldRst:
                log.info(f"Found new exporter docs: {exporter}")
                oldRst[exporter] = {
                    'path': path,
                    'type': 'exporter',
                    'rst': {}
                }
            newRst[exporter] = {'path': path, 'type': 'exporter', 'rst': {}}
            for file in ls:
                rst = f"{path}{file}"
                with open(f"{homeDir}{rst}", encoding='utf-8', mode='r') as cnt:
                    hsh = f"{cityhash.CityHash64(cnt.read())}"
                newRst[exporter]['rst'][rst] = {'file': file, 'hash': hsh}
                if rst in oldRst[exporter]['rst']:
                    if not oldRst[exporter]['rst'][rst]['hash'] == hsh:
                        changed = True
                else:
                    changed = True
        for picker in os.listdir(f'{homeDir}pickers'):
            path = f"pickers/{picker}/docs/rst/"
            ls = os.listdir(f"{homeDir}{path}")
            if ls == []:
                log.warning(f"No documentation sources found for {picker}")
                continue
            if picker not in oldRst:
                log.info(f"Found new picker docs: {picker}")
                # fix: was 'type': 'exporter' (copy-paste from the block
                # above) — pickers are typed as pickers, like in newRst
                oldRst[picker] = {'path': path, 'type': 'picker', 'rst': {}}
            newRst[picker] = {'path': path, 'type': 'picker', 'rst': {}}
            for file in ls:
                rst = f"{path}{file}"
                with open(f"{homeDir}{rst}", encoding='utf-8', mode='r') as cnt:
                    hsh = f"{cityhash.CityHash64(cnt.read())}"
                newRst[picker]['rst'][rst] = {'file': file, 'hash': hsh}
                if rst in oldRst[picker]['rst']:
                    if not oldRst[picker]['rst'][rst]['hash'] == hsh:
                        changed = True
                else:
                    changed = True
        # finally, lootnika's own documentation sources
        exporter = "lootnika"
        path = newRst[exporter]['path']
        ls = os.listdir(f"{homeDir}{path}")
        for file in ls:
            rst = f"{path}{file}"
            with open(f"{homeDir}{rst}", encoding='utf-8', mode='r') as cnt:
                hsh = f"{cityhash.CityHash64(cnt.read())}"
            newRst[exporter]['rst'][rst] = {'file': file, 'hash': hsh}
            if rst in oldRst[exporter]['rst']:
                if not oldRst[exporter]['rst'][rst]['hash'] == hsh:
                    changed = True
            else:
                changed = True
        if changed:
            log.warning(
                "Found changes in documentations. Start me with <make-doc> key."
            )
        return newRst
    except Exception:
        raise Exception(
            f"Fail check sources for help documentation: {traceback.format_exc()}"
        )
def process_inspector():
    """Background loop: watch configured applications/services and restart
    them when they disappear or stop answering their health-check URL.

    Iterates jobList forever (every intervalCheckMin seconds); per job it
    looks up the process by name (optionally scoped to a working
    directory), optionally probes job['url'], and on failure restarts via
    command, exe or system service, then notifies via toast/send_notify.
    """

    def get_pid(exe: str, exePath: str, workDir: str = None) -> int:
        # if workDir is given, match only by working directory;
        # otherwise match by full executable path.
        # Returns the pid, or None when no process matches.
        for p in psutil.process_iter(["name", 'exe', 'cwd']):
            # if 'calc1' in p.info['name']:
            #     sout(f"{p.pid} | {p.info['name']} | {p.info['cwd']} | {p.info['exe']}", 'violet' )
            if exe == p.info['name'].lower():
                if workDir:
                    if not p.info['cwd'].endswith('/'):
                        p.info['cwd'] = f"{p.info['cwd']}/"
                    if workDir.lower() == p.info['cwd'].replace('\\', '/', -1).lower():
                        return p.pid
                else:
                    if PLATFORM == 'nt':
                        exePath = f"{exePath}{exe}"
                    else:
                        exePath = exePath[:-1]
                    if exePath.lower() == p.info['exe'].replace('\\', '/', -1).lower():
                        return p.pid

    def restart(job: dict, exePid: int = None, killRecursive: bool = False) -> str:
        # Kill the old process tree (when exePid given), then relaunch via
        # command / exe / service and verify it stayed up. Returns a text
        # report that is appended to the notification body.
        data = ""
        status = 0
        failList[taskName]['attemp'] += 1
        if exePid:
            try:
                assert exePid != os.getpid(), "won't kill myself"
                parent = psutil.Process(exePid)
                children = parent.children(killRecursive)
                children.append(parent)
                # TODO try soft kill before hard
                for p in children:
                    try:
                        # p.send_signal(signal.SIGTERM)
                        p.kill()
                    except psutil.NoSuchProcess:
                        pass
                _, alive = psutil.wait_procs(children, timeout=60)
                if alive:
                    raise Exception(
                        f"Fail to kill process {exe} (PID {exePid})")
            except Exception as e:
                data = f'Fail to restart process {exe}: {e}\n'
                log.error(data)
                status = 2
        if status == 0:
            log.debug(f"Launch application {taskName}")
            whatStart = job['whatStart']
            if whatStart == 'command':
                target = job['command']
            elif whatStart == 'exe':
                target = f"{job['exePath']}{exe} {job['exeKey']}"
            else:
                # neither command nor exe: start as a system service below
                target = None
            if target:
                log.info(f"Starting {taskName}")
                try:
                    if PLATFORM == 'nt':
                        os.system(f"start cmd /c {target}")
                    else:
                        os.system(f"command {target} &")
                except Exception as e:
                    data = f"Fail to restart application: {exe} ({taskName}): {e}\n"
                    status = 3
            else:
                log.info(f"Starting service {job['service']}")
                try:
                    if PLATFORM == 'nt':
                        win32serviceutil.StartService(job['service'])
                    else:
                        os.system(f"systemctl start {job['service']}")
                except Exception as e:
                    e = traceback.format_exc()
                    log.error(str(e))
                    status = 3
                    data = f"Fail to start service: {job['service']} ({taskName}): {e}\n"
        # verify it did not fall down again right after starting
        # TODO measure downtime after start
        if status == 0:
            sleep(restartTime)
            if get_pid(exe, checkPath, workDir):
                data += 'Successfully restarted application'
                failList[taskName]['isAlive'] = False
                failList[taskName]['attemp'] -= 1
                log.info(data)
            else:
                data += f'Fail to start {taskName}'
                log.error(data)
        else:
            log.error(data)
        new_toast(taskName, data)
        return data

    sleep(3)
    selfName = "process_inspector"
    # per-task failure state: isAlive=True means "currently failing"
    failList = {}
    for job in jobList:
        failList[job] = {'isAlive': False, "attemp": 0}
    while True:
        try:
            for job in jobList.values():
                taskName = job['task']
                exe = job['exe'].lower()
                checkPath = job['checkPath']
                exePath = job['exePath']
                workDir = job['workDir']
                doRestart = job['doRestart']
                alwaysWork = job['alwaysWork']
                restartTime = job['restartTime']
                respTime = job['respTime']
                status = 0
                body = ''
                log.info(f'Check app {taskName}')
                exePid = get_pid(exe, checkPath, workDir)
                if exePid and not job['checkUrl']:
                    log.debug(f"{taskName} is fine.")
                elif exePid and job['checkUrl']:
                    # process is up — also probe its health-check URL
                    log.debug(f"Found {taskName}. Check http status")
                    try:
                        res = requests.get(job['url'], timeout=respTime)
                        if res.status_code != 200:
                            raise Exception(
                                f"Server return status {res.status_code}")
                        log.debug(f"{taskName} is fine.")
                        if not failList[taskName]['isAlive']:
                            continue
                        else:
                            # was failing before — report recovery
                            failList[taskName]['isAlive'] = False
                            data = templater.tmpl_fill(selfName, 'alive')
                    except Exception:
                        status = 1
                        data = f"{taskName} didn't respond or return wrong answer. Trying to restart application\n"
                        new_toast(f'Restarting {taskName}', data)
                        log.warning(data)
                        body = templater.tmpl_fill(selfName, "badAnswer").replace(
                            "{{taskName}}", taskName, -1)
                        failList[taskName]['isAlive'] = True
                    if status != 0 and doRestart:
                        data += restart(job, exePid)
                    body += data
                    if 'eventScript' in job:
                        allowSend, body = execute_event_script(
                            log, job['eventScript'], taskName, 'badAnswer', body)
                    else:
                        allowSend = True
                    if allowSend:
                        send_notify(taskName, 'badAnswer', body)
                elif not exePid and alwaysWork:
                    # process missing but must always run — start it
                    body = templater.tmpl_fill(selfName, 'notFound').replace(
                        "{{taskName}}", taskName, -1)
                    data = f"Not found required application {taskName}. Trying to restart\n"
                    log.warning(data)
                    new_toast(f'Starting {taskName}', data)
                    data += restart(job, exePid)
                    body += data
                    new_toast('log_inspector', 'notFound')
                    if 'eventScript' in job:
                        allowSend, body = execute_event_script(
                            log, job['eventScript'], taskName, 'notFound', body)
                    else:
                        allowSend = True
                    if allowSend:
                        send_notify(taskName, 'notFound', body)
            sleep(intervalCheckMin)
        except Exception:
            e = traceback.format_exc()
            log.critical(str(e))
            break
def disk_inspector():
    """Background loop: watch free disk space for every configured path.

    For each cfg['tasks']['diskTask'] entry, computes free gigabytes and
    raises a 'critFree' or 'diskWarn' event (toast + optional event
    script + notification) when below the configured thresholds. Any
    unexpected error triggers a full shutdown via shutdown_me.
    """

    def fill_tmpl(event: str) -> str:
        # Fill the notification template with the current task's values
        # (reads critFree/diskFree/diskUsage/taskName/diskWarn from the
        # enclosing scope).
        body = templater.tmpl_fill(selfName, event)
        body = body.replace('{{critFree}}', str(critFree), -1)
        body = body.replace('{{diskFree}}', str(diskFree), -1)
        body = body.replace('{{diskUsage}}', diskUsage, -1)
        body = body.replace('{{taskName}}', taskName, -1)
        return body.replace('{{diskWarn}}', str(diskWarn), -1)

    log.debug("disk_inspector started")
    selfName = 'disk_inspector'
    while True:
        for taskName, task in cfg['tasks']['diskTask'].items():
            critFree = task['critFree']
            diskUsage = task['diskUsage']
            diskWarn = task['diskWarn']
            try:
                # free space in GiB (1073741824 bytes), 2 decimal places
                diskFree = round(
                    shutil.disk_usage(diskUsage).free / 1073741824, 2)
                if diskFree < critFree:
                    log.error(
                        f"Free disk space is critically small on {diskUsage}: {diskFree}"
                    )
                    event = 'critFree'
                    body = fill_tmpl(event)
                    new_toast(
                        diskUsage,
                        f"Free disk space is critically small: {diskFree}")
                    if 'eventScript' in task:
                        allowSend, body = execute_event_script(
                            log, task['eventScript'], taskName, event, body)
                    else:
                        allowSend = True
                    if allowSend:
                        send_notify(taskName, event, body)
                elif diskFree < diskWarn:
                    log.warning(
                        f"Free disk space is ends {diskUsage}: {diskFree}GB")
                    event = 'diskWarn'
                    body = fill_tmpl(event)
                    new_toast(diskUsage,
                              f"Free disk space is ends: {diskFree}GB")
                    if 'eventScript' in task:
                        allowSend, body = execute_event_script(
                            log, task['eventScript'], taskName, event, body)
                    else:
                        allowSend = True
                    if allowSend:
                        send_notify(taskName, event, body)
                elif diskFree > diskWarn:
                    # NOTE(review): diskFree == diskWarn logs nothing
                    log.info(f"disk {diskUsage}: {diskFree}GB free")
            except FileNotFoundError:
                log.error(f'disk_inspector: wrong path: {diskUsage}')
            except Exception as e:
                log.critical(f'disk_inspector: {traceback.format_exc()}')
                shutdown_me(9, 9)
        sleep(intervalCheckMin)