def send_notify(taskName: str, event: str, body: str):
    """Send a notification about a task event unless it must be suppressed.

    The last report of every (taskName, event) pair is kept in the
    module-level ``sendedNotify`` mapping. A repeated event is rejected when
    it arrives sooner than ``resendTime`` after the stored one, or when
    ``cfg['notify']['onlyChanges']`` is set and the body did not change.

    :param taskName: name of the task the event belongs to
    :param event: event identifier
    :param body: notification text
    :return: None; every error is logged instead of being raised
    """
    try:
        now = dtime.datetime.now()
        taskEvents = sendedNotify.setdefault(taskName, {})
        last = taskEvents.get(event)
        if last is None:
            taskEvents[event] = {"dtm": now, "body": body}
        else:
            if now - last['dtm'] < resendTime:
                log.info(
                    f"Reject report of an event {event}: is already sent.")
                return
            if cfg['notify']['onlyChanges'] and last['body'] == body:
                log.info(
                    f"Reject report of an event {event}: is not changed from last sent"
                )
                return

        log.debug(f"New report of an event {taskName}: {event}")
        if notify.send_notify(taskName, event, body):
            # remember what was delivered and when
            taskEvents[event] = {"dtm": now, "body": body}
        else:
            # delivery failed: forget the record so the next attempt is not rejected
            taskEvents.pop(event, None)
    except Exception as e:
        log.error(f"Fail send notify: {e}")
        # The record may not exist yet (the failure could have happened before
        # it was created), so remove it without raising KeyError from the handler.
        sendedNotify.get(taskName, {}).pop(event, None)
def send_notify(self, app: str, event: str, body: str) -> bool:
    """Send ``body`` by e-mail using the SMTP settings stored in ``self.cfg``.

    :param app: name used in the "sent" log message
    :param event: event identifier (not used by this method)
    :param body: plain text placed into the message
    :return: True when the message was handed to the server, False otherwise
    """
    try:
        # Build the message headers and body
        msg = MIMEMultipart('mixed')
        msg['Subject'] = templater.tmpl_fill(self.name, 'subject')
        msg['From'] = self.cfg['fromHeader']
        msg['To'] = self.cfg['sendTo']
        msg.attach(MIMEText(body))
    except Exception as e:
        log.error(str(e))
        # without a message there is nothing to send
        return False

    log.debug(f"Connecting to email server {self.cfg['server']}")
    s = None
    try:
        if self.cfg["useSSL"]:
            s = smtplib.SMTP_SSL(host=self.cfg['server'],
                                 port=self.cfg['port'])
            s.ehlo()
        else:
            s = smtplib.SMTP(self.cfg['server'], self.cfg['port'])
            # ehlo()/starttls() return (code, message) tuples, so the calls
            # cannot be chained; the handshake is repeated after STARTTLS.
            s.ehlo()
            s.starttls()
            s.ehlo()
        s.login(self.cfg['user'], self.cfg['password'])

        log.debug("Sending report")
        s.sendmail(self.cfg["fromHeader"], self.cfg["sendTo"],
                   msg.as_string())
        log.info(f"Report of an event {app} sent")
        return True
    except Exception as e:
        # only OSError-like exceptions carry errno
        if getattr(e, 'errno', None) == 11004:
            log.error("Fail to connect to email server")
        else:
            log.error("Fail to send report: %s" % e)
        return False
    finally:
        if s is not None:
            # best-effort close; the result of the send is already decided
            try:
                s.quit()
            except Exception:
                s.close()
def crash(self, s: str) -> bool:
    """Log a critical failure, report dead core threads and start shutdown.

    :param s: message written to the log at critical level
    :return: always True
    """
    # TODO write in datastore error message
    log.critical(s)
    for threadName, alive in self.myThreads.items():
        if alive:
            continue
        log.critical(f"Core thread {threadName} is dead")

    log.debug('Calling shutdown')
    # shutdown_me runs in its own thread named "shutdown"
    stopper = Thread(name="shutdown", target=shutdown_me, args=(1, ''))
    stopper.start()
    return True
def get_blacklist_module():
    """Import every ``get_blacklist.<name>`` module and run its ``main()``.

    The names come from the module-level ``moudle_name`` iterable. A module
    that fails to import or to run is logged and skipped, so one broken
    source does not stop the remaining downloads.
    """
    import importlib

    for file_name in moudle_name:
        log.debug('Downloading {0}.'.format(file_name))
        try:
            # the import is inside the try: a missing/broken module must not
            # abort the whole loop
            module = importlib.import_module(
                'get_blacklist.{0}'.format(file_name))
            module.main()
            log.debug('Download {0} completed.'.format(file_name))
        except Exception as e:
            log.error('Download {0} failed.\n{1}'.format(file_name, e))
def first_start_calc(cfg: dict, onStart=True):
    """
    Work out when the tasks start for the first time.
    All later starts are calculated by the scheduler itself.

    :param cfg: schedule settings; the keys "taskStartTime", "taskCycles",
        "repeatMin", "tasks" and "startTask" are read
    :param onStart: when true, the chosen start time is written to the log
    :return: tuple (startTime, taskCycles, repeatMin); (None, -1, -1) when
        the schedule is not started automatically
    """
    def delay_calc(taskStartTime):
        startTime = dtime.datetime.now()
        if taskStartTime.lower() == 'now':
            return startTime

        current = dtime.datetime.now()
        nowSec = current.hour * 3600 + current.minute * 60 + current.second
        try:
            parsed = dtime.datetime.strptime(taskStartTime, '%H:%M:%S')
            targetSec = parsed.hour * 3600 + parsed.minute * 60 + parsed.second
            if nowSec > targetSec:
                # the moment already passed today:
                # rest of today + time from midnight to the start tomorrow
                startTime += dtime.timedelta(
                    seconds=86400 - nowSec + targetSec)
                if onStart:
                    log.info(f"Tasks will start at {taskStartTime}")
            else:
                startTime += dtime.timedelta(seconds=targetSec - nowSec)
                if onStart:
                    log.info(f"Tasks will start today at {taskStartTime}")
        except Exception as e:
            log.error(
                f'Check parameter taskStartTime: {e}. Correct format used HH:MM:SS'
            )
            time.sleep(2)
            shutdown_me(1, '')
        return startTime

    taskStartTime = cfg["taskStartTime"]
    taskCycles = cfg["taskCycles"]
    repeatMin = cfg["repeatMin"]
    taskList = cfg['tasks']

    scheduleOff = (not cfg["startTask"] or taskCycles == 0
                   or taskList is None)
    if scheduleOff:
        # Here the schedule can be launched only by a command.
        # The command then adds 1 to taskCycles, the tasks start at once,
        # run a single cycle, taskCycles becomes 0 and the schedule ends.
        return None, -1, -1

    log.debug(
        f"Tasks count: {len(taskList)}. Tasks cycles count: {taskCycles}")
    return delay_calc(taskStartTime), taskCycles, repeatMin
def __init__(self, db: str):
    """Set up the Datastore state and start it.

    :param db: database location; the special value ':memory:' marks the
        store as in-memory
    """
    super(Datastore, self).__init__()
    log.debug("Starting Datastore thread")

    self.db = db
    self.name = 'Datastore'
    self.status = {}
    self.requestQ = Queue()
    self.isRestored = False
    self.isReady = False

    self.isMemory = db == ':memory:'
    if self.isMemory:
        log.warning('Using inMemory Datastore')

    self.start()
def log_inspector():
    """Scan the configured log files for known expressions, forever.

    Each task in ``cfg['tasks']['logTask']`` names a file and a list of
    template names. When the text of a template is found in the file, an
    'error' event is raised: a toast is shown, the optional event script may
    veto or rewrite the message, and the notification is sent. The loop
    sleeps ``intervalCheckMin * 2`` between passes and stops on an
    unexpected error.
    """
    log.debug("log_inspector started")
    selfName = 'log_inspector'

    while True:
        try:
            for taskName, task in cfg['tasks']['logTask'].items():
                log.info(f"Check log {taskName}")
                logFile = task['file']
                templates = task['tmpl']
                try:
                    # TODO open if file is changed
                    with open(logFile, encoding='utf-8') as f:
                        content = f.read()

                        for tmplName in templates:
                            needle = templater.get_tmpl(selfName, tmplName)
                            if needle not in content:
                                continue

                            log.warning(
                                f"Found log expression {taskName}: {tmplName}")
                            event = 'error'
                            body = templater.tmpl_fill(selfName, 'error')
                            body = body.replace('{{taskName}}', taskName, -1)
                            new_toast('log_inspector', event)

                            allowSend = True
                            if 'eventScript' in task:
                                allowSend, body = execute_event_script(
                                    log, task['eventScript'], taskName, event,
                                    body)
                            if allowSend:
                                send_notify(taskName, event, body)
                except FileNotFoundError:
                    log.error(f"Not found log file {taskName}")
                except Exception as e:
                    log.error(f"Fail to parse log file {taskName}: {e}")
            sleep(intervalCheckMin * 2)
        except Exception:
            log.critical(str(traceback.format_exc()))
            break
def run(self, Picker):
    """Main loop of the Scheduler thread.

    Waits until ``selfControl.started`` is set (or a 'stop' command
    arrives), then polls once per second and starts a ``work_manager``
    thread whenever ``self._isTaskTime()`` is true and task cycles remain.
    After a 'stop' command it joins every worker still listed in
    ``self.workers``.

    NOTE(review): the nesting of the if/else branches below was
    reconstructed from a flattened source - confirm against the original
    file before relying on it.

    :param Picker: stored as ``self.Picker``; not used further in this method
    """
    log.debug("Starting Scheduler thread")
    self.Picker = Picker
    # do not schedule anything until all modules report that they started
    while not selfControl.started and self.cmd != 'stop':
        time.sleep(0.2)

    while self.cmd != 'stop':
        self._get_workers()
        # print('%s >= %s is %s' %(dtime.datetime.now(), self.startTime, self._isTaskTime()))
        # message(('self.status', self.status), clrSun)
        if self.status == 'ready':
            # if the schedule is not enabled or everything has been done,
            # it switches to waiting mode
            if self.taskCycles > 0:
                self.status = 'wait'
                if self._isTaskTime():
                    ht = Thread(name='work_manager', target=self._work_manager)
                    ht.start()
                    self.workers.append(ht)
        # all subsequent repeats are counted from the first one
        else:
            if self.taskCycles > 0:
                if self._isTaskTime():
                    ht = Thread(name='work_manager', target=self._work_manager)
                    ht.start()
                    self.workers.append(ht)
            elif self.taskCycles == 0:  # -1 means the schedule is switched off
                if self.status == 'ready':
                    log.info('Tasks cycle done')
        # print('!#cycle Status', self.status)
        time.sleep(1)

    # on leaving the loop: wait for the workers to finish and cancel timers
    self.status = 'stop'
    self._get_workers()
    for ht in self.workers:
        # message(ht,clrSun)
        ht.join()
    log.debug("Stopped Scheduler thread")
    return
def __init__(self):
    """Set up the SelfControl state and start it."""
    log.debug("Starting SelfControl thread")
    super(SelfControl, self).__init__()
    self.name = 'SelfControl'

    # liveness flag of every core thread, all False at the beginning
    self.myThreads = dict.fromkeys(
        ('RestServer', 'Scheduler', 'Datastore'), False)
    self.allThreads = []

    self.pid = os.getpid()
    self.pidUser = psutil.Process(self.pid).username()
    self.resourcesUsage = {"cpu": 0.0}

    self.isVerified = False  # all modules are working
    self.started = False  # all modules have started
    self.exit = False  # ignore killed modules
    self.rate = 0.5  # check frequency

    self.start()
def run(self):
    """Serve database requests from ``self.requestQ`` until '--close--'.

    Every queue item is a ``(req, arg, res, token)`` tuple:

    * ``'--close--'`` ends the loop and closes the connection;
    * ``'--commit--'`` commits the current transaction;
    * anything else is executed as SQL with ``arg`` as parameters; when
      ``res`` is given, the rows are put into it followed by the
      ``'--no more--'`` marker.

    ``self.status[token]`` is switched from -1 to 0 on success and set to an
    error message on failure. ``task_done()`` is called exactly once per
    item taken from the queue.
    """
    def mark_ok(token):
        # -1 is the value this loop replaces with 0 after a successful request
        if self.status[token] == -1:
            self.status[token] = 0

    cnx = self._create_db()
    cur = cnx.cursor()
    self.isReady = True

    while True:
        token = None
        try:
            req, arg, res, token = self.requestQ.get()
            # print(token, self.status[token])
            # print(token,req)
            if req == '--close--':
                mark_ok(token)
                self.requestQ.task_done()
                break
            elif req == '--commit--':
                cnx.commit()
                mark_ok(token)
                self.requestQ.task_done()
                continue

            # print(token,'!#run-run')
            cur.execute(req, arg)
            if res:
                for row in cur:
                    res.put(row)
                res.put('--no more--')
            mark_ok(token)
            self.requestQ.task_done()
        except Exception as e:
            # print(token, 'error')
            e = f'Unable to access to {self.name}: {e}'
            log.error(e)
            # token stays None when the queue item itself was malformed
            if token is not None:
                self.status[token] = e
            self.requestQ.task_done()

    cnx.close()
    # No extra task_done() here: the '--close--' item was already accounted
    # for, and a surplus call raises ValueError.
    log.debug("Stopped Datastore thread")
def load_config(self, config: configparser, proxy: dict = None) -> dict:
    """Read the mail settings from the section named ``self.name``.

    :param config: parsed configuration that provides ``get``, ``getint``
        and ``getboolean``
    :param proxy: stored unchanged under ``self.cfg['proxy']``
    :return: ``self.cfg`` filled with sendTo, server, port, useSSL, user,
        password and fromHeader
    :raises Exception: when an option is missing or has a wrong type
    :raises SystemExit: when ``sendTo`` contains no e-mail address
    """
    self.cfg['proxy'] = proxy
    try:
        self.cfg["sendTo"] = config.get(self.name, "sendTo")
        self.cfg["server"] = config.get(self.name, "server")
        self.cfg["port"] = config.getint(self.name, "port")
        self.cfg["useSSL"] = config.getboolean(self.name, "useSSL")
        self.cfg["user"] = config.get(self.name, "user")
        self.cfg["password"] = config.get(self.name, "password")
        self.cfg["fromHeader"] = config.get(self.name, "fromHeader")
        log.info(f'Recipient mail address {self.cfg["sendTo"]}')
    except Exception as err:
        e = f"Bad {self.name} configuration: {err}"
        log.error(e)
        # keep the original error as the cause
        raise Exception(e) from err

    # The dot before the top-level domain is escaped (an unescaped '.'
    # matches any character); dots, '+' and '-' are allowed in the local
    # part and '-' in the domain labels.
    if re.search(r'[\w.+-]+@[\w-]+(?:\.[\w-]+)+', self.cfg["sendTo"]):
        log.debug(f'Recipient mail address: {self.cfg["sendTo"]}')
    else:
        log.error("Wrong email sendTo.")
        raise SystemExit(1)
    return self.cfg
def shutdown_me(signum=1, frame=1):
    """Stop the modules in the required order.

    The REST server is asked to stop first (through an HTTP request), then
    the Scheduler, then the Datastore. The loop polls
    ``selfControl.myThreads`` every 0.3 s and moves to the next module once
    the previous one is reported as not alive. At the end the process exits
    with ``os._exit(1)`` unless ``stillWork`` is set.

    :param signum: not used by this function
    :param frame: not used by this function
    """
    log.warning(
        f'Lootnika stopping on {cfg["rest"]["host"]}:{cfg["rest"]["port"]}')
    if selfControl.exit:
        # shutdown is already in progress
        return

    selfControl.exit = True
    selfControl.rate = 0.3
    n = 0  # number of the last stop step that has been requested
    try:
        while True:
            time.sleep(0.3)
            if not bool(selfControl.myThreads):
                break

            if selfControl.myThreads['RestServer']:
                if n < 1:
                    log.debug("Stopping REST server")
                    cnx = None
                    try:
                        if cfg["rest"]["host"] in ['::1', '0.0.0.0']:
                            host = '127.0.0.1'
                        else:
                            host = cfg["rest"]["host"]
                        cnx = httpClient.HTTPConnection(host,
                                                        cfg["rest"]["port"],
                                                        timeout=12)
                        cnx.request(method="GET", url='/a=stop?stop')
                        cnx.getresponse()
                    except Exception as e:
                        # best effort: the server may already be gone
                        log.debug(f"REST server stop request failed: {e}")
                    finally:
                        if cnx is not None:
                            cnx.close()
                    n = 1
                    continue
            elif selfControl.myThreads['Scheduler']:
                if n < 2:
                    log.debug("Stopping Scheduler thread")
                    scheduler.cmd = 'stop'
                    n = 2
            elif selfControl.myThreads['Datastore']:
                if n < 3:
                    log.debug("Stopping Datastore thread")
                    ds.close()
                    n = 3
            else:
                break
    except Exception:
        log.error(f'Shutdown failed: {traceback.format_exc()}')
    finally:
        selfControl.stop = True
        log.info("Lootnika stopped")
        if not stillWork:
            os._exit(1)
def main(gte, lte, timestamp, time_zone):
    """Match DNS names found in ES against the blacklist and index alerts.

    Today's blacklist files are used when both exist, otherwise the
    ``*-default.json`` pair. Every matched domain produces alert documents
    that are written with ``es.es_index`` and, when ``syslogger`` is set,
    also sent to it.

    :param gte: lower bound passed to the ES queries
    :param lte: upper bound passed to the ES queries
    :param timestamp: value stored in the ``@timestamp`` field of every alert
    :param time_zone: time zone passed to the ES queries
    :raises Exception: any error of the ES query or of the alert insertion
        is logged and re-raised
    """
    today = datetime.datetime.now().strftime('%Y-%m-%d')
    blacklist_dir = os.path.join(data_path,
                                 'source-{0}.json'.format(today))
    blacklist_Trie_dir = os.path.join(data_path,
                                      'trie-{0}.json'.format(today))
    if not (os.path.exists(blacklist_dir)
            and os.path.exists(blacklist_Trie_dir)):
        blacklist_dir = os.path.join(data_path, 'source-default.json')
        blacklist_Trie_dir = os.path.join(data_path, 'trie-default.json')

    es = ESclient()
    try:
        log.debug('Getting ES DNS domain.')
        search_result = es.get_es_domain(gte=gte, lte=lte,
                                         time_zone=time_zone)
        log.debug('Get ES DNS domain completed.')
    except Exception as e:
        log.error("Get ES DNS domain failed.\n{0}".format(e))
        raise

    split_DNSList = get_split_DNSList(search_result)
    blacklist_Trie = load_dict(blacklist_Trie_dir)
    match_DNSList, match_blacklist = find_match_DNS(blacklist_Trie,
                                                    split_DNSList)
    match_DNSList, match_blacklist = check_whitelist(match_DNSList,
                                                     match_blacklist)
    log.debug('Match DNS list : {0}'.format(match_DNSList))
    log.debug('Match DNS blacklist : {0}'.format(match_blacklist))

    # Insert the matched DNS records back into ES
    if not match_DNSList:
        return

    # Raw string, and the octet separator is an escaped dot: an unescaped
    # '.' accepts any character between the octets.
    ipv4_pattern = re.compile(
        r'^(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}$'
    )
    try:
        blacklist = load_dict(blacklist_dir)
        # match_blacklist and match_DNSList are indexed in parallel
        for i, black_parts in enumerate(match_blacklist):
            domain = u'{}'.format('.'.join(black_parts))
            domain_es = '.'.join(match_DNSList[i])

            doc = dict(blacklist[domain])
            source = doc.pop('source')
            doc['domain'] = domain_es
            doc['eventid'] = 102002
            doc['@timestamp'] = timestamp
            doc['level'] = "info"
            doc['type'] = "mal_dns"
            doc['desc_type'] = "[mal_dns] Request of Malicious Domain Name Detection"
            doc['desc_subtype'] = "[{0}] Intelligence comes from:{1}".format(
                doc['subtype'], source)

            if others["alert_level"] == "info":
                # a single alert per domain, no per-client details
                es.es_index(doc)
                if syslogger:
                    syslogger.info(doc)
                continue

            search_result = es.get_domain_info(gte=gte, lte=lte,
                                               domain=domain_es,
                                               time_zone=time_zone)
            sip_answer_dict = get_sip_answer_dict(search_result)
            for sip in sip_answer_dict:
                answer_list = sip_answer_dict[sip]
                doc['level'] = "info"
                doc['sip'] = sip
                doc['src_dept'] = get_dept_info(sip)

                # one "info" alert per DNS answer; IPv4 answers are kept
                # for the second check below
                dip_list = []
                for answer in answer_list:
                    doc['answer'] = answer
                    if ipv4_pattern.match(answer):
                        dip_list.append(answer)
                        doc['dip'] = answer
                        dipGeo = get_ipip_geo(answer)
                        doc['dst_country'] = dipGeo[0]
                        doc['dst_province'] = dipGeo[1]
                    else:
                        doc.pop("dip", "")
                        doc.pop("dst_country", "")
                        doc.pop("dst_province", "")
                    es.es_index(doc)
                    if syslogger:
                        syslogger.info(doc)
                doc.pop("dip", "")
                doc.pop("answer", "")

                # one "warn" alert per source returned by the second check
                for dip in dip_list:
                    sip_list = es.second_check(gte=gte, lte=lte,
                                               time_zone=time_zone, dip=dip)
                    # print sip_list
                    for second_sip in sip_list:
                        doc['dip'] = dip
                        doc["sip"] = second_sip
                        doc['src_dept'] = get_dept_info(second_sip)
                        dipGeo = get_ipip_geo(dip)
                        doc['dst_country'] = dipGeo[0]
                        doc['dst_province'] = dipGeo[1]
                        doc["level"] = "warn"
                        es.es_index(doc)
                        if syslogger:
                            syslogger.info(doc)

                # drop the per-client fields before the next client
                doc.pop("dip", "")
                doc.pop("sip", "")
                doc.pop("level", "")
                doc.pop("src_dept", "")
                doc.pop("dst_country", "")
                doc.pop("dst_province", "")
    except Exception as e:
        log.error(
            "Insert the alert of threat DNS to ES failed.\n{0}".format(e))
        raise
def check_rst(ds: Datastore) -> dict:
    """Hash the documentation sources and compare them with the stored state.

    The stored state is read from the ``sphinxbuilder`` table. The sources
    of every exporter, every picker and of lootnika itself (``docs/rst/``)
    are hashed; a new file or a different hash marks the documentation as
    changed and a warning is logged.

    :param ds: datastore that provides ``select``
    :return: mapping name -> {'path', 'type', 'rst'}, where 'rst' maps a
        relative file path to {'file', 'hash'}
    :raises Exception: on any failure, with the traceback in the message
    """
    log.debug("Check documentation sources")
    changed = False
    try:
        # stored state: name -> {relative path: {'file', 'hash'}}
        # columns used: 1 - owner name, 2 - file, 3 - relative path, 4 - hash
        oldRst = {}
        for row in ds.select('SELECT * FROM sphinxbuilder', ):
            oldRst.setdefault(row[1], {})[row[3]] = {
                'file': row[2],
                'hash': row[4]
            }

        newRst = {
            'lootnika': {
                'path': "docs/rst/",
                'type': 'lootnika',
                'rst': {}
            }
        }

        for kind in ('exporter', 'picker'):
            for name in os.listdir(f'{homeDir}{kind}s'):
                path = f"{kind}s/{name}/docs/rst/"
                ls = os.listdir(f"{homeDir}{path}")
                if not ls:
                    log.warning(
                        f"No documentation sources found for {name}")
                    continue
                if name not in oldRst:
                    log.info(f"Found new {kind} docs: {name}")

                entries, dirChanged = _hash_rst_files(
                    path, ls, oldRst.get(name, {}))
                newRst[name] = {'path': path, 'type': kind, 'rst': entries}
                changed = changed or dirChanged

        # lootnika's own documentation
        path = newRst['lootnika']['path']
        entries, dirChanged = _hash_rst_files(
            path, os.listdir(f"{homeDir}{path}"), oldRst.get('lootnika', {}))
        newRst['lootnika']['rst'].update(entries)
        changed = changed or dirChanged

        if changed:
            log.warning(
                "Found changes in documentations. Start me with <make-doc> key."
            )
        return newRst
    except Exception as e:
        raise Exception(
            f"Fail check sources for help documentation: {traceback.format_exc()}"
        ) from e


def _hash_rst_files(path: str, files: list, known: dict) -> tuple:
    """Hash ``files`` located in ``homeDir + path``; return (entries, changed).

    ``changed`` is True when a file is absent from ``known`` or its hash
    differs from the stored one.
    """
    entries = {}
    changed = False
    for file in files:
        rst = f"{path}{file}"
        with open(f"{homeDir}{rst}", encoding='utf-8', mode='r') as cnt:
            hsh = f"{cityhash.CityHash64(cnt.read())}"
        entries[rst] = {'file': file, 'hash': hsh}
        if rst not in known or known[rst]['hash'] != hsh:
            changed = True
    return entries, changed
fromlist=['Picker']) return getattr(module, 'Picker') except ModuleNotFoundError as e: log.fatal(f"Can't initialize picker {pickerType}: {e}") raise SystemExit(1) except AttributeError as e: log.fatal(f'Wrong picker initializing: {e}') raise SystemExit(1) except Exception as e: log.fatal(f'Fail initialize picker: {e}') raise SystemExit(1) if __name__ != "__main__": log.debug("Starting main thread") selfControl = SelfControl() ds = Datastore(f'{homeDir}lootnika_tasks_journal.db') sphinxbuilder.check_rst(ds) from scheduler import Scheduler, first_start_calc startTime, taskCycles, repeatMin = first_start_calc(cfg['schedule']) # Scheduler и Picker должны видеть друг друга scheduler = Scheduler(cfg['schedule']['tasks'], taskCycles, repeatMin, startTime) Picker = load_picker() Thread(name='Scheduler', target=scheduler.run, args=(Picker, )).start()
def process_inspector():
    """Watch the configured applications and restart/report failing ones.

    For every job in ``jobList`` the process is looked up by name and path
    (or working directory). A found process may additionally be checked
    over HTTP; a missing or unresponsive one is restarted when the job
    allows it, and a notification is sent. The loop sleeps
    ``intervalCheckMin`` between passes and stops on an unexpected error.

    ``failList[task]['isAlive']`` is set to True when a failure is detected
    and back to False after recovery; ``'attemp'`` counts restart attempts
    that did not succeed.
    """

    def get_pid(exe: str, exePath: str, workDir: str = None) -> int:
        """Return the PID of the first matching process, or None.

        With ``workDir`` only the working directory is compared, otherwise
        the path of the executable.
        """
        # The expected executable path does not depend on the process, so
        # it is built once. Rebuilding it inside the loop from its own
        # previous value corrupts it after the first name match.
        if PLATFORM == 'nt':
            expectedExe = f"{exePath}{exe}".lower()
        else:
            expectedExe = exePath[:-1].lower()
        wantedDir = workDir.lower() if workDir else None

        for p in psutil.process_iter(["name", 'exe', 'cwd']):
            # psutil leaves None in fields it is not allowed to read
            if exe != (p.info['name'] or '').lower():
                continue

            if wantedDir:
                cwd = p.info['cwd']
                if cwd is None:
                    continue
                # normalise separators first, then guarantee a trailing slash
                cwd = cwd.replace('\\', '/', -1)
                if not cwd.endswith('/'):
                    cwd = f"{cwd}/"
                if wantedDir == cwd.lower():
                    return p.pid
            else:
                exeFile = p.info['exe']
                if exeFile is None:
                    continue
                if expectedExe == exeFile.replace('\\', '/', -1).lower():
                    return p.pid
        return None

    def restart(job: dict, exePid: int = None,
                killRecursive: bool = False) -> str:
        """Kill the process (if any), start the job again, return a report.

        Reads taskName, exe, restartTime, checkPath and workDir of the job
        being processed from the enclosing scope.
        """
        data = ""
        status = 0
        failList[taskName]['attemp'] += 1

        if exePid:
            try:
                assert exePid != os.getpid(), "won't kill myself"
                parent = psutil.Process(exePid)
                children = parent.children(killRecursive)
                children.append(parent)
                # TODO try soft kill before hard
                for p in children:
                    try:
                        # p.send_signal(signal.SIGTERM)
                        p.kill()
                    except psutil.NoSuchProcess:
                        pass
                _, alive = psutil.wait_procs(children, timeout=60)
                if alive:
                    raise Exception(
                        f"Fail to kill process {exe} (PID {exePid})")
            except Exception as e:
                data = f'Fail to restart process {exe}: {e}\n'
                log.error(data)
                status = 2

        if status == 0:
            log.debug(f"Launch application {taskName}")
            whatStart = job['whatStart']
            if whatStart == 'command':
                target = job['command']
            elif whatStart == 'exe':
                target = f"{job['exePath']}{exe} {job['exeKey']}"
            else:
                target = None

            if target:
                log.info(f"Starting {taskName}")
                try:
                    if PLATFORM == 'nt':
                        os.system(f"start cmd /c {target}")
                    else:
                        os.system(f"command {target} &")
                except Exception as e:
                    data = f"Fail to restart application: {exe} ({taskName}): {e}\n"
                    status = 3
            else:
                log.info(f"Starting service {job['service']}")
                try:
                    if PLATFORM == 'nt':
                        win32serviceutil.StartService(job['service'])
                    else:
                        os.system(f"systemctl start {job['service']}")
                except Exception as e:
                    e = traceback.format_exc()
                    log.error(str(e))
                    status = 3
                    data = f"Fail to start service: {job['service']} ({taskName}): {e}\n"

        # check that it has not crashed again
        # TODO measure the time of a crash after the start
        if status == 0:
            sleep(restartTime)
            if get_pid(exe, checkPath, workDir):
                data += 'Successfully restarted application'
                failList[taskName]['isAlive'] = False
                failList[taskName]['attemp'] -= 1
                log.info(data)
            else:
                data += f'Fail to start {taskName}'
                log.error(data)
        else:
            log.error(data)

        new_toast(taskName, data)
        return data

    sleep(3)
    selfName = "process_inspector"
    failList = {}
    # NOTE(review): failList is keyed by the jobList keys but read below by
    # job['task'] - presumably these are the same names; confirm.
    for job in jobList:
        failList[job] = {'isAlive': False, "attemp": 0}

    while True:
        try:
            for job in jobList.values():
                taskName = job['task']
                exe = job['exe'].lower()
                checkPath = job['checkPath']
                exePath = job['exePath']
                workDir = job['workDir']
                doRestart = job['doRestart']
                alwaysWork = job['alwaysWork']
                restartTime = job['restartTime']
                respTime = job['respTime']
                status = 0
                body = ''

                log.info(f'Check app {taskName}')
                exePid = get_pid(exe, checkPath, workDir)

                if exePid and not job['checkUrl']:
                    log.debug(f"{taskName} is fine.")

                elif exePid and job['checkUrl']:
                    log.debug(f"Found {taskName}. Check http status")
                    try:
                        res = requests.get(job['url'], timeout=respTime)
                        if res.status_code != 200:
                            raise Exception(
                                f"Server return status {res.status_code}")

                        log.debug(f"{taskName} is fine.")
                        if not failList[taskName]['isAlive']:
                            # no earlier failure: nothing to report
                            continue
                        else:
                            # recovered after a failure: report it
                            failList[taskName]['isAlive'] = False
                            data = templater.tmpl_fill(selfName, 'alive')
                    except Exception:
                        status = 1
                        data = f"{taskName} didn't respond or return wrong answer. Trying to restart application\n"
                        new_toast(f'Restarting {taskName}', data)
                        log.warning(data)
                        body = templater.tmpl_fill(
                            selfName, "badAnswer").replace(
                                "{{taskName}}", taskName, -1)
                        failList[taskName]['isAlive'] = True

                    if status != 0 and doRestart:
                        data += restart(job, exePid)
                    body += data

                    if 'eventScript' in job:
                        allowSend, body = execute_event_script(
                            log, job['eventScript'], taskName, 'badAnswer',
                            body)
                    else:
                        allowSend = True
                    if allowSend:
                        send_notify(taskName, 'badAnswer', body)

                elif not exePid and alwaysWork:
                    body = templater.tmpl_fill(selfName, 'notFound').replace(
                        "{{taskName}}", taskName, -1)
                    data = f"Not found required application {taskName}. Trying to restart\n"
                    log.warning(data)
                    new_toast(f'Starting {taskName}', data)
                    data += restart(job, exePid)
                    body += data
                    # titled with this inspector's own name
                    new_toast(selfName, 'notFound')

                    if 'eventScript' in job:
                        allowSend, body = execute_event_script(
                            log, job['eventScript'], taskName, 'notFound',
                            body)
                    else:
                        allowSend = True
                    if allowSend:
                        send_notify(taskName, 'notFound', body)

            sleep(intervalCheckMin)
        except Exception:
            e = traceback.format_exc()
            log.critical(str(e))
            break
def disk_inspector():
    """Check the free space of the configured disks, forever.

    Each task in ``cfg['tasks']['diskTask']`` gives a path (``diskUsage``)
    and two thresholds in GB: ``critFree`` and ``diskWarn``. Free space
    below ``critFree`` raises the 'critFree' event, below ``diskWarn`` the
    'diskWarn' event; otherwise the free space is only logged. The loop
    sleeps ``intervalCheckMin`` between passes.
    """
    def fill_tmpl(event: str) -> str:
        """Fill the template of ``event`` with the values of the current task."""
        body = templater.tmpl_fill(selfName, event)
        body = body.replace('{{critFree}}', str(critFree), -1)
        body = body.replace('{{diskFree}}', str(diskFree), -1)
        body = body.replace('{{diskUsage}}', diskUsage, -1)
        body = body.replace('{{taskName}}', taskName, -1)
        return body.replace('{{diskWarn}}', str(diskWarn), -1)

    def report(event: str, body: str):
        """Run the optional event script of the task, then send the notification."""
        if 'eventScript' in task:
            allowSend, body = execute_event_script(
                log, task['eventScript'], taskName, event, body)
        else:
            allowSend = True
        if allowSend:
            send_notify(taskName, event, body)

    log.debug("disk_inspector started")
    selfName = 'disk_inspector'

    while True:
        for taskName, task in cfg['tasks']['diskTask'].items():
            critFree = task['critFree']
            diskUsage = task['diskUsage']
            diskWarn = task['diskWarn']
            try:
                # bytes -> GB
                diskFree = round(
                    shutil.disk_usage(diskUsage).free / 1073741824, 2)

                if diskFree < critFree:
                    log.error(
                        f"Free disk space is critically small on {diskUsage}: {diskFree}"
                    )
                    event = 'critFree'
                    body = fill_tmpl(event)
                    new_toast(
                        diskUsage,
                        f"Free disk space is critically small: {diskFree}")
                    report(event, body)
                elif diskFree < diskWarn:
                    log.warning(
                        f"Free disk space is ends {diskUsage}: {diskFree}GB")
                    event = 'diskWarn'
                    body = fill_tmpl(event)
                    new_toast(diskUsage,
                              f"Free disk space is ends: {diskFree}GB")
                    report(event, body)
                else:
                    # also covers diskFree == diskWarn
                    log.info(f"disk {diskUsage}: {diskFree}GB free")
            except FileNotFoundError:
                log.error(f'disk_inspector: wrong path: {diskUsage}')
            except Exception:
                log.critical(f'disk_inspector: {traceback.format_exc()}')
                shutdown_me(9, 9)
        sleep(intervalCheckMin)
def restart(job: dict, exePid: int = None, killRecursive: bool = False) -> str:
    """Kill the running process (if any), start the job again, return a report.

    Besides its parameters the function reads ``failList``, ``taskName``,
    ``exe``, ``restartTime``, ``checkPath``, ``workDir`` and ``get_pid``
    from the surrounding scope.

    :param job: job settings; 'whatStart' selects between 'command', 'exe'
        and (any other value) a service start
    :param exePid: PID of the process to kill first; falsy to skip the kill
    :param killRecursive: passed to ``children()`` of the process
    :return: text describing the outcome
    """
    report = ""
    state = 0  # 0 - fine, 2 - kill failed, 3 - start failed
    failList[taskName]['attemp'] += 1

    if exePid:
        try:
            assert exePid != os.getpid(), "won't kill myself"
            root = psutil.Process(exePid)
            victims = root.children(killRecursive)
            victims.append(root)
            # TODO try soft kill before hard
            for proc in victims:
                try:
                    # proc.send_signal(signal.SIGTERM)
                    proc.kill()
                except psutil.NoSuchProcess:
                    pass
            _, survivors = psutil.wait_procs(victims, timeout=60)
            if survivors:
                raise Exception(f"Fail to kill process {exe} (PID {exePid})")
        except Exception as e:
            report = f'Fail to restart process {exe}: {e}\n'
            log.error(report)
            state = 2

    if state == 0:
        log.debug(f"Launch application {taskName}")
        mode = job['whatStart']
        target = None
        if mode == 'command':
            target = job['command']
        elif mode == 'exe':
            target = f"{job['exePath']}{exe} {job['exeKey']}"

        if not target:
            log.info(f"Starting service {job['service']}")
            try:
                if PLATFORM == 'nt':
                    win32serviceutil.StartService(job['service'])
                else:
                    os.system(f"systemctl start {job['service']}")
            except Exception as e:
                e = traceback.format_exc()
                log.error(str(e))
                state = 3
                report = f"Fail to start service: {job['service']} ({taskName}): {e}\n"
        else:
            log.info(f"Starting {taskName}")
            try:
                if PLATFORM == 'nt':
                    os.system(f"start cmd /c {target}")
                else:
                    os.system(f"command {target} &")
            except Exception as e:
                report = f"Fail to restart application: {exe} ({taskName}): {e}\n"
                state = 3

    # check that it has not crashed again
    # TODO measure the time of a crash after the start
    if state != 0:
        log.error(report)
    else:
        sleep(restartTime)
        if get_pid(exe, checkPath, workDir):
            report += 'Successfully restarted application'
            failList[taskName]['isAlive'] = False
            failList[taskName]['attemp'] -= 1
            log.info(report)
        else:
            report += f'Fail to start {taskName}'
            log.error(report)

    new_toast(taskName, report)
    return report