Пример #1
0
def send_notify(taskName: str, event: str, body: str):
    """
    Send a report about a task event, suppressing repeats.

    The time and body of the last report of every (task, event) pair are
    kept in ``sendedNotify``. A report is rejected when the same event was
    reported less than ``resendTime`` ago, or when
    ``cfg['notify']['onlyChanges']`` is set and the body equals the stored
    one. When sending fails the record is dropped, so the next attempt is
    not throttled.

    :param taskName: name of the task the event belongs to
    :param event: event identifier
    :param body: text of the report
    :return: None; failures are logged, not raised
    """
    def forget_record():
        # The record may not exist yet when the failure happened early;
        # the cleanup must never raise KeyError on its own.
        sendedNotify.get(taskName, {}).pop(event, None)

    try:
        now = dtime.datetime.now()
        if taskName not in sendedNotify:
            sendedNotify[taskName] = {}

        if event not in sendedNotify[taskName]:
            # first report of this event: remember it before sending
            sendedNotify[taskName][event] = {"dtm": now, "body": body}
        else:
            last = sendedNotify[taskName][event]
            if now - last['dtm'] < resendTime:
                log.info(
                    f"Reject report of an event {event}: is already sent.")
                return

            if cfg['notify']['onlyChanges'] and last['body'] == body:
                log.info(
                    f"Reject report of an event {event}: is not changed from last sent"
                )
                return

        log.debug(f"New report of an event {taskName}: {event}")

        if notify.send_notify(taskName, event, body):  # update
            sendedNotify[taskName][event] = {"dtm": now, "body": body}
        else:
            forget_record()

    except Exception as e:
        log.error(f"Fail send notify: {e}")
        forget_record()
Пример #2
0
    def send_notify(self, app:str, event:str, body:str) -> bool:
        """
        Send the report by e-mail using the settings stored in ``self.cfg``.

        :param app: name of the application (task); used in the log only
        :param event: event identifier; not used by this transport
        :param body: text of the message
        :return: True when the message was handed to the server,
            False on any failure (the reason is logged)
        """
        try:
            # Build the message headers and body
            msg = MIMEMultipart('mixed')
            msg['Subject'] = templater.tmpl_fill(self.name, 'subject')
            msg['From'] = self.cfg['fromHeader']
            msg['To'] = self.cfg['sendTo']
            msg.attach(MIMEText(body))
        except Exception as e:
            log.error(str(e))
            # without a message there is nothing to send
            return False

        log.debug(f"Connecting to email server {self.cfg['server']}")
        s = None
        try:
            if self.cfg["useSSL"]:
                s = smtplib.SMTP_SSL(host=self.cfg['server'], port=self.cfg['port'])
                s.ehlo()
            else:
                s = smtplib.SMTP(self.cfg['server'], self.cfg['port'])
                # Handshake, mandatory. ehlo()/starttls() return
                # (code, message) tuples, so they cannot be chained.
                s.ehlo()
                s.starttls()
                s.ehlo()
            s.login(self.cfg['user'], self.cfg['password'])

            log.debug("Sending report")
            s.sendmail(self.cfg["fromHeader"], self.cfg["sendTo"], msg.as_string())

            log.info(f"Report of an event {app} sent")
            return True
        except Exception as e:
            # only OSError-like exceptions carry errno
            if getattr(e, 'errno', None) == 11004:
                log.error("Fail to connect to email server")
            else:
                log.error("Fail to send report: %s" % e)
            return False
        finally:
            if s is not None:
                try:
                    s.quit()
                except OSError:
                    # the connection is already broken; just free the socket
                    s.close()
Пример #3
0
    def crash(self, s: str) -> bool:
        """
        Log a critical failure and start the shutdown in a separate thread.

        :param s: message describing the failure
        :return: always True
        """
        # TODO write in datastore error message
        log.critical(s)
        for threadName, isAlive in self.myThreads.items():
            if isAlive:
                continue
            log.critical(f"Core thread {threadName} is dead")

        log.debug('Calling shutdown')
        stopper = Thread(name="shutdown", target=shutdown_me, args=(1, ''))
        stopper.start()
        return True
Пример #4
0
def get_blacklist_module():
    """
    Run the downloader of every blacklist source listed in ``moudle_name``.

    Each name is imported as ``get_blacklist.<name>`` and its ``main()`` is
    called. A failure of one downloader is logged together with its cause
    and does not stop the remaining ones. An import failure still
    propagates to the caller.
    """
    # Local import keeps this block self-contained.
    import importlib

    for file_name in moudle_name:
        # import_module returns the submodule itself; the former
        # ``__import__(..., fromlist=True)`` passed a non-iterable fromlist,
        # which fails as soon as the target is a package.
        module = importlib.import_module(
            'get_blacklist.{0}'.format(file_name))
        log.debug('Downloading {0}.'.format(file_name))
        try:
            module.main()
            log.debug('Download {0} completed.'.format(file_name))
        except Exception as e:
            log.error('Download {0} failed: {1}'.format(file_name, e))
Пример #5
0
def first_start_calc(cfg: dict, onStart=True):
    """
    Calculate the moment of the first start of the tasks.

    The following starts are calculated by the scheduler itself.

    :param cfg: schedule settings; the keys ``taskStartTime``,
        ``taskCycles``, ``repeatMin``, ``tasks`` and ``startTask`` are read
    :param onStart: when true, the planned start time is written to the log
    :return: tuple ``(startTime, taskCycles, repeatMin)``; for a disabled
        schedule it is ``(None, -1, -1)``
    """
    def delay_calc(taskStartTime):
        startTime = dtime.datetime.now()
        if taskStartTime.lower() == 'now':
            return startTime

        current = dtime.datetime.now()
        nowSec = current.hour * 3600 + current.minute * 60 + current.second
        try:
            parsed = dtime.datetime.strptime(taskStartTime, '%H:%M:%S')
            targetSec = parsed.hour * 3600 + parsed.minute * 60 + parsed.second
            if nowSec > targetSec:
                # rest of today plus the time from midnight to the start
                delay = 86400 - nowSec + targetSec
                note = f"Tasks will start at {taskStartTime}"
            else:
                delay = targetSec - nowSec
                note = f"Tasks will start today at {taskStartTime}"

            startTime += dtime.timedelta(seconds=delay)
            if onStart:
                log.info(note)
        except Exception as e:
            log.error(
                f'Check parameter taskStartTime: {e}. Correct format used HH:MM:SS'
            )
            time.sleep(2)
            shutdown_me(1, '')
        return startTime

    taskStartTime = cfg["taskStartTime"]
    taskCycles = cfg["taskCycles"]
    repeatMin = cfg["repeatMin"]
    taskList = cfg['tasks']

    scheduleEnabled = (cfg["startTask"] and taskCycles != 0
                       and taskList is not None)
    if not scheduleEnabled:
        # Here the schedule can be launched only by a command.
        # Then taskCycles gets +1, the tasks start at once and the cycle
        # runs a single time; taskCycles becomes 0 and the schedule ends.
        return None, -1, -1

    log.debug(
        f"Tasks count: {len(taskList)}. Tasks cycles count: {taskCycles}")
    return delay_calc(taskStartTime), taskCycles, repeatMin
Пример #6
0
 def __init__(self, db: str):
     """
     Prepare the Datastore state and start its thread.

     :param db: database location; the value ':memory:' selects the
         in-memory mode and is reported with a warning
     """
     super(Datastore, self).__init__()
     log.debug("Starting Datastore thread")
     self.db = db
     self.name = 'Datastore'
     self.status = dict()
     self.requestQ = Queue()
     self.isRestored = False
     self.isMemory = db == ':memory:'
     self.isReady = False
     if self.isMemory:
         log.warning('Using inMemory Datastore')
     self.start()
Пример #7
0
def log_inspector():
    """
    Endless worker that looks for configured expressions in log files.

    Every task of ``cfg['tasks']['logTask']`` names a file and a list of
    templates. When the text of a template is found in the file, a toast is
    shown and an 'error' notification is sent, unless the optional event
    script of the task forbids it. A problem with one file is logged and
    the other tasks go on; any other error stops the worker.
    """
    log.debug("log_inspector started")
    selfName = 'log_inspector'
    event = 'error'

    def scan(taskName, task, logFile, templates):
        # TODO open if file is changed
        with open(logFile, encoding='utf-8') as f:
            cnt = f.read()

        for tmplName in templates:
            if templater.get_tmpl(selfName, tmplName) not in cnt:
                continue

            log.warning(f"Found log expression {taskName}: {tmplName}")
            body = templater.tmpl_fill(selfName, 'error')
            body = body.replace('{{taskName}}', taskName, -1)

            new_toast('log_inspector', event)
            allowSend = True
            if 'eventScript' in task:
                allowSend, body = execute_event_script(
                    log, task['eventScript'], taskName, event, body)

            if allowSend:
                send_notify(taskName, event, body)

    while True:
        try:
            for taskName, task in cfg['tasks']['logTask'].items():
                log.info(f"Check log {taskName}")
                logFile, templates = task['file'], task['tmpl']

                try:
                    scan(taskName, task, logFile, templates)
                except FileNotFoundError:
                    log.error(f"Not found log file {taskName}")
                except Exception as e:
                    log.error(f"Fail to parse log file {taskName}: {e}")

            sleep(intervalCheckMin * 2)
        except Exception:
            log.critical(str(traceback.format_exc()))
            break
Пример #8
0
    def run(self, Picker):
        """
        Main loop of the Scheduler.

        Waits until ``selfControl.started`` is set, then once per second
        starts a ``work_manager`` thread whenever task cycles remain and
        ``self._isTaskTime()`` is true. The loop ends when ``self.cmd``
        becomes 'stop'; after that all collected worker threads are joined.

        :param Picker: object stored in ``self.Picker`` for later use
        """
        log.debug("Starting Scheduler thread")
        self.Picker = Picker

        # wait for the start signal (or for an early stop command)
        while not selfControl.started and self.cmd != 'stop':
            time.sleep(0.2)

        while self.cmd != 'stop':
            self._get_workers()
            # print('%s >= %s is %s' %(dtime.datetime.now(), self.startTime, self._isTaskTime()))
            # message(('self.status',  self.status), clrSun)

            if self.status == 'ready':
                # if the schedule is not enabled or everything is done,
                # the scheduler stays in the waiting mode
                if self.taskCycles > 0:
                    self.status = 'wait'
                    if self._isTaskTime():
                        ht = Thread(name='work_manager',
                                    target=self._work_manager)
                        ht.start()
                        self.workers.append(ht)
                    # all following repeats are counted from the first one
            else:
                if self.taskCycles > 0:
                    if self._isTaskTime():
                        ht = Thread(name='work_manager',
                                    target=self._work_manager)
                        ht.start()
                        self.workers.append(ht)

                elif self.taskCycles == 0:  # -1 means the schedule is off
                    # NOTE(review): this branch is entered only when status
                    # was not 'ready' a moment ago, so the message can appear
                    # only if another thread changes the status in between -
                    # confirm that this is intended
                    if self.status == 'ready':
                        log.info('Tasks cycle done')

            # print('!#cycle Status', self.status)
            time.sleep(1)

        # on leaving the loop: wait until the workers finish
        # (original note also mentions cancelling timers)
        self.status = 'stop'
        self._get_workers()
        for ht in self.workers:
            # message(ht,clrSun)
            ht.join()

        log.debug("Stopped Scheduler thread")
        return
Пример #9
0
 def __init__(self):
     """
     Prepare the SelfControl state and start its thread.

     Records the PID of the current process and the user it runs as, and
     marks every core thread as not alive.
     """
     log.debug("Starting SelfControl thread")
     super(SelfControl, self).__init__()
     self.name = 'SelfControl'
     # liveness flag of every core thread
     self.myThreads = dict.fromkeys(
         ('RestServer', 'Scheduler', 'Datastore'), False)
     self.allThreads = list()
     self.pid = os.getpid()
     self.pidUser = psutil.Process(self.pid).username()
     self.resourcesUsage = dict(cpu=0.0)
     self.isVerified = False  # all modules are working
     self.started = False  # all modules have started
     self.exit = False  # ignore killed modules
     self.rate = 0.5  # check frequency
     self.start()
Пример #10
0
    def run(self):
        """
        Serve database requests taken from ``self.requestQ`` until closed.

        Every queue item is a tuple ``(req, arg, res, token)``:

        * ``'--close--'`` ends the loop and closes the connection;
        * ``'--commit--'`` commits the current transaction;
        * anything else is executed as SQL with ``arg``; when ``res`` is
          given, the rows are put into it followed by ``'--no more--'``.

        ``self.status[token]`` is switched from -1 to 0 when the request is
        done, or replaced by the error text when it fails.
        """
        def mark_done(token):
            if self.status[token] == -1:
                self.status[token] = 0

        cnx = self._create_db()
        cur = cnx.cursor()
        self.isReady = True
        noToken = object()
        while True:
            # reset each turn so a failed request never reports its error
            # under the token of the previous one
            token = noToken
            try:
                req, arg, res, token = self.requestQ.get()
                # print(token, self.status[token])
                # print(token,req)
                if req == '--close--':
                    mark_done(token)
                    self.requestQ.task_done()
                    break

                elif req == '--commit--':
                    cnx.commit()
                    mark_done(token)
                    self.requestQ.task_done()
                    continue

                # print(token,'!#run-run')
                cur.execute(req, arg)
                if res:
                    for row in cur:
                        res.put(row)
                    res.put('--no more--')
                    mark_done(token)

                self.requestQ.task_done()
            except Exception as e:
                # print(token, 'error')
                e = f'Unable to access to {self.name}: {e}'
                log.error(e)
                if token is not noToken:
                    self.status[token] = e
                self.requestQ.task_done()

        cnx.close()
        # NOTE(review): the close request is already acknowledged inside the
        # loop. This extra acknowledgement is kept for compatibility, but it
        # raises ValueError when nothing is left to acknowledge, which used
        # to kill the thread before the final log line - confirm against
        # close() whether it is needed at all.
        try:
            self.requestQ.task_done()
        except ValueError:
            pass
        log.debug("Stopped Datastore thread")
Пример #11
0
    def load_config(self, config: configparser, proxy:dict = None) -> dict:
        """
        Read the mail settings of this notifier from the configuration.

        The options ``sendTo``, ``server``, ``port``, ``useSSL``, ``user``,
        ``password`` and ``fromHeader`` are taken from the section named
        ``self.name`` and stored in ``self.cfg``.

        :param config: parsed configuration providing ``get``, ``getint``
            and ``getboolean``
        :param proxy: proxy settings, stored as is in ``self.cfg['proxy']``
        :return: the filled ``self.cfg``
        :raises Exception: when an option is missing or has a wrong type
        :raises SystemExit: when ``sendTo`` does not look like an address
        """
        self.cfg['proxy'] = proxy
        try:
            self.cfg["sendTo"] = config.get(self.name, "sendTo")
            self.cfg["server"] = config.get(self.name, "server")
            self.cfg["port"] = config.getint(self.name, "port")
            self.cfg["useSSL"] = config.getboolean(self.name, "useSSL")
            self.cfg["user"] = config.get(self.name, "user")
            self.cfg["password"] = config.get(self.name, "password")
            self.cfg["fromHeader"] = config.get(self.name, "fromHeader")
            log.info(f'Recipient mail address {self.cfg["sendTo"]}')
        except Exception as e:
            msg = f"Bad {self.name} configuration: {e}"
            log.error(msg)
            # keep the original error as the cause
            raise Exception(msg) from e

        # The dot before the domain suffix is literal now; the former
        # unescaped '.' accepted any character in its place. Hyphens and
        # dots are allowed so usual addresses are not rejected.
        if re.search(r'[\w.+-]+@[\w-]+(?:\.[\w-]+)+', self.cfg["sendTo"]):
            log.debug(f'Recipient mail address: {self.cfg["sendTo"]}')
        else:
            log.error("Wrong email sendTo.")
            raise SystemExit(1)

        return self.cfg
Пример #12
0
def shutdown_me(signum=1, frame=1):
    """
    Stop the modules in the required order.

    The REST server is asked to stop first, then the Scheduler, then the
    Datastore. Each next module is handled only after the flag of the
    previous one in ``selfControl.myThreads`` is cleared. A repeated call
    while a shutdown is already running returns at once.

    :param signum: not used here; presumably kept so the function can act
        as a signal handler
    :param frame: not used here, see ``signum``
    """
    log.warning(
        f'Lootnika stopping on {cfg["rest"]["host"]}:{cfg["rest"]["port"]}')
    if selfControl.exit:
        return

    selfControl.exit = True
    selfControl.rate = 0.3
    n = 0  # number of the last stage that was already requested
    try:
        while True:
            time.sleep(0.3)
            if not bool(selfControl.myThreads):
                break

            if selfControl.myThreads['RestServer']:
                if n < 1:
                    log.debug("Stopping REST server")
                    try:
                        if cfg["rest"]["host"] in ['::1', '0.0.0.0']:
                            host = '127.0.0.1'
                        else:
                            host = cfg["rest"]["host"]

                        cnx = httpClient.HTTPConnection(host,
                                                        cfg["rest"]["port"],
                                                        timeout=12)
                        try:
                            cnx.request(method="GET", url='/a=stop?stop')
                            cnx.getresponse()
                        finally:
                            # do not leave the socket open
                            cnx.close()
                    except Exception as e:
                        # best effort: the server may be unreachable already
                        log.debug(f"REST server stop request failed: {e}")
                    n = 1
                    continue
            elif selfControl.myThreads['Scheduler']:
                if n < 2:
                    log.debug("Stopping Scheduler thread")
                    scheduler.cmd = 'stop'
                    n = 2
            elif selfControl.myThreads['Datastore']:
                if n < 3:
                    log.debug("Stopping Datastore thread")
                    ds.close()
                    n = 3
            else:
                break

    except Exception:
        log.error(f'Shutdown failed: {traceback.format_exc()}')
    finally:
        selfControl.stop = True
        log.info("Lootnika stopped")
        if not stillWork:
            os._exit(1)
Пример #13
0
def main(gte, lte, timestamp, time_zone):
    """
    Match DNS names seen in ES against the blacklist and index alerts.

    The blacklist files dated today are used when both exist, otherwise
    the default ones. For every matched domain an alert document is built
    from its blacklist record. With ``others["alert_level"] == "info"``
    one document per domain is indexed. Otherwise one document is indexed
    per (source IP, answer) pair, plus a "warn" document for every source
    IP returned by ``es.second_check`` for each answered IPv4 address.

    :param gte: lower bound of the queried time range, passed to ES
    :param lte: upper bound of the queried time range, passed to ES
    :param timestamp: value stored as ``@timestamp`` of the alerts
    :param time_zone: time zone passed to the ES queries
    :raises Exception: errors of the ES query or of the indexing are
        logged and re-raised
    """
    today = datetime.datetime.now().strftime('%Y-%m-%d')
    blacklist_dir = os.path.join(data_path,
                                 'source' + '-' + today + ".json")
    blacklist_Trie_dir = os.path.join(data_path,
                                      'trie' + '-' + today + ".json")

    if not (os.path.exists(blacklist_dir)
            and os.path.exists(blacklist_Trie_dir)):
        blacklist_dir = os.path.join(data_path, 'source-default.json')
        blacklist_Trie_dir = os.path.join(data_path, 'trie-default.json')

    es = ESclient()
    try:
        log.debug('Getting ES DNS domain.')
        search_result = es.get_es_domain(gte=gte, lte=lte, time_zone=time_zone)
        log.debug('Get ES DNS domain completed.')
    except Exception as e:
        log.error("Get ES DNS domain failed.\n{0}".format(e))
        raise

    split_DNSList = get_split_DNSList(search_result)

    blacklist_Trie = load_dict(blacklist_Trie_dir)
    match_DNSList, match_blacklist = find_match_DNS(blacklist_Trie,
                                                    split_DNSList)
    match_DNSList, match_blacklist = check_whitelist(match_DNSList,
                                                     match_blacklist)
    log.debug('Match DNS list : {0}'.format(match_DNSList))
    log.debug('Match DNS blacklist : {0}'.format(match_blacklist))

    # Insert the matched DNS records back into ES
    if not match_DNSList:
        return

    # Raw string with an escaped dot: the separator between octets must be
    # a literal '.', not any character.
    ipv4_pattern = re.compile(
        r'^(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}$'
    )
    try:
        blacklist = load_dict(blacklist_dir)
        for i, blacklist_parts in enumerate(match_blacklist):
            domain = u'{}'.format('.'.join(blacklist_parts))
            # the two lists are parallel: same index, same match
            domain_es = '.'.join(match_DNSList[i])
            doc = dict(blacklist[domain])
            source = doc.pop('source')
            doc['domain'] = domain_es
            doc['eventid'] = 102002
            doc['@timestamp'] = timestamp
            doc['level'] = "info"
            doc['type'] = "mal_dns"
            doc['desc_type'] = "[mal_dns] Request of Malicious Domain Name Detection"
            doc['desc_subtype'] = "[{0}] Intelligence comes from:{1}".format(
                doc['subtype'], source)
            if others["alert_level"] == "info":
                es.es_index(doc)
                if syslogger:
                    syslogger.info(doc)
                continue
            search_result = es.get_domain_info(gte=gte,
                                               lte=lte,
                                               domain=domain_es,
                                               time_zone=time_zone)

            sip_answer_dict = get_sip_answer_dict(search_result)

            # ``doc`` is reused for every alert of this domain; the fields
            # specific to one alert are removed again after indexing.
            for sip in sip_answer_dict:

                answer_list = sip_answer_dict[sip]
                doc['level'] = "info"
                doc['sip'] = sip
                doc['src_dept'] = get_dept_info(sip)

                dip_list = []
                for answer in answer_list:
                    doc['answer'] = answer

                    if ipv4_pattern.match(answer):
                        dip_list.append(answer)
                        doc['dip'] = answer
                        dipGeo = get_ipip_geo(answer)
                        doc['dst_country'] = dipGeo[0]
                        doc['dst_province'] = dipGeo[1]
                    else:
                        doc.pop("dip", "")
                        doc.pop("dst_country", "")
                        doc.pop("dst_province", "")

                    es.es_index(doc)
                    if syslogger:
                        syslogger.info(doc)

                    doc.pop("dip", "")
                    doc.pop("answer", "")

                for dip in dip_list:
                    sip_list = es.second_check(gte=gte,
                                               lte=lte,
                                               time_zone=time_zone,
                                               dip=dip)
                    for sip in sip_list:
                        doc['dip'] = dip
                        doc["sip"] = sip
                        doc['src_dept'] = get_dept_info(sip)
                        dipGeo = get_ipip_geo(dip)
                        doc['dst_country'] = dipGeo[0]
                        doc['dst_province'] = dipGeo[1]
                        doc["level"] = "warn"
                        es.es_index(doc)
                        if syslogger:
                            syslogger.info(doc)

                        doc.pop("dip", "")
                        doc.pop("sip", "")
                        doc.pop("level", "")
                        doc.pop("src_dept", "")
                        doc.pop("dst_country", "")
                        doc.pop("dst_province", "")
    except Exception as e:
        log.error(
            "Insert the alert of threat DNS to ES failed.\n{0}".format(e))
        raise
Пример #14
0
def _hash_rst_files(path: str, files: list, oldFiles: dict,
                    newFiles: dict) -> bool:
    """Hash the listed files of ``path`` into ``newFiles``; tell if any is new or changed."""
    changed = False
    for file in files:
        rst = f"{path}{file}"
        with open(f"{homeDir}{rst}", encoding='utf-8', mode='r') as cnt:
            hsh = f"{cityhash.CityHash64(cnt.read())}"

        newFiles[rst] = {'file': file, 'hash': hsh}
        if rst not in oldFiles or oldFiles[rst]['hash'] != hsh:
            changed = True

    return changed


def _check_plugin_docs(kind: str, oldRst: dict, newRst: dict) -> bool:
    """Scan the docs of every plugin in the ``<kind>s`` directory; tell if any changed."""
    changed = False
    for plugin in os.listdir(f'{homeDir}{kind}s'):
        path = f"{kind}s/{plugin}/docs/rst/"
        ls = os.listdir(f"{homeDir}{path}")
        if ls == []:
            log.warning(f"No documentation sources found for {plugin}")
            continue

        if plugin not in oldRst:
            log.info(f"Found new {kind} docs: {plugin}")
            oldRst[plugin] = {'path': path, 'type': kind, 'rst': {}}

        newRst[plugin] = {'path': path, 'type': kind, 'rst': {}}
        if _hash_rst_files(path, ls, oldRst[plugin]['rst'],
                           newRst[plugin]['rst']):
            changed = True

    return changed


def check_rst(ds: Datastore) -> dict:
    """
    Compare the documentation sources on disk with the stored hashes.

    The known hashes are read from the ``sphinxbuilder`` table. The sources
    of every exporter, every picker and of lootnika itself are hashed anew.
    When a file is new or its hash differs, a warning asks to rebuild the
    documentation.

    :param ds: datastore used to read the ``sphinxbuilder`` table
    :return: the current state as
        ``{name: {'path': ..., 'type': ..., 'rst': {rst: {'file', 'hash'}}}}``
    :raises Exception: on any failure, with the traceback in the message
    """
    #TODO need refactoring
    log.debug("Check documentation sources")
    try:
        rows = ds.select('SELECT * FROM sphinxbuilder', )
        oldRst = {
            'lootnika': {
                'path': "docs/rst/",
                'type': 'lootnika',
                'rst': {}
            }
        }
        for row in rows:
            if row[1] not in oldRst:
                oldRst[row[1]] = {'rst': {}}

            oldRst[row[1]]['rst'][row[3]] = {'file': row[2], 'hash': row[4]}

        newRst = {
            'lootnika': {
                'path': "docs/rst/",
                'type': 'lootnika',
                'rst': {}
            }
        }

        changed = False
        for kind in ('exporter', 'picker'):
            if _check_plugin_docs(kind, oldRst, newRst):
                changed = True

        # own documentation of lootnika
        path = newRst['lootnika']['path']
        ls = os.listdir(f"{homeDir}{path}")
        if _hash_rst_files(path, ls, oldRst['lootnika']['rst'],
                           newRst['lootnika']['rst']):
            changed = True

        if changed:
            log.warning(
                "Found changes in documentations. Start me with <make-doc> key."
            )

        return newRst
    except Exception as e:
        raise Exception(
            f"Fail check sources for help documentation: {traceback.format_exc()}"
        ) from e
Пример #15
0
                            fromlist=['Picker'])
        return getattr(module, 'Picker')

    except ModuleNotFoundError as e:
        log.fatal(f"Can't initialize picker {pickerType}: {e}")
        raise SystemExit(1)
    except AttributeError as e:
        log.fatal(f'Wrong picker initializing: {e}')
        raise SystemExit(1)
    except Exception as e:
        log.fatal(f'Fail initialize picker: {e}')
        raise SystemExit(1)


# Start-up sequence executed when this module is imported.
# NOTE(review): the condition is `!=`, so nothing here runs when the file
# is launched as a script - presumably intended, confirm.
if __name__ != "__main__":
    log.debug("Starting main thread")

    # the constructors of SelfControl and Datastore start their own threads
    selfControl = SelfControl()
    ds = Datastore(f'{homeDir}lootnika_tasks_journal.db')

    sphinxbuilder.check_rst(ds)

    # imported here, not at the top of the file
    # NOTE(review): presumably because scheduler needs the names defined
    # above - confirm
    from scheduler import Scheduler, first_start_calc
    startTime, taskCycles, repeatMin = first_start_calc(cfg['schedule'])

    # Scheduler and Picker must see each other
    scheduler = Scheduler(cfg['schedule']['tasks'], taskCycles, repeatMin,
                          startTime)
    Picker = load_picker()
    Thread(name='Scheduler', target=scheduler.run, args=(Picker, )).start()
Пример #16
0
def process_inspector():
    """
    Endless worker that watches the applications listed in ``jobList``.

    For every job the process is looked up by its executable name. When
    the job has ``checkUrl`` set, the URL is requested as well. An
    application that does not answer (and has ``doRestart``), or that is
    missing (and has ``alwaysWork``), is restarted. Every such event is
    reported with a toast and a notification, unless the optional event
    script of the job forbids it. An unexpected error stops the worker.
    """
    def get_pid(exe: str, exePath: str, workDir: str = None) -> int:
        """
        Find the PID of a running process by its executable name.

        :param exe: lower-case name of the executable
        :param exePath: path used for the comparison when ``workDir`` is
            not given
        :param workDir: when given, only the working directory of the
            process is compared
        :return: PID of the first matching process, None when not found
        """
        # if give workDir, will check only it
        if workDir:
            wantedDir = workDir.lower()
            wantedExe = None
        else:
            wantedDir = None
            # Built once: doing it inside the loop changed the path again
            # on every further process with the same name.
            if PLATFORM == 'nt':
                wantedExe = f"{exePath}{exe}".lower()
            else:
                wantedExe = exePath[:-1].lower()

        for p in psutil.process_iter(["name", 'exe', 'cwd']):
            # psutil reports None for an attribute it is not allowed to
            # read; such a process cannot be identified, so skip it
            if exe != (p.info['name'] or '').lower():
                continue

            if wantedDir:
                cwd = p.info['cwd']
                if not cwd:
                    continue
                cwd = cwd.replace('\\', '/')
                if not cwd.endswith('/'):
                    cwd = f"{cwd}/"

                if wantedDir == cwd.lower():
                    return p.pid
            else:
                exeFile = p.info['exe']
                if exeFile and wantedExe == exeFile.replace('\\',
                                                            '/').lower():
                    return p.pid

        return None

    def restart(job: dict,
                exePid: int = None,
                killRecursive: bool = False) -> str:
        """
        Kill the application (when running) and start it again.

        Uses ``taskName``, ``exe``, ``checkPath``, ``workDir`` and
        ``restartTime`` of the job currently handled by the main loop.

        :param job: settings of the job
        :param exePid: PID of the running process; nothing is killed
            when it is empty
        :param killRecursive: passed to ``children()`` of the process
        :return: text describing the result
        """
        data = ""
        status = 0
        failList[taskName]['attemp'] += 1
        if exePid:
            try:
                assert exePid != os.getpid(), "won't kill myself"
                parent = psutil.Process(exePid)
                children = parent.children(killRecursive)
                children.append(parent)

                # TODO try soft kill before hard
                for p in children:
                    try:
                        # p.send_signal(signal.SIGTERM)
                        p.kill()
                    except psutil.NoSuchProcess:
                        pass

                _, alive = psutil.wait_procs(children, timeout=60)
                if alive:
                    raise Exception(
                        f"Fail to kill process {exe} (PID {exePid})")
            except Exception as e:
                data = f'Fail to restart process {exe}: {e}\n'
                log.error(data)
                status = 2

        if status == 0:
            log.debug(f"Launch application {taskName}")
            whatStart = job['whatStart']

            if whatStart == 'command':
                target = job['command']
            elif whatStart == 'exe':
                target = f"{job['exePath']}{exe} {job['exeKey']}"
            else:
                # anything else is started as a service
                target = None

            if target:
                log.info(f"Starting {taskName}")
                try:
                    if PLATFORM == 'nt':
                        os.system(f"start cmd /c {target}")
                    else:
                        os.system(f"command {target} &")
                except Exception as e:
                    data = f"Fail to restart application: {exe} ({taskName}): {e}\n"
                    status = 3
            else:
                log.info(f"Starting service {job['service']}")
                try:
                    if PLATFORM == 'nt':
                        win32serviceutil.StartService(job['service'])
                    else:
                        os.system(f"systemctl start {job['service']}")

                except Exception as e:
                    e = traceback.format_exc()
                    log.error(str(e))
                    status = 3
                    data = f"Fail to start service: {job['service']} ({taskName}): {e}\n"

            # check that it has not crashed again
            # TODO count the time of the crash after the start
            if status == 0:
                sleep(restartTime)
                if get_pid(exe, checkPath, workDir):
                    data += 'Successfully restarted application'
                    failList[taskName]['isAlive'] = False
                    failList[taskName]['attemp'] -= 1
                    log.info(data)
                else:
                    data += f'Fail to start {taskName}'
                    log.error(data)
            else:
                log.error(data)

        new_toast(taskName, data)
        return data

    sleep(3)
    selfName = "process_inspector"
    # despite its name, 'isAlive' is set to True while a job is failing
    failList = {}
    for job in jobList:
        failList[job] = {'isAlive': False, "attemp": 0}

    while True:
        try:
            for job in jobList.values():
                taskName = job['task']
                exe = job['exe'].lower()
                checkPath = job['checkPath']
                exePath = job['exePath']
                workDir = job['workDir']
                doRestart = job['doRestart']
                alwaysWork = job['alwaysWork']
                restartTime = job['restartTime']
                respTime = job['respTime']
                status = 0
                body = ''

                log.info(f'Check app {taskName}')
                exePid = get_pid(exe, checkPath, workDir)

                if exePid and not job['checkUrl']:
                    log.debug(f"{taskName} is fine.")
                elif exePid and job['checkUrl']:
                    log.debug(f"Found {taskName}. Check http status")
                    try:
                        res = requests.get(job['url'], timeout=respTime)
                        if res.status_code != 200:
                            raise Exception(
                                f"Server return status {res.status_code}")

                        log.debug(f"{taskName} is fine.")

                        if not failList[taskName]['isAlive']:
                            continue
                        else:
                            # the application has recovered
                            failList[taskName]['isAlive'] = False
                            # NOTE(review): this text never reaches `body`,
                            # so the recovery is reported as 'badAnswer'
                            # with an empty body - confirm the intent
                            data = templater.tmpl_fill(selfName, 'alive')
                    except Exception:
                        status = 1
                        data = f"{taskName} didn't respond or return wrong answer. Trying to restart application\n"
                        new_toast(f'Restarting {taskName}', data)
                        log.warning(data)

                        body = templater.tmpl_fill(selfName,
                                                   "badAnswer").replace(
                                                       "{{taskName}}",
                                                       taskName, -1)
                        failList[taskName]['isAlive'] = True

                    if status != 0 and doRestart:
                        data += restart(job, exePid)
                        body += data

                    if 'eventScript' in job:
                        allowSend, body = execute_event_script(
                            log, job['eventScript'], taskName, 'badAnswer',
                            body)
                    else:
                        allowSend = True

                    if allowSend:
                        send_notify(taskName, 'badAnswer', body)

                elif not exePid and alwaysWork:
                    body = templater.tmpl_fill(selfName, 'notFound').replace(
                        "{{taskName}}", taskName, -1)
                    data = f"Not found required application {taskName}. Trying to restart\n"
                    log.warning(data)
                    new_toast(f'Starting {taskName}', data)

                    data += restart(job, exePid)
                    body += data

                    new_toast('log_inspector', 'notFound')
                    if 'eventScript' in job:
                        allowSend, body = execute_event_script(
                            log, job['eventScript'], taskName, 'notFound',
                            body)
                    else:
                        allowSend = True

                    if allowSend:
                        send_notify(taskName, 'notFound', body)

            sleep(intervalCheckMin)
        except Exception:
            e = traceback.format_exc()
            log.critical(str(e))
            break
Пример #17
0
def disk_inspector():
    """Poll free space for every configured disk task and report shortages.

    Runs forever. On each pass, every entry of ``cfg['tasks']['diskTask']``
    is checked: the free space of ``task['diskUsage']`` is converted from
    bytes to GiB (rounded to 2 decimals) and compared with the task's
    ``critFree`` and ``diskWarn`` thresholds. A shortage is logged, shown as
    a toast and, unless the task's optional ``eventScript`` disallows it,
    passed to ``send_notify``. After a full pass the loop sleeps for
    ``intervalCheckMin``.

    A missing path is logged and the task is skipped. Any other error is
    logged with its traceback and followed by ``shutdown_me(9, 9)``.
    """
    def fill_tmpl(event: str) -> str:
        # Closure: reads the values of the task currently being checked.
        body = templater.tmpl_fill(selfName, event)
        body = body.replace('{{critFree}}', str(critFree), -1)
        body = body.replace('{{diskFree}}', str(diskFree), -1)
        body = body.replace('{{diskUsage}}', diskUsage, -1)
        body = body.replace('{{taskName}}', taskName, -1)
        return body.replace('{{diskWarn}}', str(diskWarn), -1)

    def report(event: str, toastText: str) -> None:
        # Shared tail of both shortage branches: toast, optional event
        # script (which may rewrite the body or disallow sending), notify.
        body = fill_tmpl(event)
        new_toast(diskUsage, toastText)
        if 'eventScript' in task:
            allowSend, body = execute_event_script(
                log, task['eventScript'], taskName, event, body)
        else:
            allowSend = True

        if allowSend:
            send_notify(taskName, event, body)

    log.debug("disk_inspector started")
    selfName = 'disk_inspector'
    bytesInGiB = 1024 ** 3

    while True:
        for taskName, task in cfg['tasks']['diskTask'].items():
            critFree = task['critFree']
            diskUsage = task['diskUsage']
            diskWarn = task['diskWarn']

            try:
                diskFree = round(
                    shutil.disk_usage(diskUsage).free / bytesInGiB, 2)
                if diskFree < critFree:
                    log.error(
                        f"Free disk space is critically small on {diskUsage}: {diskFree}"
                    )
                    report(
                        'critFree',
                        f"Free disk space is critically small: {diskFree}")
                elif diskFree < diskWarn:
                    log.warning(
                        f"Free disk space is ends {diskUsage}: {diskFree}GB")
                    report('diskWarn',
                           f"Free disk space is ends: {diskFree}GB")
                else:
                    # Covers diskFree == diskWarn as well, which previously
                    # produced no log record at all.
                    log.info(f"disk {diskUsage}: {diskFree}GB free")

            except FileNotFoundError:
                log.error(f'disk_inspector: wrong path: {diskUsage}')
            except Exception:
                log.critical(f'disk_inspector: {traceback.format_exc()}')
                shutdown_me(9, 9)
        sleep(intervalCheckMin)
Пример #18
0
    def restart(job: dict,
                exePid: int = None,
                killRecursive: bool = False) -> str:
        """Kill the running instance (if any) and start the task again.

        Increments ``failList[taskName]['attemp']`` on entry and decrements
        it again once the relaunched process is found.

        Args:
            job: task settings; reads ``whatStart`` and, depending on it,
                ``command``, ``exePath``/``exeKey`` or ``service``.
            exePid: PID of the instance to kill first; a falsy value skips
                the kill step.
            killRecursive: forwarded to ``psutil.Process.children`` as its
                ``recursive`` argument.

        Returns:
            Text report of the outcome; the same text is shown as a toast.
        """
        data = ""
        status = 0
        failList[taskName]['attemp'] += 1
        if exePid:
            try:
                # Explicit check instead of ``assert``: asserts are removed
                # under ``python -O`` and the watchdog would kill itself.
                if exePid == os.getpid():
                    raise RuntimeError("won't kill myself")

                try:
                    parent = psutil.Process(exePid)
                    children = parent.children(killRecursive)
                    children.append(parent)
                except psutil.NoSuchProcess:
                    # Target exited on its own before we got to it: nothing
                    # to kill, go straight to the relaunch.
                    children = []

                # TODO try soft kill before hard
                for p in children:
                    try:
                        # p.send_signal(signal.SIGTERM)
                        p.kill()
                    except psutil.NoSuchProcess:
                        pass

                _, alive = psutil.wait_procs(children, timeout=60)
                if alive:
                    raise Exception(
                        f"Fail to kill process {exe} (PID {exePid})")
            except Exception as e:
                data = f'Fail to restart process {exe}: {e}\n'
                log.error(data)
                status = 2

        if status == 0:
            log.debug(f"Launch application {taskName}")
            whatStart = job['whatStart']

            if whatStart == 'command':
                target = job['command']
            elif whatStart == 'exe':
                target = f"{job['exePath']}{exe} {job['exeKey']}"
            else:
                # Any other value means the task is started as a service.
                target = None

            if target:
                log.info(f"Starting {taskName}")
                try:
                    if PLATFORM == 'nt':
                        os.system(f"start cmd /c {target}")
                    else:
                        os.system(f"command {target} &")
                except Exception as e:
                    data = f"Fail to restart application: {exe} ({taskName}): {e}\n"
                    status = 3
            else:
                log.info(f"Starting service {job['service']}")
                try:
                    if PLATFORM == 'nt':
                        win32serviceutil.StartService(job['service'])
                    else:
                        os.system(f"systemctl start {job['service']}")

                except Exception as e:
                    e = traceback.format_exc()
                    log.error(str(e))
                    status = 3
                    data = f"Fail to start service: {job['service']} ({taskName}): {e}\n"

            # make sure it has not crashed again right after the start
            # TODO measure the time between start and crash
            if status == 0:
                sleep(restartTime)
                if get_pid(exe, checkPath, workDir):
                    data += 'Successfully restarted application'
                    failList[taskName]['isAlive'] = False
                    failList[taskName]['attemp'] -= 1
                    log.info(data)
                else:
                    data += f'Fail to start {taskName}'
                    log.error(data)
            else:
                log.error(data)

        new_toast(taskName, data)
        return data