Пример #1
0
    def __load_jobs(self):
        """
        Rebuild ``self.job_list`` from the ``services`` and ``machines`` tables.

        One ``JobDetail`` is created per joined row, then check commands and
        relies are attached by ``__load_checks`` and ``__load_relies``. The new
        list replaces the old one under ``self.lock`` only after every step
        succeeded, so a failed reload leaves the previous list untouched.

        :return: True on success, None when any step failed
        :rtype: bool | None
        """
        try:
            sql = "SELECT services.id, services.service_name, machines.ssh_user, machines.ssh_ip, machines.ssh_port," \
                  "services.start_cmd, services.stop_cmd, services.is_active, services.auto_recover, services.mail_receiver " \
                  "FROM services,machines WHERE services.machine_id = machines.id"
            Logger.info(sql)
            self.cur.execute(sql)

            # Keyed by service id so the helpers can attach checks and relies.
            a_dict = {}
            for row in self.cur.fetchall():
                (job_id, service_name, ssh_user, ssh_ip, ssh_port,
                 start_cmd, stop_cmd, is_active, auto_recover, mail_receiver) = row
                a_dict[job_id] = JobDetail(job_id, service_name, ssh_user, ssh_ip, ssh_port,
                                           start_cmd, stop_cmd, is_active, auto_recover, mail_receiver)

            if not self.__load_checks(a_dict):
                return None

            if not self.__load_relies(a_dict):
                return None

            with self.lock:
                self.job_list = list(a_dict.values())
            return True
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt and
            # SystemExit are not swallowed by the reload.
            Logger.error(traceback.format_exc())
            return None
Пример #2
0
    def send(cls, receivers, subject, content):
        """
        Send a notification mail by POSTing it to the HTTP ``send_mail`` endpoint.

        When ``config.fake_mail`` is set the mail is only written to the log.
        The content is prefixed with the current local timestamp. Failures are
        logged and never raised to the caller.

        :type receivers: string
        :type subject: string
        :type content: string
        :return: None
        """
        try:
            if config.fake_mail:
                Logger.info('receivers=[%s], subject=[%s], content=[%s]' %
                            (receivers, subject, content))
                return

            url = 'http://f**k.you.com/send_mail'
            stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            a_dict = {
                'receiver': receivers,
                'subject': subject,
                'content': stamp + ": " + content,
            }

            # requests never times out by default; bound the wait so an
            # unresponsive mail endpoint cannot block the caller forever.
            ret = requests.post(url, data=a_dict, timeout=10)
            Logger.info("http_code[%s], http_response[%s]" %
                        (ret.status_code, ret.text))
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt and
            # SystemExit are not swallowed.
            Logger.error(traceback.format_exc())
0
    def __get_health(cls, item, raw_out):
        """
        Evaluate one check command's output and map it to a status code.

        The stripped output is compared with ``item.check_value`` using
        ``item.operator``: ``<``, ``<=``, ``==``, ``>=`` and ``>`` compare both
        sides as integers, ``include``/``exclude`` (case-insensitive) test for
        a substring. ``item.good_match`` tells whether a match means healthy.

        :type item: CheckCmd
        :type raw_out: string
        :return: ``StatusCode.GREEN_CODE`` when healthy, ``StatusCode.RED_CODE``
                 when unhealthy, ``StatusCode.WHITE_CODE`` when the operator is
                 not recognised, ``StatusCode.YELLOW_CODE`` when evaluating the
                 output raised (for example non-numeric output)
        """
        healthy_code = StatusCode.WHITE_CODE
        try:
            out = raw_out.strip()
            op = item.operator
            match = None
            if op == "<":
                match = int(out) < int(item.check_value)
            elif op == "<=":
                match = int(out) <= int(item.check_value)
            elif op == "==":
                match = int(out) == int(item.check_value)
            elif op == ">=":
                match = int(out) >= int(item.check_value)
            elif op == ">":
                match = int(out) > int(item.check_value)
            elif op.lower() == "include":
                match = item.check_value in out
            elif op.lower() == "exclude":
                match = item.check_value not in out

            expect_match = bool(item.good_match)
            if match is not None:
                # Healthy when the comparison result agrees with good_match.
                if match == expect_match:
                    healthy_code = StatusCode.GREEN_CODE
                else:
                    healthy_code = StatusCode.RED_CODE
        except Exception:
            # format_exc() keeps the exception type and stack; ``e.message`` is
            # deprecated and empty for multi-argument exceptions.
            import traceback
            Logger.error(traceback.format_exc())
            healthy_code = StatusCode.YELLOW_CODE
        # The computed code was previously dropped; the caller needs it.
        return healthy_code
Пример #4
0
 def logout(self):
     """
     Close ``self.client`` if one is set.

     :return: True when there was nothing to close or closing succeeded,
              None when closing raised
     :rtype: bool | None
     """
     try:
         if self.client:
             self.client.close()
         return True
     except Exception:
         # format_exc() keeps the exception type and stack; ``e.message`` is
         # deprecated and empty for multi-argument exceptions.
         import traceback
         Logger.error(traceback.format_exc())
         return None
0
 def login(self):
     """
     Create an ``SSHExecutor`` for this job's host and open the connection.

     The executor is built from the job's ssh ip, port and user together
     with ``config.key_file`` and ``config.key_pwd``, and stored in
     ``self.client``.

     :return: True when the connection was opened, None when creating or
              opening the executor raised
     :rtype: bool | None
     """
     try:
         self.client = SSHExecutor(self.__ssh_ip, self.__ssh_port,
                                   self.__ssh_user, config.key_file,
                                   config.key_pwd)
         self.client.open()
         return True
     except Exception:
         # format_exc() keeps the exception type and stack; ``e.message`` is
         # deprecated and empty for multi-argument exceptions.
         import traceback
         Logger.error(traceback.format_exc())
         return None
0
 def dispatch(self):
     """
     Hand every job in ``self.job_list`` to ``self.job_holder``.

     The list is walked while holding ``self.lock``. Errors are logged and
     not raised.

     :return: None
     """
     try:
         with self.lock:
             for a_job in self.job_list:
                 self.job_holder.add_job(a_job)
     except Exception:
         # format_exc() keeps the exception type and stack; ``e.message`` is
         # deprecated and empty for multi-argument exceptions.
         import traceback
         Logger.error(traceback.format_exc())
Пример #7
0
 def lod_job_from_mysql(job_loader):
     """
     Reload the jobs once, sleep one second, then schedule the next reload.

     The next round is scheduled by spawning a new greenlet that runs this
     same function with the same loader.

     NOTE: the spawn happens inside the ``try``; if an earlier statement
     raises, the error is logged and no further reload is scheduled.

     :type job_loader: JobLoader
     :return: None
     """
     try:
         Logger.info("into lod job from mysql")
         job_loader.load_job()
         gevent.sleep(1)
         gevent.spawn(JobLoader.lod_job_from_mysql, job_loader)
     except Exception:
         # format_exc() keeps the exception type and stack; ``e.message`` is
         # deprecated and empty for multi-argument exceptions.
         import traceback
         Logger.error(traceback.format_exc())
Пример #8
0
    def do_job(self):
        """
        Check the job, publish its status and restart the service if allowed.

        Steps, each of which may end the run early:

        1. Run all checks and store the result in ``self.status_holder``.
        2. Healthy (GREEN): nothing more to do.
        3. WHITE or YELLOW: the check itself failed; log it and mail the receiver.
        4. Otherwise, if auto recover is enabled: stop the service, make sure
           everything it relies on is healthy, start it again and mail the
           receiver on success.

        Errors are logged and not raised.

        :return: None
        """
        try:
            job_id = self.a_job.get_id()
            service_name = self.a_job.get_service_name()
            healthy_code = self.a_job.do_all_check()

            # Refresh job status
            Logger.report('job_id[%s] [%s] is healthy_code[%s]' % (job_id, service_name, healthy_code))
            self.status_holder.set_one_status(job_id, healthy_code, self.a_job.get_check_cmd_healthy_code())

            # Success. Compared by value: identity (``is``) on a constant only
            # works by accident of object caching.
            if healthy_code == StatusCode.GREEN_CODE:
                return

            # Monitor operation occur error
            if healthy_code in (StatusCode.WHITE_CODE, StatusCode.YELLOW_CODE):
                content = 'job_id[%s] [%s], healthy_code[%s] cat not be monitored successfully' % (job_id, service_name, healthy_code)
                Logger.error(content)
                SendMail.send(self.a_job.get_mail_receiver(), service_name, content)
                return

            # Do not need to be recovered
            if not self.a_job.get_auto_recover():
                return

            # Stopped process
            if self.a_job.stop() is None:
                Logger.info('job_id[%s] [%s] stop failed' % (job_id, service_name))
                return

            # Check relies: do not restart while a dependency is unhealthy
            relies = self.a_job.get_all_rely()
            if not self.status_holder.is_group_healthy(relies):
                Logger.info('services job_id[%s] [%s] relying is not healthy' % (job_id, service_name))
                return

            # Start process
            if not self.a_job.start():
                Logger.info('job_id[%s] [%s] start failed' % (job_id, service_name))
                return

            # Start ok
            content = 'job_id[%s] [%s] start success' % (job_id, service_name)
            Logger.info(content)
            SendMail.send(self.a_job.get_mail_receiver(), service_name, content)
        except Exception:
            # format_exc() keeps the exception type and stack; ``e.message`` is
            # deprecated and empty for multi-argument exceptions.
            import traceback
            Logger.error(traceback.format_exc())
Пример #9
0
 def del_job(self, job_id):
     """
     Delete a job from job_dict; an unknown job_id is silently ignored.

     :param job_id: id of the job to remove
     :return: True when the call completed, None when it raised
     :rtype: bool | None
     """
     try:
         with self.__lock:
             # pop with a default is a no-op for ids that are not present
             self.__job_dict.pop(job_id, None)
         return True
     except Exception:
         # format_exc() keeps the exception type and stack; ``e.message`` is
         # deprecated and empty for multi-argument exceptions.
         import traceback
         Logger.error(traceback.format_exc())
         return None
Пример #10
0
    def start(self):
        """ Start service using start cmd

        The command is run through ``self.client`` and its output is logged.
        True only means the command was executed without raising; the output
        is not inspected.

        :return: True when the command was executed, None when it raised
        :rtype: bool | None
        """
        try:
            std_out, std_err = self.client.execute(self.__start_cmd)
            Logger.info(
                "id[%s]: remote[%s] execute cmd[%s], std_out[%s], std_err[%s]"
                %
                (self.__id, self.__ssh_ip, self.__start_cmd, std_out, std_err))

            return True
        except Exception:
            # format_exc() keeps the exception type and stack; ``e.message`` is
            # deprecated and empty for multi-argument exceptions.
            import traceback
            Logger.error(traceback.format_exc())
            return None
Пример #11
0
    def get_job(self):
        """
        Get one job from job list

        The job is removed from the end of the list, so the most recently
        added job is returned first.

        :return: the job, or None when the list is empty or an error occurred
        :rtype: JobDetail | None
        """
        try:
            with self.__lock:
                if self.__job_list:
                    return self.__job_list.pop()
                return None
        except Exception:
            # format_exc() keeps the exception type and stack; ``e.message`` is
            # deprecated and empty for multi-argument exceptions.
            import traceback
            Logger.error(traceback.format_exc())
            return None
Пример #12
0
    def run(self):
        """
        Process ``self.a_job`` once and release it from the job holder.

        An inactive job only has its status cleared. An active job is logged
        in, checked through ``do_job`` with ``logout`` registered on ``Defer``
        (presumably invoked when the ``with`` block exits), and then removed
        from the job holder. Errors are logged and not raised.

        :return: None
        """
        try:
            job_id = self.a_job.get_id

            # Clear job status if job is not active
            if not self.a_job.is_active():
                self.status_holder.clear_one_status(job_id())
                self.job_holder.del_job(job_id())
                return

            # Do one job
            self.a_job.login()
            with Defer(self.a_job.logout):
                self.do_job()
            self.job_holder.del_job(job_id())
        except Exception:
            # format_exc() keeps the exception type and stack; ``e.message`` is
            # deprecated and empty for multi-argument exceptions.
            import traceback
            Logger.error(traceback.format_exc())
Пример #13
0
 def add_job(self, job):
     """
     Add one job to job list. If the same job already exists in job_dict,
     nothing is added.

     :type job: JobDetail
     :return: True when the job was added, False when a job with the same id
              is already registered, None when an error occurred
     :rtype: bool | None
     """
     try:
         with self.__lock:
             job_id = job.get_id()
             if job_id in self.__job_dict:
                 return False
             self.__job_list.append(job)
             self.__job_dict[job_id] = job
             return True
     except Exception:
         # format_exc() keeps the exception type and stack; ``e.message`` is
         # deprecated and empty for multi-argument exceptions.
         import traceback
         Logger.error(traceback.format_exc())
         return None
Пример #14
0
    def stop(self):
        """ Stop service using stop_cmd

        The command is run through ``self.client`` and its output is logged.

        :return: True when the command was executed, None when it raised
        :rtype: bool | None
        """
        try:
            std_out, std_err = self.client.execute(self.__stop_cmd)
            Logger.info(
                "id[%s]: remote[%s] execute cmd[%s], std_out[%s], std_err[%s]"
                %
                (self.__id, self.__ssh_ip, self.__stop_cmd, std_out, std_err))

            # NOTE: True only means the command ran without raising; whether
            # the service actually stopped is not verified here.
            return True
        except Exception:
            # format_exc() keeps the exception type and stack; ``e.message`` is
            # deprecated and empty for multi-argument exceptions.
            import traceback
            Logger.error(traceback.format_exc())
            return None
Пример #15
0
 def __load_relies(self, a_dict):
     """
     Attach the rows of the ``service_rely`` table to the jobs in ``a_dict``.

     Rows whose ``service_id`` has no job in ``a_dict`` are skipped.

     :type a_dict: dict[int, Job]
     :return: True on success, None when the query or a row failed
     :rtype: bool | None
     """
     try:
         sql = 'SELECT service_id,rely_id FROM service_rely'
         Logger.info(sql)
         self.cur.execute(sql)
         for service_id, rely_id in self.cur.fetchall():
             a_job = a_dict.get(service_id)
             if a_job is not None:
                 a_job.add_rely(rely_id)
         return True
     except Exception:
         # Exception rather than a bare except, so KeyboardInterrupt and
         # SystemExit are not swallowed.
         Logger.error(traceback.format_exc())
         return None
Пример #16
0
 def __load_checks(self, a_dict):
     """
     Attach the rows of the ``check_cmd`` table to the jobs in ``a_dict``.

     Every row is turned into a ``CheckCmd``; rows whose ``service_id`` has
     no job in ``a_dict`` are then skipped.

     :type a_dict: dict[int, Job]
     :return: True on success, None when the query or a row failed
     :rtype: bool | None
     """
     try:
         sql = "SELECT id,service_id,local_check,check_shell,operator,check_value,good_match FROM check_cmd"
         Logger.info(sql)
         self.cur.execute(sql)
         for row in self.cur.fetchall():
             a_id, service_id, local_check, check_shell, operator, check_value, good_match = row
             check = CheckCmd(a_id, service_id, local_check, check_shell, operator, check_value, good_match)
             a_job = a_dict.get(service_id)
             if a_job is not None:
                 a_job.add_check(check)
         return True
     except Exception:
         # Exception rather than a bare except, so KeyboardInterrupt and
         # SystemExit are not swallowed.
         Logger.error(traceback.format_exc())
         return None
Пример #17
0
    def do_all_check(self):
        """
        Execute all check commands for the job and collect their health codes.

        Local checks are run on this machine, remote checks through
        ``self.client``. Each output is rated by ``__get_health`` and the
        result is recorded in a local ``StatusCode`` object. A failed command
        (non-zero exit status locally, or stderr without stdout remotely)
        additionally records ``set_status(None)``.

        NOTE(review): no return statement is visible in this block, so callers
        receive None although the collected ``status_code`` looks like the
        intended result - confirm which value should be returned.
        """
        status_code = StatusCode()
        try:
            # local checking
            for item in self.__local.values():
                status, output = commands.getstatusoutput(item.check_shell)
                Logger.info(
                    "id[%s]: localhost[127.0.0.1] execute cmd[%s], status[%s], output[%s]"
                    % (self.__id, item.check_shell, status, output))
                if status != 0:
                    status_code.set_status(None)

                # check healthy
                healthy_code = self.__get_health(item, output)
                Logger.info("id[%s]: localhost[127.0.0.1] healthy_code[%s]" %
                            (self.__id, healthy_code))
                status_code.set_code(healthy_code)

            # remote checking
            for item in self.__remote.values():
                std_out, std_err = self.client.execute(item.check_shell)
                Logger.info(
                    "id[%s]: remote[%s] execute cmd[%s], std_out[%s], std_err[%s]"
                    % (self.__id, self.__ssh_ip, item.check_shell, std_out,
                       std_err))
                if not std_out and std_err:
                    status_code.set_status(None)
                # check healthy
                healthy_code = self.__get_health(item, std_out)
                Logger.info("id[%s]: remote[%s] healthy_code[%s]" %
                            (self.__id, self.__ssh_ip, healthy_code))
                status_code.set_code(healthy_code)
        except Exception:
            # format_exc() keeps the exception type and stack; ``e.message`` is
            # deprecated and empty for multi-argument exceptions.
            import traceback
            Logger.error(traceback.format_exc())
Пример #18
0
 def load_job(self):
     """
     Open a client through ``self.__open()`` and run its ``__load_jobs``.

     The client is used as a context manager. Errors are logged and not
     raised.

     :return: None
     """
     try:
         with self.__open() as client:
             client.__load_jobs()
     except Exception:
         # format_exc() keeps the exception type and stack; ``e.message`` is
         # deprecated and empty for multi-argument exceptions.
         import traceback
         Logger.error(traceback.format_exc())
Пример #19
0
 def is_quit(self):
     """
     Read the quit flag while holding ``self.lock``.

     :return: the value of ``self.quit``, or None when reading it raised
     """
     try:
         with self.lock:
             return self.quit
     except Exception:
         # format_exc() keeps the exception type and stack; ``e.message`` is
         # deprecated and empty for multi-argument exceptions.
         import traceback
         Logger.error(traceback.format_exc())
         return None
Пример #20
0
 def set_quit(self):
     """
     Set the quit flag to True while holding ``self.lock``.

     Errors are logged and not raised.

     :return: None
     """
     try:
         with self.lock:
             self.quit = True
     except Exception:
         # format_exc() keeps the exception type and stack; ``e.message`` is
         # deprecated and empty for multi-argument exceptions.
         import traceback
         Logger.error(traceback.format_exc())