Exemplo n.º 1
0
    def __verify_params(self, args):
        """Validate the request arguments and build the scheduling tuple.

        Args:
            args: mapping of request parameters. It must hold a non-empty
                'url' and at least one trigger key among 'cron',
                'interval' and 'date'. 'priority' is optional.

        Returns:
            A ``(queue_name, url, cron)`` tuple, where ``cron`` is a
            one-entry dict mapping the chosen trigger key to its value.

        Raises:
            CommonException: if 'url' is missing/empty or no trigger key
                is present in ``args``.
        """
        url = args.get('url')
        logging.info('get url:%s' % url)
        if not url:
            raise CommonException('url params are not supplied!')

        # Select the trigger key without popping from a possibly empty set:
        # set.pop() raises KeyError on an empty set, which used to hide the
        # CommonException below. A fixed lookup order also makes the choice
        # deterministic when several trigger keys are supplied.
        cron_key = next(
            (key for key in ('cron', 'interval', 'date') if key in args),
            None)
        logging.info('get cron key:%s' % cron_key)
        if not cron_key:
            logging.error('wrong here')
            raise CommonException('cron trigger params are not supplied!')

        cron = {cron_key: args.get(cron_key)}

        # Requests whose priority is the string '0' go to the urgent queue.
        queue_name = options.queue_http_request
        if args.get('priority') == '0':
            queue_name = options.queue_urgent

        return (queue_name, url, cron)
    def __init__(self):
        """Acquire the node start/stop action lock for this stop request.

        Stores the ZooKeeper operator on ``self.zkOper`` and the pair
        returned by ``lock_node_start_stop_action()`` on ``self.isLock``
        and ``self.lock``.

        Raises:
            CommonException: if acquiring the lock times out, or if the
                returned ``isLock`` flag is falsy.
        """
        super(Node_stop_action, self).__init__()

        # Both failure paths report the same condition, so they share one
        # message (the timeout branch used to carry the typo "atcion").
        lock_error = "When stop node, can't retrieve the stop action lock!"

        self.zkOper = self.retrieve_zkOper()
        try:
            self.isLock, self.lock = self.zkOper.lock_node_start_stop_action()
        except kazoo.exceptions.LockTimeout:
            raise CommonException(lock_error)

        if not self.isLock:
            raise CommonException(lock_error)
    def _sort_seqno(self, param):
        uuid_unique_list = []
        seqno_list = []
        self.start_node_ip_list = []

        for (data_node_ip, uuid_seqno_sub_dict) in param.items():

            uuid = uuid_seqno_sub_dict.get('uuid')
            if '00000000-0000-0000-0000-000000000000' == uuid:
                self.start_node_ip_list.append(data_node_ip)
                continue

            uuid_unique_list.append(uuid)
            seqno_list.append(uuid_seqno_sub_dict.get('seqno'))

        uuid_unique_set = set(uuid_unique_list)
        if len(uuid_unique_set) != 1:
            error_message = "node's uuid_seqno api no return unique uuid!"
            raise CommonException(error_message)

        seqno_list.sort()

        logging.info("After seqno sort, the seqno_list value is %s" %
                     str(seqno_list))

        for seqno in seqno_list:
            for (data_node_ip, uuid_seqno_sub_dict) in dict.items():
                seqno_target = uuid_seqno_sub_dict.get('seqno')
                if seqno == seqno_target:
                    self.start_node_ip_list.insert(0, data_node_ip)
                    del dict[data_node_ip]
                    break

        return self.start_node_ip_list
Exemplo n.º 4
0
    def _stat_wsrep_status(self):
        """Collect the wsrep flow-control and queue figures from MySQL.

        Opens a connection through ``self.dba_opers``, reads the status
        rows, and always closes the connection afterwards.

        Returns:
            dict with 'wsrep_flow_control_paused',
            'wsrep_local_send_queue_avg' and a nested 'slowest_node_param'
            dict holding 'wsrep_flow_control_sent' and
            'wsrep_local_recv_queue_avg'.

        Raises:
            CommonException: if no MySQL connection could be obtained.
        """
        connection = self.dba_opers.get_mysql_connection()
        if connection is None:
            raise CommonException("Can\'t connect to mysql server")

        try:
            status_rows = self.dba_opers.show_status(connection)
        finally:
            connection.close()

        wanted_keys = [
            'wsrep_flow_control_paused',
            'wsrep_flow_control_sent',
            'wsrep_local_recv_queue_avg',
            'wsrep_local_send_queue_avg',
        ]
        stats = retrieve_kv_from_db_rows(status_rows, wanted_keys)

        # Figures grouped under 'slowest_node_param' in the result.
        slowest_node = {
            'wsrep_flow_control_sent': stats.get('wsrep_flow_control_sent'),
            'wsrep_local_recv_queue_avg':
                stats.get('wsrep_local_recv_queue_avg'),
        }

        return {
            'wsrep_flow_control_paused':
                stats.get('wsrep_flow_control_paused'),
            'slowest_node_param': slowest_node,
            'wsrep_local_send_queue_avg':
                stats.get('wsrep_local_send_queue_avg'),
        }
Exemplo n.º 5
0
    def get_peer_wsrep_status(self):
        """Fetch the wsrep status from the first started peer that answers.

        Every started node except the local one is asked for
        "/db/all/stat?inner=true" via ``self.communicate``. The first reply
        containing "wsrep_status_dict" is parsed as JSON and its
        "response" entry is returned.

        Raises:
            CommonException: if no peer returned a usable reply.
        """
        logging.info("can not connect to local database site")

        peers = self.zkOper.retrieve_started_nodes()
        local_ip = self.confOpers.getValue(options.data_node_property,
                                           ['dataNodeIp'])['dataNodeIp']

        logging.info("local ip:" + str(local_ip))
        # Drop one occurrence of the local node from the candidates.
        if local_ip in peers:
            peers.remove(local_ip)
        logging.info("candicates are: " + str(peers))

        marker = "wsrep_status_dict"
        for peer_ip in peers:
            reply = self.communicate(peer_ip, "/db/all/stat?inner=true")
            logging.info("origin result: " + str(reply))
            logging.info(reply.replace("\\", ""))
            if reply.count(marker) != 0:
                # First usable answer wins; later peers are not queried.
                return json.loads(reply)["response"]

        raise CommonException("Can\'t connect to mysql server")
Exemplo n.º 6
0
    def _check_wsrep_ready(self, key_value):
        if key_value == {}:
            raise CommonException("the param should be not null")

        value = key_value.get('wsrep_ready')
        if 'ON' != value:
            logging.error("wsrep ready is " + value)
            return False

        value = key_value.get('wsrep_cluster_status')
        if 'Primary' != value:
            logging.error("wsrep cluster status is " + value)
            return False

        value = key_value.get('wsrep_connected')
        if 'ON' != value:
            logging.error("wsrep connected is " + value)
            return False

        value = key_value.get('wsrep_local_state_comment')
        if value != 'Synced' and value != 'Donor/Desynced':
            logging.error("wsrep local state comment is " + value)
            return False

        return True
Exemplo n.º 7
0
    def writeFullText(self, fileName, fullText):
        """Replace the whole content of an existing file with ``fullText``.

        Args:
            fileName: path of the file to overwrite; it must already exist.
            fullText: text written as the new file content.

        Raises:
            CommonException: if ``fileName`` does not exist.
        """
        if not os.path.exists(fileName):
            raise CommonException("%s file not existed!" % (fileName))

        # Opening with 'w' already truncates the file. The context manager
        # closes the handle even when write() raises.
        with open(fileName, 'w') as outputstream:
            outputstream.write(fullText)
Exemplo n.º 8
0
    def _singleton(*args, **kw):
        """Return the shared instance of ``cls``, creating it on first use.

        ``cls`` and ``instances`` come from the enclosing scope. The
        zookeeper address is checked on every call, before the cache is
        consulted.

        Raises:
            CommonException: if the zookeeper address or port is not set.
        """
        zk_addr, zk_port = local_get_zk_address()
        if not zk_addr or not zk_port:
            raise CommonException('zookeeper address and port are not written!')

        if cls in instances:
            return instances[cls]

        logging.info('init class : %s' % str(cls))
        instances[cls] = cls(*args, **kw)
        return instances[cls]
Exemplo n.º 9
0
    def getClusterUUID(self):
        """Return '<CLUSTER_NAME>/<first child>' of the cluster node in zk.

        The children of ``rootPath/CLUSTER_NAME`` are read through
        ``DEFAULT_RETRY_POLICY``; the read is repeated once if it raises
        ``SessionExpiredError``.

        Raises:
            CommonException: if the children list is None or empty.
        """
        cluster_path = self.rootPath + '/' + CLUSTER_NAME

        try:
            children = self.DEFAULT_RETRY_POLICY(self.zk.get_children,
                                                 cluster_path)
        except SessionExpiredError:
            # Same call again, exactly once.
            children = self.DEFAULT_RETRY_POLICY(self.zk.get_children,
                                                 cluster_path)

        if children is None or len(children) == 0:
            raise CommonException(
                'cluster uuid is null.please check the zk connection or check if existed cluster uuid.'
            )

        return CLUSTER_NAME + '/' + children[0]
Exemplo n.º 10
0
    def check_db_anti_item(self, action_timeout):
        """Schedule the periodic 'monitor_anti' backend worker.

        Args:
            action_timeout: value passed to each worker and, multiplied by
                1000, used as the ``PeriodicCallback`` interval.

        Raises:
            CommonException: if ``action_timeout`` is negative.
        """
        if action_timeout < 0:
            raise CommonException('timeout cannot be negative!')

        def _launch_anti_worker():
            # Creation and start are both guarded: any failure is handed
            # to the exception queue instead of propagating.
            try:
                worker = Monitor_Backend_Handle_Worker(action_timeout,
                                                       'monitor_anti')
                worker.start()
            except Exception:
                self.threading_exception_queue.put(sys.exc_info())

        periodic = PeriodicCallback(_launch_anti_worker,
                                    action_timeout * 1000)
        periodic.start()
Exemplo n.º 11
0
    def sced_monitor_handler(self, action_timeout = 30):
        """Schedule the periodic 'monitor' backend worker.

        Args:
            action_timeout: value passed to each worker and, multiplied by
                1000, used as the ``PeriodicCallback`` interval
                (default 30).

        Raises:
            CommonException: if ``action_timeout`` is negative.
        """
        if action_timeout < 0:
            raise CommonException('timeout cannot be negative!')

        def _launch_monitor_worker():
            # Only start() is guarded; constructing the worker is not.
            worker = Monitor_Backend_Handle_Worker(action_timeout, 'monitor')
            try:
                worker.start()
            except Exception:
                self.threading_exception_queue.put(sys.exc_info())

        interval = action_timeout * 1000
        periodic = PeriodicCallback(_launch_monitor_worker, interval)
        periodic.start()
    def __init__(self):
        """Try to take the cluster start/stop action lock.

        On success ``self.isLock`` and ``self.lock`` hold the pair returned
        by ``lock_cluster_start_stop_action()``. If acquiring the lock
        times out, the condition is only logged and the instance is left
        with ``self.isLock = False`` and ``self.lock = None``.

        Raises:
            CommonException: if the call returns a falsy ``isLock`` flag.
        """
        super(Cluster_stop_action, self).__init__()

        zkOper = self.retrieve_zkOper()
        try:
            self.isLock, lock = zkOper.lock_cluster_start_stop_action()
        except kazoo.exceptions.LockTimeout:
            logging.info(
                "a thread is stopping this cluster, give up this operation!")
            # Returning without these attributes left a half-initialised
            # object that failed with AttributeError on first access.
            self.isLock = False
            self.lock = None
            return

        if not self.isLock:
            raise CommonException(
                'a thread is stopping this cluster, give up this operation!')

        self.lock = lock
    def _issue_start_action(self, cluster_flag, cluster_mode):
        """Start the cluster's data nodes one by one and record the outcome.

        Args:
            cluster_flag: 'new' selects the fresh-cluster branch (port
                check). Any other value selects the restart branch (wsrep
                check, stop of the nodes, ordering by seqno).
            cluster_mode: when not None it is written to zookeeper;
                "asymmetric" additionally enables the arbitrator handling.

        Side effects:
            Writes the cluster status ('initializing', 'stopped',
            'running' or 'failed') to zookeeper, POSTs "/node/start" to
            every node that needs starting, and sends notification mails.

        Raises:
            CommonException: on abnormal node status, when nothing needs
                starting, when a node reports no uuid/seqno, or when a
                node fails to start.
            HTTPAPIError: if the arbitrator start request fails.
            Any exception is logged, the status 'failed' is written, and
            the exception is re-raised.
        """
        node_wsrep_status_dict = {}
        need_start_node_ip_list = []
        arbitrator_node_ip = []
        status_dict = {}

        try:
            data_node_info_list = self.zkOper.retrieve_data_node_list()
            node_num = len(data_node_info_list)
            adminUser, adminPasswd = _retrieve_userName_passwd()

            if cluster_mode is not None:
                mode_dict = {"cluster_mode": cluster_mode}
                self.zkOper.writeClusterMode(mode_dict)

            if cluster_flag == 'new':
                status_dict.setdefault("_status", "initializing")
                self.zkOper.writeClusterStatus(status_dict)
                portstatus_obj = PortStatus()
                need_start_node_ip_list = portstatus_obj.check_port(
                    data_node_info_list)
                logging.info("need_start_node_ip_list:" +
                             str(need_start_node_ip_list))

                # Exactly one node may be missing from the to-start list.
                if node_num - len(need_start_node_ip_list) != 1:
                    error_message = "data nodes's status is abnormal."
                    status_dict['_status'] = 'failed'
                    self.zkOper.writeClusterStatus(status_dict)
                    raise CommonException(error_message)

                # @todo: for arbitrator mode? need this code?
                if cluster_mode == "asymmetric":
                    arbitrator_node = Arbitrator()
                    arbitrator_ip = arbitrator_node.get_ip(data_node_info_list)
                    arbitrator_node_ip.append(arbitrator_ip)
                    need_start_node_ip_list.remove(arbitrator_ip)

            else:
                wsrepstatus_obj = WsrepStatus()
                w_num = wsrepstatus_obj.check_wsrep(data_node_info_list,
                                                    node_wsrep_status_dict,
                                                    node_num)
                if w_num == node_num:
                    error_message = "all data node's status is ok. No need to start them."
                    raise CommonException(error_message)

                logging.info(
                    "Check the data node wsrep status for start cluster, the wsrep status value is %s"
                    % str(node_wsrep_status_dict))

                data_node_stop_finished_flag_dict = {}

                stop_issue_obj = StopIssue()
                stop_finished_count = stop_issue_obj.issue_stop(
                    node_wsrep_status_dict, data_node_stop_finished_flag_dict,
                    adminUser, adminPasswd)
                if stop_finished_count == len(
                        data_node_stop_finished_flag_dict):
                    status_dict['_status'] = 'stopped'
                    self.zkOper.writeClusterStatus(status_dict)
                    self._send_email(
                        "mcluster",
                        " mysql service have been stopped in the cluster")

                logging.info('nodes stopping finished!')

                _galerastatus_obj = GaleraStatus()

                uuid_seqno_dict = _galerastatus_obj.check_status(
                    data_node_stop_finished_flag_dict)

                # Collect every node that returned no uuid/seqno data.
                # The old ``err_node.join(node_ip)`` discarded its result,
                # so this check could never trigger.
                failed_nodes = [str(node_ip)
                                for (node_ip, value) in uuid_seqno_dict.items()
                                if not value]

                if failed_nodes:
                    error_message = "data node(%s) error, please check the status and start it by human." % (
                        ','.join(failed_nodes))
                    status_dict['_status'] = 'failed'
                    self.zkOper.writeClusterStatus(status_dict)
                    raise CommonException(error_message)

                need_start_node_ip_list = self._sort_seqno(uuid_seqno_dict)
                logging.info("After sort, the uuid_seqno_dict value is %s" %
                             str(need_start_node_ip_list))

            url_post = "/node/start"
            # Kept separate from url_post: reusing that name inside the
            # loop sent every later data node to the arbitrator endpoint.
            # The leading slash is required by the "http://%s:%s%s" format.
            arbitrator_url_post = "/arbitrator/node/start"
            logging.info("/node/start start issue!")
            logging.info("need_start_node_ip_list:" +
                         str(need_start_node_ip_list))

            for data_node_ip in need_start_node_ip_list:
                started_nodes = self.zkOper.retrieve_started_nodes()

                # The first node to come up bootstraps a new cluster.
                isNewCluster = len(started_nodes) == 0

                args_dict = {}
                args_dict.setdefault("isNewCluster", str(isNewCluster))

                requesturi = "http://%s:%s%s" % (data_node_ip, options.port,
                                                 url_post)
                request = HTTPRequest(url=requesturi,
                                      method='POST',
                                      body=urllib.urlencode(args_dict),
                                      auth_username=adminUser,
                                      auth_password=adminPasswd)

                _request_fetch(request, timeout=100)

                start_finished = self._check_start_status(
                    data_node_ip, cluster_flag)
                logging.info('request from node/start is ' +
                             str(start_finished))
                if start_finished == False:
                    status_dict['_status'] = 'failed'
                    self.zkOper.writeClusterStatus(status_dict)
                    error_message = "%s'database status is failed!" % (
                        data_node_ip)
                    raise CommonException(error_message)

                logging.info("check started nodes ok!")

                # @todo: for arbitrator need this code?
                # NOTE(review): this runs once per started data node, and
                # arbitrator_node_ip is only filled in the 'new' branch -
                # confirm that both are intended.
                if cluster_mode == "asymmetric":
                    arbitrator_ip = arbitrator_node_ip[0]
                    requesturi = "http://%s:%s%s" % (arbitrator_ip,
                                                     options.port,
                                                     arbitrator_url_post)
                    request = HTTPRequest(url=requesturi,
                                          method='POST',
                                          body=urllib.urlencode(args_dict),
                                          auth_username=adminUser,
                                          auth_password=adminPasswd)
                    logging.info("issue " + requesturi)
                    return_result = _request_fetch(request)
                    if return_result == False:
                        raise HTTPAPIError(
                            "Garbd arbitrator start failed",
                            notification="direct",
                            log_message="garbd arbitrator start failed",
                            response="garbd arbitrator start failed")
                    else:
                        self.zkOper.write_started_node(arbitrator_ip)

            cluster_started_nodes_list = self.zkOper.retrieve_started_nodes()
            nodes_online = len(cluster_started_nodes_list)

            logging.info("starts nodes" + str(nodes_online))
            if nodes_online == node_num:
                status_dict['_status'] = 'running'
                self._send_email(
                    "mcluster",
                    " mysql services have been started in the cluster")
            else:
                status_dict['_status'] = 'failed'
                self._send_email("mcluster",
                                 " mysql services have benn started error")

            self.zkOper.writeClusterStatus(status_dict)
        except Exception as e:
            logging.error(e)
            status_dict['_status'] = 'failed'
            self.zkOper.writeClusterStatus(status_dict)
            # A bare raise keeps the original traceback.
            raise