def validateResource(self, component_container_cluster_config):
    zkOper = Common_ZkOpers()
    ip_list = zkOper.get_ips_from_ipPool()
    nodeCount = component_container_cluster_config.nodeCount
    if len(ip_list) < nodeCount:
        raise CommonException('ips are not enough!')

def create(self, params): if params == {} or params is None: raise UserVisiableException("please set the componentNode info!") dataNodeInternalPort = params.get('dataNodeInternalPort') if dataNodeInternalPort is not None: raise UserVisiableException( "no need to set the dataNodeInternalPort param!") zkOper = Common_ZkOpers() local_uuid = getClusterUUID() existCluster = zkOper.existCluster(local_uuid) if not existCluster: raise UserVisiableException( "sync componentCluster info error! please check if sync uuid is right!" ) params.setdefault("dataNodeInternalPort", options.port) dataNodeExternalPort = params.get('dataNodeExternalPort') if dataNodeExternalPort is None or '' == dataNodeExternalPort: params.setdefault("dataNodeExternalPort", options.port) self.confOpers.setValue(options.data_node_property, params) dataNodeProprs = self.confOpers.getValue(options.data_node_property) zkOper.writeDataNodeInfo(local_uuid, dataNodeProprs) result = {} result.setdefault( "message", "Configuration on this componentNode has been done successfully") return result
def get_ip_num(self):
    """Monitor item: return the number of IPs currently in the IP pool."""
    zkOper = Common_ZkOpers()
    ip_list = zkOper.get_ips_from_ipPool()
    return len(ip_list)

def _sync_server_cluster(self):
    zkOper = Common_ZkOpers()
    cluster_uuid = zkOper.getClusterUUID()
    uuid_value, _ = zkOper.retrieveClusterProp(cluster_uuid)
    # cluster props are stored in ZK as a single-quoted dict string; normalize to JSON before parsing
    uuid_value = uuid_value.replace("'", "\"")
    uuid_value = json.loads(uuid_value)
    self.config_file_obj.setValue(options.server_cluster_property, uuid_value)

def _write_into_ipPool(self, args_dict):
    ip_segment = args_dict.get('ipSegment')
    ip_count = int(args_dict.get('ipCount'))
    choosed_ip = self._get_needed_ips(ip_segment, ip_count)
    zkOper = Common_ZkOpers()
    for ip in choosed_ip:
        zkOper.write_ip_into_ipPool(ip)

def _write_into_portPool(self, args):
    host_ip = args.get('hostIp')
    port_count = int(args.get('portCount'))
    start_port = int(args.get('startPort'))
    choosed_ports = self.__get_needed_ports(host_ip, start_port, port_count)
    zkOper = Common_ZkOpers()
    for port in choosed_ports:
        zkOper.write_port_into_portPool(host_ip, str(port))

def get_illegal_ports(self, host_ip):
    illegal_ports = []
    zkOper = Common_ZkOpers()
    port_list = zkOper.get_ports_from_portPool(host_ip)
    # logging.info('port in host: %s, in ports pool:%s ' % (host_ip, str(port_list)))
    for port in port_list:
        ret = self.__port_legal(host_ip, port)
        if not ret:
            illegal_ports.append(port)
    return illegal_ports

def reload(self):
    _, ret_val = self.invokeCommand._runSysCmd(options.reload_nginx)
    result = {}
    if ret_val != 0:
        result.setdefault("message", "reload nginx failed")
    else:
        result.setdefault("message", "reload nginx successfully")
        container_name = retrieve_node_name()
        zkOper = Common_ZkOpers()
        zkOper.write_started_node(container_name)
    return result

def __get_usable_host_resource(self, host_ip, component_container_cluster_config):
    resource_result = {}
    zkOper = Common_ZkOpers()

    # get host usable memory and the condition to create containers
    host_memory = ServerRes.retireve_server_memory(host_ip)
    host_mem_limit = component_container_cluster_config.mem_free_limit
    host_mem_can_be_used = float(host_memory["free"]) - host_mem_limit / (1024 * 1024)
    logging.info('memory: %s, host :%s' % (host_mem_can_be_used, host_ip))
    _mem_limit = component_container_cluster_config.mem_limit
    container_mem_limit = _mem_limit / (1024 * 1024)
    mem_condition = host_mem_can_be_used > container_mem_limit

    # get host usable disk and the condition to create containers
    host_disk = ServerRes.retireve_server_diskusage(host_ip)
    used_server_disk = host_disk['used']
    total_server_disk = host_disk['total']
    host_disk_usage_limit = component_container_cluster_config.disk_usage
    host_disk_can_be_used_limit = host_disk_usage_limit * total_server_disk
    host_disk_can_be_used = host_disk_can_be_used_limit - used_server_disk
    logging.info('disk: %s, host :%s' % (host_disk_can_be_used, host_ip))
    disk_condition = host_disk_can_be_used > 0

    quota_threshold = zkOper.retrieve_monitor_server_value()
    container_count = quota_threshold.get('container_count', 30)
    host_container_count = ServerRes.retireve_server_container_number(host_ip)
    container_count_condition = host_container_count < container_count

    host_disk_iops = ServerRes.retireve_server_diskiops(host_ip)

    # TODO: add the container threshold to our zookeeper node when beehive is updated
    logging.info('mem_condition:%s , disk_condition:%s, container count condition:%s '
                 % (mem_condition, disk_condition, container_count_condition))
    if mem_condition and disk_condition and container_count_condition:
        resource_result.setdefault('memory', host_mem_can_be_used)
        resource_result.setdefault('disk', host_disk_can_be_used)
        resource_result.setdefault('container_number', host_container_count)
        resource_result.setdefault('diskiops', host_disk_iops)
    logging.info('resource result:%s' % str(resource_result))
    return resource_result

def _sync_data_node(self):
    server_ip = getHostIp()
    zkOper = Common_ZkOpers()
    server_ip_list = zkOper.retrieve_data_node_list()
    if server_ip in server_ip_list:
        data_node_value = zkOper.retrieve_data_node_info(server_ip)
        if isinstance(data_node_value, dict):
            self.config_file_obj.setValue(options.data_node_property, data_node_value)
    else:
        logging.error('server %s should be registered first' % str(server_ip))

def retrieve_cluster_started_status(self):
    zkOper = Common_ZkOpers()
    started_nodes = zkOper.retrieve_started_nodes()
    total_nodes = zkOper.retrieve_nginx_node_list()
    started_nodes_count = len(started_nodes)
    total_nodes_count = len(total_nodes)
    if started_nodes_count == total_nodes_count:
        return ClusterStatus.STARTED
    elif 0 != started_nodes_count:
        return ClusterStatus.STARTED_PART
    else:
        return ClusterStatus.STOP

def reload(self):
    zkOper = Common_ZkOpers()
    node_infos = []
    nodes_list = zkOper.retrieve_nginx_node_list()
    for nginx_node in nodes_list:
        info = zkOper.retrieve_nginx_node_info(nginx_node)
        node_infos.append(info)
    self.baseOpers(node_infos, OperType.reload)
    result_dict = {
        'message': 'cluster reload processing, please wait for a moment!'
    }
    return result_dict

def baseOpers(self, node_info_list, oper_type, params=None):
    # avoid a mutable default argument
    params = params if params is not None else {}
    isLock = False
    lock = None
    zkOper = Common_ZkOpers()
    try:
        isLock, lock = zkOper.lock_cluster_start_stop_action()
        self.__dispatch(node_info_list, oper_type, params)
    except kazoo.exceptions.LockTimeout:
        raise ZKLockException("the cluster is being operated by someone else, please wait a moment and try again!")
    finally:
        if isLock:
            zkOper.unLock_cluster_start_stop_action(lock)

def sync(self):
    zk_address, zk_port = get_zk_address()
    if not (zk_address and zk_port):
        logging.info('zookeeper address is not configured, please admin zookeeper first!')
        return

    zkOper = Common_ZkOpers()
    existed = zkOper.existCluster()
    if existed:
        self._sync_server_cluster()
        self._sync_data_node()
    else:
        logging.info("cluster does not exist, this may be the first sync in a new server cluster")

def retrieve_usable_host_resource(self, component_container_cluster_config):
    host_resource_dict = {}
    zkOper = Common_ZkOpers()
    servers_white_list = zkOper.retrieve_servers_white_list()
    _exclude_servers = component_container_cluster_config.exclude_servers
    logging.info('exclude servers:%s, type:%s' % (str(_exclude_servers), type(_exclude_servers)))
    host_ip_list = list(set(servers_white_list) - set(_exclude_servers))
    logging.info('hosts chosen:%s' % str(host_ip_list))
    for host_ip in host_ip_list:
        host_resource = self.__get_usable_host_resource(host_ip, component_container_cluster_config)
        if host_resource:
            host_resource_dict.setdefault(host_ip, host_resource)
    return host_resource_dict

def stop(self):
    zkOper = Common_ZkOpers()
    node_infos = []
    started_nodes_list = zkOper.retrieve_started_nodes()
    if not started_nodes_list:
        raise UserVisiableException('cluster has been stopped, no need to do this!')
    for nginx_node in started_nodes_list:
        info = zkOper.retrieve_nginx_node_info(nginx_node)
        node_infos.append(info)
    self.baseOpers(node_infos, OperType.stop)
    result_dict = {
        'message': 'cluster stop processing, please wait for a moment!'
    }
    return result_dict

def sync(self):
    http_client = AsyncHTTPClient()
    succ, fail, return_result = [], [], ''
    key_sets = set()
    zkOper = Common_ZkOpers()
    server_list = zkOper.retrieve_data_node_list()
    try:
        for server in server_list:
            requesturi = 'http://%s:%s/inner/server/sync' % (server, options.port)
            logging.info('server requesturi: %s' % str(requesturi))
            callback_key = server
            key_sets.add(callback_key)
            http_client.fetch(requesturi, callback=(yield Callback(callback_key)))
        logging.debug('key_sets:%s' % str(key_sets))

        error_record = ''
        for i in range(len(key_sets)):
            callback_key = key_sets.pop()
            response = yield Wait(callback_key)
            if response.error:
                message = "remote access, the key:%s,\n error message:\n %s" % (callback_key, str(response.error))
                error_record += message + "|"
                logging.error(error_record)
            else:
                return_result = response.body.strip()
                if return_result:
                    logging.debug('return_result : %s' % str(return_result))
                    succ.append(callback_key)
                else:
                    fail.append(callback_key)
    finally:
        http_client.close()
        logging.debug('succ:%s' % str(succ))
        logging.debug('fail:%s' % str(fail))

def syncExistedCluster(self, params):
    if params == {}:
        error_message = "please fill the cluster uuid!"
        raise UserVisiableException(error_message)

    clusterUUID = params['clusterUUID']
    zkOper = Common_ZkOpers()
    existCluster = zkOper.existCluster(clusterUUID)
    if not existCluster:
        error_message = "Nginx componentCluster doesn't exist (cluster id:%s), please specify the right cluster uuid!" % (clusterUUID)
        raise UserVisiableException(error_message)

    data, _ = zkOper.retrieveClusterProp(clusterUUID)
    logging.info("data in zk %s" % (data))
    json_str_data = data.replace("'", "\"")
    dict_data = json.loads(json_str_data)
    self.confOpers.setValue(options.cluster_property, dict_data)

def disable(self):
    zkOper = Common_ZkOpers()
    node_infos = []
    _nodes_list = zkOper.retrieve_nginx_node_list()
    if not _nodes_list:
        raise UserVisiableException("cluster has no nodes, please check the cluster's nodes!")
    for _node in _nodes_list:
        info = zkOper.retrieve_nginx_node_info(_node)
        node_infos.append(info)
    self.baseOpers(node_infos, OperType.disable)
    result_dict = {
        'message': 'cluster proxy disable processing, please wait for a moment!'
    }
    return result_dict

def _get_needed_ips(self, ip_segment, ip_count):
    choosed_ip = []
    zkOper = Common_ZkOpers()
    ip_list = zkOper.get_ips_from_ipPool()
    all_ips = self._get_all_ips(ip_segment)
    ips = list(set(all_ips) - set(ip_list))
    num = 0
    if len(ips) < ip_count:
        logging.info('usable ips are not enough, only %s ips will be added' % len(ips))
        ip_count = len(ips)
    for ip in ips:
        if self.__ip_legal(ip):
            choosed_ip.append(ip)
            num += 1
            if num == ip_count:
                break
    return choosed_ip

def start(self):
    zkOper = Common_ZkOpers()
    existCluster = zkOper.existCluster()
    if not existCluster:
        raise UserVisiableException("Nginx componentCluster doesn't exist")

    total_nginx_nodes = zkOper.retrieve_nginx_node_list()
    started_nodes = zkOper.retrieve_started_nodes()
    if len(total_nginx_nodes) == len(started_nodes):
        raise UserVisiableException("all nginx nodes have started. No need to start them.")
    logging.info("all nginx nodes: %s" % (total_nginx_nodes))

    to_start_nginx_nodes = list(set(total_nginx_nodes) - set(started_nodes))
    logging.info("nginx nodes needed to start: " + str(to_start_nginx_nodes))
    node_infos = []
    for node in to_start_nginx_nodes:
        info = zkOper.retrieve_nginx_node_info(node)
        node_infos.append(info)
    self.baseOpers(node_infos, OperType.start)
    result_dict = {
        'message': 'cluster start processing, please wait for a moment!'
    }
    return result_dict

def get_illegal_ips(self, thread_num):
    """Check the IP pool for illegal IPs.

    thread_num: how many threads to use for the legality check.
    All IPs in the IP pool are put into store_all_ips_queue and checked in
    threads; illegal IPs are pushed into store_illegal_ips_queue. Once all
    threads have finished, the illegal IPs are collected and returned.
    """
    illegal_ips, thread_obj_list = [], []
    zkOper = Common_ZkOpers()
    ip_list = zkOper.get_ips_from_ipPool()

    logging.info('put all ips in ip pools into store_all_ips_queue')
    self.store_all_ips_queue._init(0)
    self.store_all_ips_queue.queue.extend(ip_list)
    logging.info('queue size :%s' % str(self.store_all_ips_queue.qsize()))

    for i in range(thread_num):
        thread_obj = doInThread(self.__ips_legal)
        thread_obj_list.append(thread_obj)

    while thread_obj_list:
        succ = []
        for thread_obj in thread_obj_list:
            if not thread_obj.isAlive():
                succ.append(thread_obj)
        for item in succ:
            thread_obj_list.remove(item)
        time.sleep(0.5)

    logging.info('get illegal_ip')
    while not self.store_illegal_ips_queue.empty():
        illegal_ip = self.store_illegal_ips_queue.get(block=False)
        illegal_ips.append(illegal_ip)
    logging.info('illegal_ips :%s' % str(illegal_ips))
    return illegal_ips

def retrieve_port_resource(self, host_ip_list, every_host_port_count=1):
    port_dict = {}
    # initialize before the try block so the unlock check in finally is always safe
    isLock, lock = False, None
    zkOper = Common_ZkOpers()
    try:
        isLock, lock = zkOper.lock_assign_port()
        if isLock:
            for host_ip in host_ip_list:
                retrieve_port_list = zkOper.retrieve_port(host_ip, every_host_port_count)
                port_dict.setdefault(host_ip, retrieve_port_list)
    finally:
        if isLock:
            zkOper.unLock_assign_port(lock)
    return port_dict

def do_retrieve_ip_action(self, ip_count):
    ip_list, isLock = None, None
    zkOper = Common_ZkOpers()
    try:
        isLock, lock = zkOper.lock_assign_ip()
        if isLock:
            ip_list = zkOper.retrieve_ip(ip_count)
    except kazoo.exceptions.LockTimeout:
        return
    finally:
        if isLock:
            zkOper.unLock_assign_ip(lock)
    return ip_list

def create(self, params): if params == {} or params is None: raise UserVisiableException("please set the componentNode info!") dataNodeInternalPort = params.get('dataNodeInternalPort') if dataNodeInternalPort: raise UserVisiableException( "no need to set the dataNodeInternalPort param!") zkOper = Common_ZkOpers() existCluster = zkOper.existCluster() if existCluster: raise UserVisiableException( "server has belong to a componentCluster,should be not create new componentCluster!" ) clusterUUID = str(uuid.uuid1()) params.setdefault("clusterUUID", clusterUUID) params.setdefault("dataNodeInternalPort", options.port) dataNodeExternalPort = params.get('dataNodeExternalPort') if dataNodeExternalPort is None or '' == dataNodeExternalPort: params.setdefault("dataNodeExternalPort", options.port) self.confOpers.setValue(options.cluster_property, params) self.confOpers.setValue(options.data_node_property, params) clusterProps = self.confOpers.getValue(options.cluster_property) dataNodeProprs = self.confOpers.getValue(options.data_node_property) zkOper.writeClusterInfo(clusterUUID, clusterProps) zkOper.writeDataNodeInfo(clusterUUID, dataNodeProprs) return clusterUUID
def get_port_num(self, host_ip):
    zkOper = Common_ZkOpers()
    port_list = zkOper.get_ports_from_portPool(host_ip)
    return len(port_list)

def host_exist(self, host_ip):
    zk_op = Common_ZkOpers()
    return zk_op.check_data_node_exist(host_ip)