def start(self, profile=False, master=True, slave=True):
    """Start the redis processes of the cluster.

    Logs an error and returns without doing anything when an option is
    not a bool, and returns silently when the host connection check fails.

    :param profile: Forwarded as the first argument of
        ``center.start_redis_process``; must be a bool
    :param master: If exclude master cluster, set False
    :param slave: If exclude slave cluster, set False
    :raises LightningDBError: code 11 (master) or 12 (slave) when more
        redis are alive than the count obtained with ``check_owner=True``
    """
    logger.debug("command 'cluster start'")

    # Validate in declaration order; only the first offender is reported.
    options = (
        ('profile', profile),
        ('master', master),
        ('slave', slave),
    )
    for option_name, option_value in options:
        if isinstance(option_value, bool):
            continue
        msg = message.get('error_option_type_not_boolean')
        logger.error(msg.format(option=option_name))
        return

    center = Center()
    center.update_ip_port()
    if not center.check_hosts_connection():
        return
    center.ensure_cluster_exist()

    if master:
        alive_total = center.get_alive_master_redis_count()
        alive_owned = center.get_alive_master_redis_count(check_owner=True)
        foreign_count = alive_total - alive_owned
        if foreign_count > 0:
            lines = message.get('error_cluster_start_master_collision')
            msg = '\n'.join(lines).format(count=foreign_count)
            raise LightningDBError(11, msg)

    if slave:
        alive_total = center.get_alive_slave_redis_count()
        alive_owned = center.get_alive_slave_redis_count(check_owner=True)
        foreign_count = alive_total - alive_owned
        if foreign_count > 0:
            lines = message.get('error_cluster_start_slave_collision')
            msg = '\n'.join(lines).format(count=foreign_count)
            raise LightningDBError(12, msg)

    center.backup_server_logs(master=master, slave=slave)
    center.create_redis_data_directory()

    # equal to cluster.configure()
    center.configure_redis()
    center.sync_conf(show_result=True)

    center.start_redis_process(profile, master=master, slave=slave)
    center.wait_until_all_redis_process_up(master=master, slave=slave)
def create(self, yes=False):
    """Create cluster

    Redis processes that are not running yet are started first (log
    backup, data directory creation, configure, conf sync, start).
    While ``center.create_cluster`` runs, 'cluster-node-timeout' is set
    to 2000; afterwards it is put back to the value read beforehand.
    The restore also runs when cluster creation raises, so a failed
    create does not leave the temporary timeout behind.

    Returns without doing anything when the host connection check fails.

    :param yes: skip confirm information
    :raises ClusterRedisError: fewer than 3 master redis
        (master hosts * master ports) are configured, or the alive redis
        count differs from the count obtained with ``check_owner=True``
    """
    center = Center()
    center.update_ip_port()
    success = center.check_hosts_connection()
    if not success:
        return

    m_count = len(center.master_host_list) * len(center.master_port_list)
    if m_count < 3:
        msg = message.get('error_master_redis_less_than_3')
        raise ClusterRedisError(msg)

    # if need to cluster start
    alive_count = center.get_alive_all_redis_count()
    my_alive_count = center.get_alive_all_redis_count(check_owner=True)
    if alive_count != my_alive_count:
        msg = message.get('error_cluster_start_port_collision')
        raise ClusterRedisError(msg)
    # NOTE(review): this compares a redis count with the length of the
    # host list -- confirm that is the intended threshold.
    all_count = len(center.all_host_list)
    if alive_count < all_count:
        logger.debug('cluster start in create')
        # init
        center.backup_server_logs()
        center.create_redis_data_directory()

        # cluster configure
        center.configure_redis()
        center.sync_conf(show_result=True)

        # cluster start
        center.start_redis_process()
        center.wait_until_all_redis_process_up()

    key = 'cluster-node-timeout'
    m_hosts = center.master_host_list
    m_ports = center.master_port_list
    origin_m_value = center.cli_config_get(key, m_hosts[0], m_ports[0])
    if not origin_m_value:
        msg = "RedisConfigKeyError(master): '{}'".format(key)
        logger.warning(msg)

    s_hosts = center.slave_host_list
    s_ports = center.slave_port_list
    # Bound up front so the checks below never touch an unassigned name.
    origin_s_value = None
    if s_hosts and s_ports:
        origin_s_value = center.cli_config_get(key, s_hosts[0], s_ports[0])
        if not origin_s_value:
            msg = "RedisConfigKeyError(slave): '{}'".format(key)
            logger.warning(msg)

    # The timeout is only touched when the master value could be read;
    # slaves follow only in that case and when their own value was read.
    change_slaves = bool(s_hosts and s_ports and origin_s_value)
    try:
        if origin_m_value:
            # cli config set cluster-node-timeout 2000
            logger.debug('set cluster node time out 2000 for create')
            center.cli_config_set_all(key, '2000', m_hosts, m_ports)
            if change_slaves:
                center.cli_config_set_all(key, '2000', s_hosts, s_ports)
        center.create_cluster(yes)
    finally:
        # Restore even if create_cluster (or the set above) raised.
        if origin_m_value:
            # cli config restore cluster-node-timeout
            logger.debug('restore cluster node time out')
            center.cli_config_set_all(key, origin_m_value, m_hosts, m_ports)
            if change_slaves:
                center.cli_config_set_all(
                    key, origin_s_value, s_hosts, s_ports
                )
def add_slave(self, yes=False):
    """Add slave of cluster

    Add slaves to cluster that configured master only.

    The slave redis are cleaned, configured and started, then
    ``center.replicate`` is run with 'cluster-node-timeout' temporarily
    set to 2000 on the slaves. The original value is restored afterwards,
    also when replication raises.

    Returns early (after logging) when ``yes`` is not a bool, when the
    slave host connection check fails, or when the confirmation is
    declined.

    :param yes: Skip confirm information
    :raises ClusterRedisError: the slave host list or slave port list is
        empty
    :raises LightningDBError: code 12 when more slave redis are alive than
        the count obtained with ``check_owner=True``
    """
    logger.debug('add_slave')
    if not isinstance(yes, bool):
        msg = message.get('error_option_type_not_boolean')
        msg = msg.format(option='yes')
        logger.error(msg)
        return

    center = Center()
    center.update_ip_port()

    # check
    s_hosts = center.slave_host_list
    s_ports = center.slave_port_list
    if not s_hosts:
        msg = message.get('error_slave_host_empty')
        raise ClusterRedisError(msg)
    if not s_ports:
        msg = message.get('error_slave_port_empty')
        raise ClusterRedisError(msg)
    success = center.check_hosts_connection(hosts=s_hosts)
    if not success:
        return
    center.ensure_cluster_exist()

    slave_alive_count = center.get_alive_slave_redis_count()
    slave_alive_count_mine = center.get_alive_slave_redis_count(
        check_owner=True
    )
    not_mine_count = slave_alive_count - slave_alive_count_mine
    if not_mine_count > 0:
        msg = message.get('error_cluster_start_slave_collision')
        msg = '\n'.join(msg).format(count=not_mine_count)
        raise LightningDBError(12, msg)

    # confirm info
    result = center.confirm_node_port_info(skip=yes)
    if not result:
        msg = message.get('cancel')
        logger.warning(msg)
        return

    # clean
    center.cluster_clean(master=False)

    # backup logs
    center.backup_server_logs(master=False)
    center.create_redis_data_directory(master=False)

    # configure
    center.configure_redis(master=False)
    center.sync_conf()

    # start
    center.start_redis_process(master=False)
    center.wait_until_all_redis_process_up()

    # change redis config temporarily
    key = 'cluster-node-timeout'
    origin_s_value = center.cli_config_get(key, s_hosts[0], s_ports[0])
    if not origin_s_value:
        msg = "RedisConfigKeyError: '{}'".format(key)
        logger.warning(msg)

    try:
        if origin_s_value:
            # cli config set cluster-node-timeout 2000
            logger.debug('set cluster node time out 2000 for create')
            center.cli_config_set_all(key, '2000', s_hosts, s_ports)
        # create
        center.replicate()
    finally:
        # Restore even if replicate (or the set above) raised, so the
        # temporary timeout never outlives this command.
        if origin_s_value:
            # cli config restore cluster-node-timeout
            logger.debug('restore cluster node time out')
            center.cli_config_set_all(key, origin_s_value, s_hosts, s_ports)
def _deploy_zero_downtime(cluster_id):
    """Install a new version on a running cluster via slave failover.

    Steps, in order:

    1. Log an error and return unless every master redis is alive and
       slaves are enabled.
    2. Ask for an installer; back up the conf and the cluster directory.
    3. On every master host: create the '.deploy.state' marker, transfer
       and run the installer (an ``SSHCommandError`` is logged and ends
       the function).
    4. Restore the conf and remove the '.deploy.state' marker.
    5. Restart the slaves, then fail over to them (up to 10 attempts)
       with 'cluster-node-timeout' temporarily set to 2000.
    6. Restart the former masters and write the swapped master/slave port
       lists to redis.properties.

    SSH clients are closed and the original 'cluster-node-timeout' values
    are written back even when a step in between raises.

    :param cluster_id: id of the cluster to update
    """
    logger.debug("zero downtime update cluster {}".format(cluster_id))
    center = Center()
    center.update_ip_port()
    m_hosts = center.master_host_list
    m_ports = center.master_port_list
    s_hosts = center.slave_host_list
    s_ports = center.slave_port_list
    path_of_fb = config.get_path_of_fb(cluster_id)
    cluster_path = path_of_fb['cluster_path']

    # check master alive
    m_count = len(m_hosts) * len(m_ports)
    alive_m_count = center.get_alive_master_redis_count()
    if alive_m_count < m_count:
        logger.error(message.get('error_exist_disconnected_master'))
        return
    # NOTE(review): `is_slave_enabled` is tested without being called; if
    # it is a function rather than an attribute this check never fires.
    if not config.is_slave_enabled:
        logger.error(message.get('error_need_to_slave'))
        return

    # select installer
    installer_path = ask_util.installer()
    installer_name = os.path.basename(installer_path)

    # backup info
    current_time = time.strftime("%Y%m%d%H%M%S", time.gmtime())
    conf_backup_dir = 'cluster_{}_conf_bak_{}'.format(cluster_id, current_time)
    cluster_backup_dir = 'cluster_{}_bak_{}'.format(cluster_id, current_time)
    local_ip = config.get_local_ip()

    # backup conf
    center.conf_backup(local_ip, cluster_id, conf_backup_dir)

    # backup cluster
    # NOTE(review): the backup iterates the slave hosts while the install
    # below iterates the master hosts -- confirm this is intended.
    for host in s_hosts:
        client = net.get_ssh(host)
        try:
            center.cluster_backup(host, cluster_id, cluster_backup_dir)
        finally:
            # Do not leak the connection when the backup fails.
            client.close()

    # transfer & install
    logger.info(message.get('transfer_and_execute_installer'))
    for host in m_hosts:
        logger.info(' - {}'.format(host))
        client = net.get_ssh(host)
        try:
            # The marker file is removed again once the conf is restored.
            cmd = 'mkdir -p {0} && touch {0}/.deploy.state'.format(
                cluster_path
            )
            net.ssh_execute(client=client, command=cmd)
        finally:
            client.close()
        DeployUtil().transfer_installer(host, cluster_id, installer_path)
        try:
            DeployUtil().install(host, cluster_id, installer_name)
        except SSHCommandError as ex:
            msg = message.get('error_execute_installer')
            msg = msg.format(installer=installer_path)
            logger.error(msg)
            logger.exception(ex)
            return

    # restore conf
    center.conf_restore(local_ip, cluster_id, conf_backup_dir)

    # set deploy state complete
    # The lookup takes only the cluster id, so it is done once for all
    # hosts instead of once per host.
    path_of_fb = config.get_path_of_fb(cluster_id)
    cluster_path = path_of_fb['cluster_path']
    cmd = 'rm -rf {}'.format(os.path.join(cluster_path, '.deploy.state'))
    for node in m_hosts:
        client = net.get_ssh(node)
        try:
            net.ssh_execute(client=client, command=cmd)
        finally:
            client.close()

    # restart slave
    center.stop_redis(master=False)
    center.configure_redis(master=False)
    center.sync_conf()
    center.start_redis_process(master=False)
    center.wait_until_all_redis_process_up()

    # check slave is alive
    slaves_for_failover = center.check_all_master_have_alive_slave()

    key = 'cluster-node-timeout'
    origin_m_value = center.cli_config_get(key, m_hosts[0], m_ports[0])
    origin_s_value = center.cli_config_get(key, s_hosts[0], s_ports[0])

    success = False
    try:
        logger.debug('config set: cluster-node-timeout 2000')
        RedisCliConfig().set(key, '2000', all=True)

        # cluster failover (with no option)
        logger.info(message.get('failover_on_deploy'))
        logger.debug(slaves_for_failover)
        try_count = 0
        while try_count < 10:
            try_count += 1
            success = True
            for slave_addr in slaves_for_failover:
                host, port = slave_addr.split(':')
                stdout = center.run_failover("{}:{}".format(host, port))
                logger.debug("failover {}:{} {}".format(host, port, stdout))
                if stdout != "ERR You should send CLUSTER FAILOVER to a slave":
                    # In some cases, the cluster failover is not complete
                    # even if stdout is OK
                    # If redis changed to master completely,
                    # return 'ERR You should send CLUSTER FAILOVER to a slave'
                    success = False
            if success:
                break
            msg = message.get('retry').format(try_count=try_count)
            logger.info(msg)
            time.sleep(5)
    finally:
        # Write the original timeouts back even if a failover call raised.
        logger.debug('restore config: cluster-node-timeout')
        center.cli_config_set_all(key, origin_m_value, m_hosts, m_ports)
        center.cli_config_set_all(key, origin_s_value, s_hosts, s_ports)
    if not success:
        logger.error(message.get('error_redis_failover'))
        return

    # restart master (current slave)
    center.stop_redis(slave=False)
    center.configure_redis(slave=False)
    center.sync_conf()
    center.start_redis_process(slave=False)
    center.wait_until_all_redis_process_up()

    # change host info of redis.properties
    props_path = path_of_fb['redis_properties']
    # Ports of the slaves that were failed over become the master ports;
    # every other configured port becomes a slave port.
    after_m_ports = list(
        {int(addr.split(':')[1]) for addr in slaves_for_failover}
    )
    after_s_ports = list(set(s_ports + m_ports) - set(after_m_ports))
    logger.debug("master port {}".format(m_ports))
    logger.debug("slave port {}".format(s_ports))

    key = 'sr2_redis_master_ports'
    logger.debug("next master port {}".format(after_m_ports))
    value = cluster_util.convert_list_2_seq(after_m_ports)
    logger.debug("converted {}".format(value))
    config.set_props(props_path, key, value)

    key = 'sr2_redis_slave_ports'
    logger.debug("next slave port {}".format(after_s_ports))
    value = cluster_util.convert_list_2_seq(after_s_ports)
    logger.debug("converted {}".format(value))
    config.set_props(props_path, key, value)