Пример #1
0
    def start(self, profile=False, master=True, slave=True):
        """Start the redis processes of the cluster.

        Validates the options, checks host connectivity and cluster
        existence, refuses to continue when more redis processes are alive
        than the ones counted with ``check_owner=True``, then backs up the
        server logs, regenerates and syncs the configuration and finally
        starts the processes and waits for them to come up.

        :param profile: Forwarded as the first argument of
            ``Center.start_redis_process``; must be a bool
        :param master: If exclude master cluster, set False
        :param slave: If exclude slave cluster, set False
        :raises LightningDBError: code 11 (master) or 12 (slave) when the
            total alive count exceeds the alive count obtained with
            ``check_owner=True``
        """
        logger.debug("command 'cluster start'")

        # Every option must be a real bool; report the first offender only.
        options = (('profile', profile), ('master', master), ('slave', slave))
        for option_name, option_value in options:
            if isinstance(option_value, bool):
                continue
            text = message.get('error_option_type_not_boolean')
            logger.error(text.format(option=option_name))
            return

        center = Center()
        center.update_ip_port()
        if not center.check_hosts_connection():
            return
        center.ensure_cluster_exist()

        if master:
            total = center.get_alive_master_redis_count()
            owned = center.get_alive_master_redis_count(check_owner=True)
            foreign = total - owned
            if foreign > 0:
                # The message is joined with newlines before formatting.
                lines = message.get('error_cluster_start_master_collision')
                text = '\n'.join(lines).format(count=foreign)
                raise LightningDBError(11, text)
        if slave:
            total = center.get_alive_slave_redis_count()
            owned = center.get_alive_slave_redis_count(check_owner=True)
            foreign = total - owned
            if foreign > 0:
                lines = message.get('error_cluster_start_slave_collision')
                text = '\n'.join(lines).format(count=foreign)
                raise LightningDBError(12, text)

        center.backup_server_logs(master=master, slave=slave)
        center.create_redis_data_directory()

        # equal to cluster.configure()
        center.configure_redis()
        center.sync_conf(show_result=True)

        center.start_redis_process(profile, master=master, slave=slave)
        center.wait_until_all_redis_process_up(master=master, slave=slave)
Пример #2
0
    def create(self, yes=False):
        """Create cluster

        If fewer redis processes are alive than expected, the same steps as
        ``cluster start`` (log backup, data directory creation, configure,
        sync, start) are executed first. While the cluster is being created,
        ``cluster-node-timeout`` is temporarily set to ``2000`` on the nodes
        whose original value could be read, and the original value is
        restored afterwards, even when cluster creation raises.

        :param yes: skip confirm information
        :raises ClusterRedisError: when fewer than 3 master redis are
            configured, or when the total alive redis count differs from
            the alive count obtained with ``check_owner=True``
        """
        center = Center()
        center.update_ip_port()
        success = center.check_hosts_connection()
        if not success:
            return

        m_count = len(center.master_host_list) * len(center.master_port_list)
        if m_count < 3:
            msg = message.get('error_master_redis_less_than_3')
            raise ClusterRedisError(msg)

        # if need to cluster start
        alive_count = center.get_alive_all_redis_count()
        my_alive_count = center.get_alive_all_redis_count(check_owner=True)
        if alive_count != my_alive_count:
            msg = message.get('error_cluster_start_port_collision')
            raise ClusterRedisError(msg)
        # NOTE(review): alive_count is a redis process count while all_count
        # is the length of all_host_list; confirm this is the intended total.
        all_count = len(center.all_host_list)
        if alive_count < all_count:
            logger.debug('cluster start in create')
            # init
            center.backup_server_logs()
            center.create_redis_data_directory()

            # cluster configure
            center.configure_redis()
            center.sync_conf(show_result=True)

            # cluster start
            center.start_redis_process()
            center.wait_until_all_redis_process_up()

        key = 'cluster-node-timeout'
        m_hosts = center.master_host_list
        m_ports = center.master_port_list
        origin_m_value = center.cli_config_get(key, m_hosts[0], m_ports[0])
        if not origin_m_value:
            msg = "RedisConfigKeyError(master): '{}'".format(key)
            logger.warning(msg)
        s_hosts = center.slave_host_list
        s_ports = center.slave_port_list
        # Stays None when no slave is configured, so the checks below can
        # test it directly instead of relying on short-circuit evaluation.
        origin_s_value = None
        if s_hosts and s_ports:
            origin_s_value = center.cli_config_get(key, s_hosts[0], s_ports[0])
            if not origin_s_value:
                msg = "RedisConfigKeyError(slave): '{}'".format(key)
                logger.warning(msg)

        # The override is only applied when the master value could be read,
        # because that value is what gets written back afterwards.
        try:
            if origin_m_value:
                # cli config set cluster-node-timeout 2000
                logger.debug('set cluster node time out 2000 for create')
                center.cli_config_set_all(key, '2000', m_hosts, m_ports)
                if origin_s_value:
                    center.cli_config_set_all(key, '2000', s_hosts, s_ports)
            center.create_cluster(yes)
        finally:
            # Restore in ``finally`` so a failed creation does not leave the
            # nodes running with the temporary timeout.
            if origin_m_value:
                # cli config restore cluster-node-timeout
                logger.debug('restore cluster node time out')
                center.cli_config_set_all(
                    key, origin_m_value, m_hosts, m_ports)
                if origin_s_value:
                    center.cli_config_set_all(
                        key, origin_s_value, s_hosts, s_ports)
Пример #3
0
    def add_slave(self, yes=False):
        """Add slave of cluster

        Add slaves to cluster that configured master only. The slave redis
        processes are cleaned, configured and started, then
        ``Center.replicate`` is run. While replicating,
        ``cluster-node-timeout`` of the slaves is temporarily set to
        ``2000`` (when the original value could be read) and the original
        value is restored afterwards, even when replication raises.

        :param yes: Skip confirm information
        :raises ClusterRedisError: when the slave host list or the slave
            port list is empty
        :raises LightningDBError: code 12 when the total alive slave count
            exceeds the alive count obtained with ``check_owner=True``
        """
        logger.debug('add_slave')
        if not isinstance(yes, bool):
            msg = message.get('error_option_type_not_boolean')
            msg = msg.format(option='yes')
            logger.error(msg)
            return
        center = Center()
        center.update_ip_port()
        # check
        s_hosts = center.slave_host_list
        s_ports = center.slave_port_list
        if not s_hosts:
            msg = message.get('error_slave_host_empty')
            raise ClusterRedisError(msg)
        if not s_ports:
            msg = message.get('error_slave_port_empty')
            raise ClusterRedisError(msg)
        success = center.check_hosts_connection(hosts=s_hosts)
        if not success:
            return
        center.ensure_cluster_exist()
        slave_alive_count = center.get_alive_slave_redis_count()
        slave_alive_count_mine = center.get_alive_slave_redis_count(
            check_owner=True
        )
        not_mine_count = slave_alive_count - slave_alive_count_mine
        if not_mine_count > 0:
            msg = message.get('error_cluster_start_slave_collision')
            msg = '\n'.join(msg).format(count=not_mine_count)
            raise LightningDBError(12, msg)

        # confirm info
        result = center.confirm_node_port_info(skip=yes)
        if not result:
            msg = message.get('cancel')
            logger.warning(msg)
            return
        # clean
        center.cluster_clean(master=False)
        # backup logs
        center.backup_server_logs(master=False)
        center.create_redis_data_directory(master=False)
        # configure
        center.configure_redis(master=False)
        center.sync_conf()
        # start
        center.start_redis_process(master=False)
        center.wait_until_all_redis_process_up()

        # change redis config temporarily
        key = 'cluster-node-timeout'
        origin_s_value = center.cli_config_get(key, s_hosts[0], s_ports[0])
        if not origin_s_value:
            msg = "RedisConfigKeyError: '{}'".format(key)
            logger.warning(msg)
        try:
            if origin_s_value:
                # cli config set cluster-node-timeout 2000
                logger.debug('set cluster node time out 2000 for create')
                center.cli_config_set_all(key, '2000', s_hosts, s_ports)
            # create
            center.replicate()
        finally:
            # Restore in ``finally`` so a failed replication does not leave
            # the slaves running with the temporary timeout.
            if origin_s_value:
                # cli config restore cluster-node-timeout
                logger.debug('restore cluster node time out')
                center.cli_config_set_all(
                    key, origin_s_value, s_hosts, s_ports)
Пример #4
0
def _deploy_zero_downtime(cluster_id):
    """Update a running cluster with a new installer using slave failover.

    Steps, in order:

    1. Abort (with an error log) unless every configured master redis is
       alive and slaves are enabled.
    2. Ask for an installer, back up the configuration and the cluster
       directory, then transfer and run the installer on each master host.
    3. Restore the configuration and remove the ``.deploy.state`` marker.
    4. Restart the slaves, then send failover to the slaves returned by
       ``Center.check_all_master_have_alive_slave`` (up to 10 rounds, 5
       seconds apart) with ``cluster-node-timeout`` temporarily set to
       ``2000``.
    5. Restart the former masters and swap the master/slave port settings
       in ``redis.properties``.

    Failures are logged and the function returns early; it always returns
    ``None``.

    :param cluster_id: Identifier of the cluster to update
    """
    logger.debug("zero downtime update cluster {}".format(cluster_id))
    center = Center()
    center.update_ip_port()
    m_hosts = center.master_host_list
    m_ports = center.master_port_list
    s_hosts = center.slave_host_list
    s_ports = center.slave_port_list
    path_of_fb = config.get_path_of_fb(cluster_id)
    cluster_path = path_of_fb['cluster_path']

    # check master alive
    m_count = len(m_hosts) * len(m_ports)
    alive_m_count = center.get_alive_master_redis_count()
    if alive_m_count < m_count:
        logger.error(message.get('error_exist_disconnected_master'))
        return

    # NOTE(review): ``is_slave_enabled`` is tested without being called; if
    # it is a function rather than a property/flag this check never fires.
    if not config.is_slave_enabled:
        logger.error(message.get('error_need_to_slave'))
        return

    # select installer
    installer_path = ask_util.installer()
    installer_name = os.path.basename(installer_path)

    # backup info
    current_time = time.strftime("%Y%m%d%H%M%S", time.gmtime())
    conf_backup_dir = 'cluster_{}_conf_bak_{}'.format(cluster_id, current_time)
    cluster_backup_dir = 'cluster_{}_bak_{}'.format(cluster_id, current_time)
    local_ip = config.get_local_ip()

    # backup conf
    center.conf_backup(local_ip, cluster_id, conf_backup_dir)

    # backup cluster
    for host in s_hosts:
        # NOTE(review): the client is not passed to cluster_backup; it is
        # kept because opening it may double as a connectivity check.
        client = net.get_ssh(host)
        try:
            center.cluster_backup(host, cluster_id, cluster_backup_dir)
        finally:
            client.close()

    # transfer & install
    logger.info(message.get('transfer_and_execute_installer'))
    mark_cmd = 'mkdir -p {0} && touch {0}/.deploy.state'.format(cluster_path)
    for host in m_hosts:
        logger.info(' - {}'.format(host))
        client = net.get_ssh(host)
        try:
            net.ssh_execute(client=client, command=mark_cmd)
        finally:
            # Close the connection even when the remote command fails.
            client.close()
        DeployUtil().transfer_installer(host, cluster_id, installer_path)
        try:
            DeployUtil().install(host, cluster_id, installer_name)
        except SSHCommandError as ex:
            msg = message.get('error_execute_installer')
            msg = msg.format(installer=installer_path)
            logger.error(msg)
            logger.exception(ex)
            return

    # restore conf
    center.conf_restore(local_ip, cluster_id, conf_backup_dir)

    # set deploy state complete
    unmark_cmd = 'rm -rf {}'.format(
        os.path.join(cluster_path, '.deploy.state'))
    for node in m_hosts:
        client = net.get_ssh(node)
        try:
            net.ssh_execute(client=client, command=unmark_cmd)
        finally:
            client.close()

    # restart slave
    center.stop_redis(master=False)
    center.configure_redis(master=False)
    center.sync_conf()
    center.start_redis_process(master=False)
    center.wait_until_all_redis_process_up()

    # check slave is alive
    slaves_for_failover = center.check_all_master_have_alive_slave()

    key = 'cluster-node-timeout'
    origin_m_value = center.cli_config_get(key, m_hosts[0], m_ports[0])
    origin_s_value = center.cli_config_get(key, s_hosts[0], s_ports[0])
    logger.debug('config set: cluster-node-timeout 2000')
    RedisCliConfig().set(key, '2000', all=True)

    success = False
    try:
        # cluster failover (with no option)
        logger.info(message.get('failover_on_deploy'))
        logger.debug(slaves_for_failover)
        for try_count in range(1, 11):
            success = True
            for slave_addr in slaves_for_failover:
                host, port = slave_addr.split(':')
                stdout = center.run_failover("{}:{}".format(host, port))
                logger.debug("failover {}:{} {}".format(host, port, stdout))
                if stdout != "ERR You should send CLUSTER FAILOVER to a slave":
                    # In some cases, the cluster failover is not complete
                    # even if stdout is OK
                    # If redis changed to master completely,
                    # return 'ERR You should send CLUSTER FAILOVER to a slave'
                    success = False
            if success:
                break
            msg = message.get('retry').format(try_count=try_count)
            logger.info(msg)
            time.sleep(5)
    finally:
        # Restore in ``finally`` so an error during failover does not leave
        # the nodes running with the temporary timeout.
        logger.debug('restore config: cluster-node-timeout')
        center.cli_config_set_all(key, origin_m_value, m_hosts, m_ports)
        center.cli_config_set_all(key, origin_s_value, s_hosts, s_ports)
    if not success:
        logger.error(message.get('error_redis_failover'))
        return

    # restart master (current slave)
    center.stop_redis(slave=False)
    center.configure_redis(slave=False)
    center.sync_conf()
    center.start_redis_process(slave=False)
    center.wait_until_all_redis_process_up()

    # change host info of redis.properties
    props_path = path_of_fb['redis_properties']
    # Ports of the addresses that received the failover become the master
    # ports; every other configured port becomes a slave port.
    after_m_ports = list(
        {int(addr.split(':')[1]) for addr in slaves_for_failover})
    after_s_ports = list(set(s_ports + m_ports) - set(after_m_ports))
    logger.debug("master port {}".format(m_ports))
    logger.debug("slave port {}".format(s_ports))
    key = 'sr2_redis_master_ports'
    logger.debug("next master port {}".format(after_m_ports))
    value = cluster_util.convert_list_2_seq(after_m_ports)
    logger.debug("converted {}".format(value))
    config.set_props(props_path, key, value)
    key = 'sr2_redis_slave_ports'
    logger.debug("next slave port {}".format(after_s_ports))
    value = cluster_util.convert_list_2_seq(after_s_ports)
    logger.debug("converted {}".format(value))
    config.set_props(props_path, key, value)