Example #1
File: init.py Project: nrc/tiup
    def check_systemd_version(self, info=None):
        # info: systemd package info from the ansible callback
        _lower_version = []

        for _host, _vars in info['success'].iteritems():
            for systemd_info in _vars['results']:
                # get systemd version
                if systemd_info['yumstate'] == 'installed':
                    _version = '{}-{}'.format(systemd_info['version'],
                                              systemd_info['release'])
                    # record hosts whose version is lower than 219-52.el7
                    if _version < '219-52.el7':
                        _lower_version.append([_host, _version])

        if _lower_version:
            term.warn(
                'The systemd version on some machines is lower than "219-52.el7".'
            )
            _length = max(max([len(str(x[0])) for x in _lower_version]),
                          len('IP'))
            term.normal('IP'.ljust(_length + 2) + 'Systemd_Version')
            for _node in _lower_version:
                term.normal('{}{}'.format(_node[0].ljust(_length + 2),
                                          _node[1]))
            term.warn(
                'There are memory bugs in systemd versions lower than "219-52.el7". '
                'Refer to https://access.redhat.com/discussions/3536621.')
            if not utils.ticontinue():
                exit(1)
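
For orientation, the sketch below shows the shape of the ansible callback dict that check_systemd_version appears to expect, inferred from the field accesses above; the host address and package values are hypothetical samples, not real cluster output.

# Hypothetical sample of the callback structure consumed above; only the
# keys actually read by check_systemd_version are included.
sample_info = {
    'success': {
        '172.16.0.1': {
            'results': [
                {'yumstate': 'installed', 'version': '219', 'release': '42.el7'},
            ],
        },
    },
}
# check_systemd_version(sample_info) would warn about 172.16.0.1, because
# '219-42.el7' compares lower than '219-52.el7' as a plain string.
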
Example #2
File: start.py Project: nrc/tiup
    def _process(self, component=None, pattern=None, node=None, role=None):
        if node:
            term.notice('Start specified node in cluster.')
        elif role:
            term.notice('Start specified role in cluster.')
        else:
            term.notice('Start TiDB cluster.')
        _topology = self.topology.role_node(roles=role, nodes=node)

        if not self.demo:
            term.info('Check ssh connection.')
            self.act.check_ssh_connection()

        for service in self.topology.service_group:
            component, pattern = self.check_exist(service, config=_topology)
            if not component and not pattern:
                continue
            if not node:
                term.normal('Starting {}.'.format(component))
                self.act.start_component(component, pattern)
            else:
                _uuid = [x['uuid'] for x in _topology[pattern]]
                term.normal('Starting {}, node list: {}.'.format(
                    component, ','.join(_uuid)))
                self.act.start_component(component, pattern, ','.join(_uuid))
Example #3
    def _process(self, component=None, pattern=None, node=None, role=None):
        # create directories
        term.info('Create directories on all nodes.')
        for service in self.topology.service_group:
            component, pattern = self.check_exist(service,
                                                  config=self.topology())
            if not component and not pattern:
                continue
            self.act.create_directory(component=component, pattern=pattern)

        if not self.demo:
            self.act.check_machine_config()

        # start the deployment
        if self.demo:
            term.warn(
                'FirewallD is being disabled on deployment machines in quick deploy mode.'
            )
        for service in self.topology.service_group:
            component, pattern = self.check_exist(service,
                                                  config=self.topology())
            if not component and not pattern:
                continue
            term.normal('Deploy {}.'.format(component))
            self.act.deploy_component(component=component, pattern=pattern)
            self.act.deploy_firewall(component=component, pattern=pattern)

        if not self.demo:
            self.act.deploy_tool()
Example #4
File: init.py Project: nrc/tiup
    def check_hostname(self, facts=None):
        # facts: the ansible facts callback
        _hostname_list = {}
        for _host, _vars in facts['success'].iteritems():
            _hostname = _vars['ansible_facts']['ansible_hostname']
            if _hostname_list.has_key(_hostname):
                _hostname_list[_hostname].append(_host)
            else:
                _hostname_list[_hostname] = [_host]

        # check whether any hostname is shared by multiple hosts
        _cache_hostname_list = copy.deepcopy(_hostname_list)
        for _host_name, _ip in _hostname_list.iteritems():
            if len(_ip) == 1:
                del _cache_hostname_list[_host_name]

        if _cache_hostname_list:
            term.warn("Some machine\'s hostname conflict.")
            _length = max(
                max([len(str(x)) for x in _cache_hostname_list.keys()]),
                len('Hostname'))
            term.normal('Hostname'.ljust(_length + 2) + 'Hosts')
            for _hostname, _hosts in _cache_hostname_list.iteritems():
                term.normal('{}{}'.format(_hostname.ljust(_length + 2),
                                          ', '.join(_hosts)))
            if not utils.ticontinue():
                exit(1)
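
As an aside, the grouping step above can be written more compactly in Python 3 with collections.defaultdict instead of has_key; the helper below is an illustrative sketch, not code from the project.

from collections import defaultdict

def find_hostname_conflicts(facts):
    # Group host addresses by their reported hostname, then keep only the
    # hostnames seen on more than one host (the conflicts that
    # check_hostname warns about).
    by_hostname = defaultdict(list)
    for host, host_vars in facts['success'].items():
        by_hostname[host_vars['ansible_facts']['ansible_hostname']].append(host)
    return {name: hosts for name, hosts in by_hostname.items() if len(hosts) > 1}
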
Example #5
    def _prepare(self, component=None, pattern=None, node=None, role=None):
        try:
            self.cmd = ' '.join(self._args.cmd)
        except AttributeError:
            raise exceptions.TiOPSArgumentError(
                'No command specified, nothing to do.')
        term.notice('Run raw shell command on {} cluster.'.format(
            self.topology.cluster_name))
        term.normal('{}'.format(self.cmd))
Example #6
File: scale.py Project: nrc/tiup
    def _process(self, component=None, pattern=None, node=None, role=None):
        _unhealth_node = []
        for _pd_node in self._cluster.status():
            if not _pd_node['health']:
                _unhealth_node.append(_pd_node['name'])
                msg = 'Some PD node is unhealthy, maybe the server is stopped or the network is unreachable, unhealthy node list: {}'.format(
                    ','.join(_unhealth_node))
                term.fatal(msg)
                raise exceptions.TiOPSRuntimeError(msg, operation='scaleIn')

        _current_pd_num = len(self._pd_status)
        _current_tikv_num = len(self._tikv_stores)

        if 'pd_servers' in self._diff and len(
                self._diff['pd_servers']) == _current_pd_num:
            term.fatal('Cannot delete all PD nodes.')
            exit(1)

        if 'tikv_servers' in self._diff and len(
                self._diff['tikv_servers']) == _current_tikv_num:
            term.fatal('Cannot delete all TiKV nodes.')
            exit(1)

        term.info('Check ssh connection.')
        self.act.check_ssh_connection()

        for service in self.topology.service_group[::-1]:
            component, pattern = self.check_exist(service, self._diff)
            if not component and not pattern:
                continue
            uuid = [x['uuid'] for x in self._diff[pattern]]
            term.normal('Delete {}, node list: {}'.format(
                component, ','.join(uuid)))
            for _uuid in uuid:
                self.__delete_component(self._diff, component, pattern, _uuid)
                if component not in ['tikv', 'pump', 'drainer']:
                    self.act.stop_component(component=component,
                                            pattern=pattern,
                                            node=_uuid)
                    self.act.destroy_component(component=component,
                                               pattern=pattern,
                                               node=_uuid)
                if component != 'blackbox_exporter':
                    self.topology.replace(self.topology.remove(_uuid)[0])
Example #7
File: init.py Project: nrc/tiup
    def check_os_version(self, facts=None):
        _lower_version = []

        for _host, _vars in facts['success'].iteritems():
            # get system version
            _sysversion = str(
                _vars['ansible_facts']['ansible_distribution_version'])
            if _sysversion < '7':
                _lower_version.append([_host, _sysversion])

        if _lower_version:
            term.fatal("Some machines' OS version is not supported.")
            _length = max(max([len(str(x[0])) for x in _lower_version]),
                          len('IP'))
            term.normal('IP'.ljust(_length + 2) + 'OS_Version')
            for _node in _lower_version:
                term.normal('{}{}'.format(_node[0].ljust(_length + 2),
                                          _node[1]))
            exit(1)
Example #8
    def _process(self, component=None, pattern=None, node=None, role=None):
        term.info('Check ssh connection.')
        self.act.check_ssh_connection()
        term.info('Stopping TiDB cluster.')
        for service in self.topology.service_group[::-1]:
            component, pattern = self.check_exist(service,
                                                  config=self.topology())
            if not component and not pattern:
                continue
            try:
                self.act.stop_component(component=component,
                                        pattern=pattern,
                                        node=node)
            except exceptions.TiOPSWarning as e:
                term.debug(str(e))
                pass

        for service in self.topology.service_group[::-1]:
            component, pattern = self.check_exist(service,
                                                  config=self.topology())
            if not component and not pattern:
                continue
            term.normal('{} is being destroyed.'.format(component))
            try:
                self.act.destroy_component(component=component,
                                           pattern=pattern,
                                           node=node)
            except exceptions.TiOPSWarning as e:
                term.debug(str(e))
                pass

        # remove deploy and data directories
        self.ans.run_model('shell',
                           'rm -rf {{ full_deploy_dir | cluster_dir }}',
                           become=True,
                           group='*')

        self.ans.run_model('shell',
                           'rm -rf {{ full_data_dir | cluster_dir }}',
                           become=True,
                           group='*')
Example #9
File: init.py Project: nrc/tiup
    def check_os_platform(self, facts=None):
        _unsupport_os = []

        # get the operating system platform
        for _host, _vars in facts['success'].iteritems():
            _platform = _vars['ansible_facts']['ansible_os_family']
            if 'redhat' == _platform.lower():
                continue
            _unsupport_os.append([_host, _platform])

        if _unsupport_os:
            term.fatal(
                "Some machines' OS is not supported. Please use RedHat / CentOS."
            )
            _length = max(max([len(str(x[0])) for x in _unsupport_os]),
                          len('IP'))
            term.normal('IP'.ljust(_length + 2) + 'OS_Family')
            for _node in _unsupport_os:
                term.normal('{}{}'.format(_node[0].ljust(_length + 2),
                                          _node[1]))
            exit(1)
Example #10
File: init.py Project: nrc/tiup
    def init(self, demo=False):
        term.notice('Start initializing the management machine.')
        key_home = utils.profile_path('.ssh')
        if not os.path.exists(key_home):
            utils.create_dir(key_home)
            os.chmod(os.path.join(key_home), 0o700)
        if not os.path.isfile(os.path.join(key_home, 'id_rsa')) or \
                not os.path.isfile(os.path.join(key_home, 'id_rsa.pub')):
            term.info('No SSH key found for {}. Generating one.'.format(
                getpass.getuser()))
            os.system(
                '/usr/bin/ssh-keygen -t rsa -N \'\' -f {}/id_rsa -q'.format(
                    key_home))
        else:
            term.normal('SSH key already exists for {}, skipping generation.'.format(
                getpass.getuser()))

        if demo:
            term.notice('Finished initializing the management machine.')
        else:
            term.notice('Done!!!')
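
If the shell-out above were reworked, the same key generation could be done with subprocess, which avoids interpolating key_home into a shell command string; the function below is a sketch of that alternative, not the project's own code.

import os
import subprocess

def generate_ssh_key(key_home):
    # Equivalent of the os.system() call above: create an unencrypted RSA
    # key pair at <key_home>/id_rsa without prompting for a passphrase.
    subprocess.check_call([
        '/usr/bin/ssh-keygen', '-t', 'rsa',
        '-N', '', '-f', os.path.join(key_home, 'id_rsa'), '-q',
    ])
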
Example #11
    def _check_config(self):
        _servers = [
            {
                'pd': 'pd_servers'
            },
            {
                'tikv': 'tikv_servers'
            },
            {
                'tidb': 'tidb_servers'
            },
        ]

        for _service in _servers:
            _component, _pattern = self.check_exist(_service,
                                                    config=self.topology())
            if not _component and not _pattern:
                continue
            term.normal('Check {} configuration.'.format(_component))
            self.act.configCheck(component=_component,
                                 pattern=_pattern,
                                 node=self.topology()[_pattern][0]['uuid'])
Example #12
File: scale.py Project: nrc/tiup
    def _process(self, component=None, pattern=None, node=None, role=None):
        term.info('Check ssh connection.')
        self.act.check_ssh_connection()
        self.act.edit_file()
        try:
            term.info('Create directories on all newly added nodes.')
            for service in self.topology.service_group:
                component, pattern = self.check_exist(service, self._diff)
                if not component and not pattern:
                    continue
                uuid = [x['uuid'] for x in self._diff[pattern]]
                self.act.create_directory(component=component,
                                          pattern=pattern,
                                          node=','.join(uuid))

            # check machine cpu / memory / disk
            self.act.check_machine_config(self._diff)
            # start the scale-out
            for service in self.topology.service_group:
                component, pattern = self.check_exist(service, self._diff)
                if not component and not pattern:
                    continue
                uuid = [x['uuid'] for x in self._diff[pattern]]
                term.normal('Add {}, node list: {}.'.format(
                    component, ','.join(uuid)))
                _template_dir = self.topology.cache_template_dir
                self.act.deploy_component(component=component,
                                          pattern=pattern,
                                          node=','.join(uuid),
                                          template_dir=_template_dir)
                self.act.deploy_firewall(component=component,
                                         pattern=pattern,
                                         node=','.join(uuid))
                self.act.start_component(component=component,
                                         pattern=pattern,
                                         node=','.join(uuid))
        finally:
            os.popen('rm -rf {}'.format(self.topology.cache_template_dir))
Example #13
    def _process(self, component=None, pattern=None, node=None, role=None):
        if node:
            term.notice('Reload specified node in cluster.')
        elif role:
            term.notice('Reload specified role in cluster.')
        else:
            term.notice('Reload TiDB cluster.')
        _topology = self.topology.role_node(roles=role, nodes=node)

        _cluster = modules.ClusterAPI(topology=self.topology)
        _unhealth_node = []
        for _pd_node in _cluster.status():
            if not _pd_node['health']:
                _unhealth_node.append(_pd_node['name'])
                msg = 'Some PD node is unhealthy, maybe the server is stopped or the network is unreachable, unhealthy node list: {}'.format(
                    ','.join(_unhealth_node))
                term.fatal(msg)
                raise exceptions.TiOPSRuntimeError(msg, operation='reload')

        term.info('Check ssh connection.')
        self.act.check_ssh_connection()
        # each iteration handles a single component
        for service in self.topology.service_group:
            component, pattern = self.check_exist(service=service,
                                                  config=_topology)
            if not component and not pattern:
                continue
            # upgrade the PD servers, upgrading the leader node last
            if component == 'pd':
                _pd_list = []
                for _node in _topology[pattern]:
                    if _node['uuid'] == _cluster.pd_leader():
                        _leader = _node
                    else:
                        _pd_list.append(_node)
                _pd_list.append(_leader)

                for _node in _pd_list:
                    _uuid = _node['uuid']
                    _host = _node['ip']
                    term.normal('Reload {}, node id: {}.'.format(
                        component, _uuid))
                    if _uuid == _cluster.pd_leader():
                        _cluster.evict_pd_leader(uuid=_uuid)

                    self.act.deploy_component(component=component,
                                              pattern=pattern,
                                              node=_uuid)
                    self.act.stop_component(component=component,
                                            pattern=pattern,
                                            node=_uuid)
                    self.act.start_component(component=component,
                                             pattern=pattern,
                                             node=_uuid)
                continue

            if pattern in [
                    'monitored_servers', 'monitoring_server', 'grafana_server',
                    'alertmanager_server'
            ]:
                if not node:
                    term.normal('Reload {}.'.format(component))
                    self.act.deploy_component(component=component,
                                              pattern=pattern)
                    self.act.stop_component(component=component,
                                            pattern=pattern)
                    self.act.start_component(component=component,
                                             pattern=pattern)
                else:
                    _uuid = [x['uuid'] for x in _topology[pattern]]
                    term.normal('Reload {}, node list: {}.'.format(
                        component, ','.join(_uuid)))
                    self.act.deploy_component(component=component,
                                              pattern=pattern,
                                              node=','.join(_uuid))
                    self.act.stop_component(component=component,
                                            pattern=pattern,
                                            node=','.join(_uuid))
                    self.act.start_component(component=component,
                                             pattern=pattern,
                                             node=','.join(_uuid))
                continue

            for _node in _topology[pattern]:
                _uuid = _node['uuid']
                _host = _node['ip']
                term.normal('Reload {}, node id: {}.'.format(component, _uuid))
                if pattern == 'tikv_servers':
                    _port = _node['port']
                    _cluster.evict_store_leaders(host=_host, port=_port)
                self.act.deploy_component(component=component,
                                          pattern=pattern,
                                          node=_uuid)
                self.act.stop_component(component=component,
                                        pattern=pattern,
                                        node=_uuid)
                self.act.start_component(component=component,
                                         pattern=pattern,
                                         node=_uuid)

                if pattern == 'tikv_servers':
                    _cluster.remove_evict(host=_host, port=_port)
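
For a single TiKV node, the restart sequence in the loop above reduces to the order below; the helper is a schematic outline built from the same act and cluster calls shown in the example, not a function that exists in the project.

def reload_tikv_node(cluster, act, node, component='tikv', pattern='tikv_servers'):
    # Schematic restart order for one TiKV node: move region leaders away
    # first, redeploy and restart the service, then lift the eviction.
    cluster.evict_store_leaders(host=node['ip'], port=node['port'])
    act.deploy_component(component=component, pattern=pattern, node=node['uuid'])
    act.stop_component(component=component, pattern=pattern, node=node['uuid'])
    act.start_component(component=component, pattern=pattern, node=node['uuid'])
    cluster.remove_evict(host=node['ip'], port=node['port'])
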