def check_systemd_version(self, info=None):
    # info: systemd info callback returned by ansible
    _lower_version = []
    for _host, _vars in info['success'].iteritems():
        for systemd_info in _vars['results']:
            # get systemd version
            if systemd_info['yumstate'] == 'installed':
                _version = '{}-{}'.format(systemd_info['version'],
                                          systemd_info['release'])
                # record the node when its version is lower than 219-52.el7
                if _version < '219-52.el7':
                    _lower_version.append([_host, _version])
    if _lower_version:
        term.warn(
            'Some machine\'s systemd service version is lower than "219-52.el7".'
        )
        _length = max(
            max([len(str(x[0])) for x in _lower_version]), len('IP'))
        term.normal('IP'.ljust(_length + 2) + 'Systemd_Version')
        for _node in _lower_version:
            term.normal('{}{}'.format(_node[0].ljust(_length + 2), _node[1]))
        term.warn(
            'There are known memory bugs in systemd versions lower than "219-52.el7". '
            'Refer to https://access.redhat.com/discussions/3536621.')
        if not utils.ticontinue():
            exit(1)
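
# NOTE: the '<' comparison against '219-52.el7' above is lexicographic on strings,
# so a release such as '219-9.el7' would wrongly sort above '219-52.el7'. A
# hypothetical helper (not part of tiops) that compares the numeric fields
# instead might look like the sketch below.
def _rpm_version_key(version):
    """Split an rpm-style 'version-release' string into comparable chunks."""
    import re
    return [int(part) if part.isdigit() else part
            for part in re.split(r'[.-]', version)]
# Example: _rpm_version_key('219-9.el7') < _rpm_version_key('219-52.el7')  -> True
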
def _process(self, component=None, pattern=None, node=None, role=None):
    # create directories
    term.info('Create directories on all nodes.')
    for service in self.topology.service_group:
        component, pattern = self.check_exist(service, config=self.topology())
        if not component and not pattern:
            continue
        self.act.create_directory(component=component, pattern=pattern)

    if not self.demo:
        self.act.check_machine_config()

    # start the deploy
    if self.demo:
        term.warn(
            'FirewallD is being disabled on deployment machines in quick deploy mode.'
        )
    for service in self.topology.service_group:
        component, pattern = self.check_exist(service, config=self.topology())
        if not component and not pattern:
            continue
        term.normal('Deploy {}.'.format(component))
        self.act.deploy_component(component=component, pattern=pattern)
        self.act.deploy_firewall(component=component, pattern=pattern)
    if not self.demo:
        self.act.deploy_tool()
def check_hostname(self, facts=None):
    # facts: ansible facts callback
    _hostname_list = {}
    for _host, _vars in facts['success'].iteritems():
        _hostname = _vars['ansible_facts']['ansible_hostname']
        if _hostname_list.has_key(_hostname):
            _hostname_list[_hostname].append(_host)
        else:
            _hostname_list[_hostname] = [_host]
    # check whether different hosts share the same hostname
    _cache_hostname_list = copy.deepcopy(_hostname_list)
    for _host_name, _ip in _hostname_list.iteritems():
        if len(_ip) == 1:
            del _cache_hostname_list[_host_name]
    if _cache_hostname_list:
        term.warn("Some machines' hostnames conflict.")
        _length = max(
            max([len(str(x)) for x in _cache_hostname_list.keys()]),
            len('Hostname'))
        term.normal('Hostname'.ljust(_length + 2) + 'Hosts')
        for _hostname, _hosts in _cache_hostname_list.iteritems():
            term.normal('{}{}'.format(_hostname.ljust(_length + 2),
                                      ', '.join(_hosts)))
        if not utils.ticontinue():
            exit(1)
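
# The grouping and conflict filtering above could also be written with
# collections.defaultdict; a hypothetical sketch (not part of tiops), assuming
# the same `facts` layout:
#
#   from collections import defaultdict
#   grouped = defaultdict(list)
#   for _host, _vars in facts['success'].iteritems():
#       grouped[_vars['ansible_facts']['ansible_hostname']].append(_host)
#   conflicts = {name: hosts for name, hosts in grouped.iteritems()
#                if len(hosts) > 1}
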
def __init__(self, args=None, topology=None, new_srvs=None):
    if os.path.exists(topology.topology_file):
        term.warn(
            'Checking TiDB cluster {} status, this may take a few minutes.'.format(
                topology.cluster_name))
        self.check_tombstone(topology, args)
    self._new_topo, self._diff = topology.add(new_srvs)
    topology.replace(self._new_topo, write=False)
    super(OprScaleOut, self).__init__(args, topology, action='deploy')
    self.act = Action(ans=self.ans, topo=self.topology)
def _prepare(self, component=None, pattern=None, node=None, role=None):
    term.warn('The TiDB cluster {} ({}) is going to be destroyed.'.format(
        self.topology.cluster_name, self.topology.version))
    rm_prompt = 'This operation will ' + term.warn_red('remove') \
                + ' the TiDB cluster ' + term.highlight_red(self.topology.cluster_name) \
                + '. It can NOT be undone. ' + term.yes_no() + ':'
    notice = term.input(rm_prompt)
    if notice.lower() not in ['y', 'yes']:
        term.notice('Terminate the destroy operation.')
        raise exceptions.TiOPSRuntimeError('Operation cancelled by user.')
def __init__(self, args=None, topology=None, demo=False, action=None):
    try:
        self._lock_profile()
    except NotImplementedError:
        pass
    except exceptions.TiOPSException:
        # TODO: add more specific handlers
        raise

    self.topology = topology
    self._args = args
    if not demo and os.path.exists(
            self.topology.topology_file) and not action:
        term.warn(
            'Checking TiDB cluster {} status, this may take a few minutes.'.format(
                self.topology.cluster_name))
        self.check_tombstone()

    self.ans = ansibleapi.ANSRunner(user=self.topology.user,
                                    topology=self.topology(),
                                    tiargs=self._args)
def run_playbook(self, playbook_path, extra_vars=None):
    """Run an ansible playbook."""
    try:
        self.callback = PlayBookResultsCollector()
        if extra_vars:
            self.variable_manager.extra_vars = extra_vars
        executor = PlaybookExecutor(
            playbooks=[playbook_path],
            inventory=self.inventory,
            variable_manager=self.variable_manager,
            loader=self.loader,
            options=self.options,
            passwords=self.password,
        )
        executor._tqm._stdout_callback = self.callback
        executor.run()
    except Exception as e:
        term.warn(str(e))
        return False
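
# A minimal usage sketch for run_playbook (the user name, playbook path and
# cluster name below are illustrative, not taken from the tiops repository):
#
#   runner = ansibleapi.ANSRunner(user='tidb', topology=topo(), tiargs=args)
#   runner.run_playbook('/path/to/some_playbook.yml',
#                       extra_vars={'cluster_name': 'demo-cluster'})
#
# run_playbook returns False when the executor raises; per-host results are
# gathered by the PlayBookResultsCollector stdout callback.
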
def main(args=None):
    try:
        action = args.action
    except AttributeError:
        action = None

    if action == 'version':
        print(term.plain(TiOPSVer()))
        exit(0)

    if action == 'quickdeploy':
        term.warn(
            'The quick deploy mode is for demo and testing, do NOT use in production!'
        )
        # do init
        _init = init.Init(args)
        try:
            _init.init(demo=True)
            _init.init_network(demo=True)
            _init.init_host(demo=True)
        except TiOPSRuntimeError as e:
            tierror(e)
        except TiOPSException as e:
            term.debug(traceback.format_exc())
            term.fatal(str(e))
            sys.exit(1)
        # do deploy
        topo = topology.Topology(args=args, merge=True)
        try:
            op.OprDeploy(args, topo, demo=True).do()
            op.OprStart(args, topo, demo=True).do()
            tm.TUIModule(topo, args=args).display()
        except TiOPSRuntimeError as e:
            tierror(e)
        except TiOPSRequestError as e:
            msg = "{}, URL {} returned {}, please check the network and try again.".format(
                e.msg, e.url, e.code)
            term.error(msg)
            sys.exit(1)
        except TiOPSException as e:
            term.debug(traceback.format_exc())
            term.fatal(str(e))
            sys.exit(1)
    elif action == 'bootstrap-local':
        _init = init.Init(args)
        try:
            _init.init()
        except TiOPSRuntimeError as e:
            tierror(e)
        except TiOPSException as e:
            term.debug(traceback.format_exc())
            term.fatal(str(e))
            sys.exit(1)
    elif action == 'bootstrap-ssh':
        _init = init.Init(args)
        try:
            _init.init_network()
        except TiOPSRuntimeError as e:
            tierror(e)
        except TiOPSException as e:
            term.debug(traceback.format_exc())
            term.fatal(str(e))
            sys.exit(1)
    elif action == 'bootstrap-host':
        _init = init.Init(args)
        try:
            _init.init_host()
        except TiOPSRuntimeError as e:
            tierror(e)
        except TiOPSException as e:
            term.debug(traceback.format_exc())
            term.fatal(str(e))
            sys.exit(1)
    else:
        try:
            if action not in ['deploy', 'display']:
                topo = topology.Topology(args)
        except TiOPSRuntimeError as e:
            tierror(e)
        except TiOPSException as e:
            term.debug(traceback.format_exc())
            term.fatal(str(e))
            sys.exit(1)

        if action == 'display':
            try:
                _cluster_name = args.cluster_name
            except AttributeError:
                _cluster_name = None
            try:
                if _cluster_name and len(_cluster_name) > 0:
                    topo = topology.Topology(args)
                    _list = False
                else:
                    topo = None
                    _list = True
                tm.TUIModule(topo, args=args).display(_list)
            except TiOPSRuntimeError as e:
                tierror(e)
            except TiOPSException as e:
                term.debug(traceback.format_exc())
                term.fatal(str(e))
                sys.exit(1)
        elif action == 'deploy':
            topo = topology.Topology(args=args, merge=True)
            try:
                op.OprDeploy(args, topo).do()
                tm.TUIModule(topo, args=args).display()
            except TiOPSRuntimeError as e:
                tierror(e)
            except TiOPSRequestError as e:
                msg = "{}, URL {} returned {}, please check the network and try again.".format(
                    e.msg, e.url, e.code)
                term.error(msg)
                sys.exit(1)
            except TiOPSException as e:
                term.debug(traceback.format_exc())
                term.fatal(str(e))
                sys.exit(1)
        elif action == 'start':
            try:
                op.OprStart(args, topo).do(node=args.node_id, role=args.role)
                tm.TUIModule(topo, args=args, status=True).display()
            except TiOPSRuntimeError as e:
                tierror(e)
            except TiOPSException as e:
                term.debug(traceback.format_exc())
                term.fatal(str(e))
                sys.exit(1)
        elif action == 'stop':
            try:
                op.OprStop(args, topo).do(node=args.node_id, role=args.role)
            except TiOPSRuntimeError as e:
                tierror(e)
            except TiOPSException as e:
                term.debug(traceback.format_exc())
                term.fatal(str(e))
                sys.exit(1)
        elif action == 'restart':
            try:
                op.OprRestart(args, topo).do(node=args.node_id, role=args.role)
            except TiOPSRuntimeError as e:
                tierror(e)
            except TiOPSException as e:
                term.debug(traceback.format_exc())
                term.fatal(str(e))
                sys.exit(1)
        elif action == 'reload':
            try:
                op.OprReload(args, topo).do(node=args.node_id, role=args.role)
            except TiOPSRuntimeError as e:
                tierror(e)
            except TiOPSException as e:
                term.debug(traceback.format_exc())
                term.fatal(str(e))
                sys.exit(1)
        elif action == 'upgrade':
            try:
                op.OprUpgrade(args, topo).do(node=args.node_id, role=args.role)
            except TiOPSRuntimeError as e:
                tierror(e)
            except TiOPSRequestError as e:
                msg = "{}, URL {} returned {}, please check the network and try again.".format(
                    e.msg, e.url, e.code)
                term.error(msg)
                sys.exit(1)
            except TiOPSException as e:
                term.debug(traceback.format_exc())
                term.fatal(str(e))
                sys.exit(1)
        elif action == 'destroy':
            try:
                op.OprDestroy(args, topo).do()
            except TiOPSRuntimeError as e:
                tierror(e)
            except TiOPSException as e:
                term.debug(traceback.format_exc())
                term.fatal(str(e))
                sys.exit(1)
        elif action == 'edit-config':
            try:
                Action(topo=topo).edit_file()
            except TiOPSRuntimeError as e:
                tierror(e)
            except TiOPSException as e:
                term.debug(traceback.format_exc())
                term.fatal(str(e))
                sys.exit(1)
        elif action == 'scale-out':
            addTopo = utils.read_yaml(args.topology)
            try:
                op.OprScaleOut(args, topo, addTopo).do()
            except TiOPSRuntimeError as e:
                tierror(e)
            except TiOPSException as e:
                term.debug(traceback.format_exc())
                term.fatal(str(e))
                sys.exit(1)
        elif action == 'scale-in':
            try:
                op.OprScaleIn(args, topo, args.node_id).do(node=args.node_id)
            except TiOPSRuntimeError as e:
                tierror(e)
            except TiOPSException as e:
                term.debug(traceback.format_exc())
                term.fatal(str(e))
                sys.exit(1)
        elif action == 'exec':
            try:
                op.OprExec(args, topo).do(node=args.node_id, role=args.role)
            except TiOPSRuntimeError as e:
                tierror(e)
            except TiOPSException as e:
                term.debug(traceback.format_exc())
                term.fatal(str(e))
                sys.exit(1)
def run_model(self, module_name, module_args, become=False, register=None,
              with_items=None, group='*', extra_vars=None, node=None):
    """Run a module as an ansible ad-hoc task.

    module_name: ansible module name
    module_args: ansible module args
    """
    if self.topology:
        service_names = {
            'node_exporter': ['monitored_servers', 'node_exporter_port'],
            'blackbox_exporter': ['monitored_servers', 'blackbox_exporter_port'],
            'prometheus': ['monitoring_server', 'prometheus_port'],
            'pushgateway': ['monitoring_server', 'pushgateway_port']
        }
        if extra_vars in service_names and \
                service_names[extra_vars][0] in self.inventory.get_groups_dict():
            for host in self.inventory.get_groups_dict()[service_names[extra_vars][0]]:
                hostname = self.inventory.get_host(hostname=host)
                service_name = '{}-{}'.format(
                    extra_vars,
                    self.variable_manager.get_vars(
                        host=hostname)[service_names[extra_vars][1]])
                self.variable_manager.set_host_variable(
                    host=hostname, varname='service_name', value=service_name)

    if self.cluster_name and extra_vars:
        self.variable_manager.extra_vars = {
            'cluster_name': self.cluster_name,
            'service': extra_vars
        }
    else:
        self.variable_manager.extra_vars = {'cluster_name': self.cluster_name}

    # build the single-task play from the given module and the optional
    # register / with_items settings
    if register and with_items:
        task = [dict(action=dict(module=module_name, args=module_args),
                     become=become, register=register, with_items=with_items)]
    elif register is None and with_items:
        task = [dict(action=dict(module=module_name, args=module_args),
                     become=become, with_items=with_items)]
    elif register and with_items is None:
        task = [dict(action=dict(module=module_name, args=module_args),
                     become=become, register=register)]
    else:
        task = [dict(action=dict(module=module_name, args=module_args),
                     become=become)]

    if node:
        node_list = node.split(',')
        if len(node_list) == 1:
            # a trailing comma makes ansible treat the string as a host list,
            # not an inventory pattern
            node_str = '{},'.format(node)
        else:
            node_str = ','.join(node_list)

    play_source = dict(
        name="Ansible Play",
        hosts=self.ips if self.ips else (node_str if node else group),
        gather_facts='no',
        tasks=task
    )
    play = Play().load(play_source,
                       variable_manager=self.variable_manager,
                       loader=self.loader)

    tqm = None
    self.callback = ModelResultsCollector()
    import traceback
    try:
        tqm = TaskQueueManager(
            inventory=self.inventory,
            variable_manager=self.variable_manager,
            loader=self.loader,
            options=self.options,
            passwords=self.password,
            stdout_callback="minimal",
        )
        tqm._stdout_callback = self.callback
        tqm.run(play)
    except Exception as e:
        term.warn(str(e))
        term.debug(traceback.format_exc())
    finally:
        if tqm is not None:
            tqm.cleanup()

    result = self.get_model_result()

    # aggregate errors per node, skipping messages that are expected for
    # offline or tombstoned nodes
    failed = {}
    unreachable = {}
    offline_list = []
    if self.topology:
        for grp in ['drainer_servers', 'pump_servers', 'tikv_servers']:
            if not self.topology.has_key(grp) or not self.topology[grp]:
                continue
            for _node in self.topology[grp]:
                if _node['offline']:
                    offline_list.append(_node['uuid'])

    if result['success']:
        for _uuid, _info in result['success'].iteritems():
            _ip = _info['ansible_host']
            if _info.has_key('stderr') and _info['stderr']:
                try:
                    failed[_uuid][_ip].append(_info['stderr'])
                except:
                    if not failed.has_key(_uuid):
                        failed[_uuid] = {}
                    failed[_uuid][_ip] = [_info['stderr']]
    if result['failed']:
        for _uuid, _info in result['failed'].iteritems():
            _ip = _info['ansible_host']
            if _info.has_key('stderr') and _info['stderr']:
                try:
                    failed[_uuid][_ip].append(_info['stderr'])
                except:
                    if not failed.has_key(_uuid):
                        failed[_uuid] = {}
                    failed[_uuid][_ip] = [_info['stderr']]
            if _info.has_key('stdout') and _info['stdout']:
                try:
                    failed[_uuid][_ip].append(_info['stdout'])
                except:
                    if not failed.has_key(_uuid):
                        failed[_uuid] = {}
                    failed[_uuid][_ip] = [_info['stdout']]
            if _info.has_key('msg') and \
                    _info['msg'] and \
                    "'full_data_dir' is undefined" not in _info['msg'] and \
                    not re.search(r'Could not find.*firewalld', _info['msg']):
                if _uuid in offline_list and re.search(
                        r'the.*port.*is not up', _info['msg']):
                    continue
                try:
                    failed[_uuid][_ip].append(_info['msg'])
                except:
                    if not failed.has_key(_uuid):
                        failed[_uuid] = {}
                    failed[_uuid][_ip] = [_info['msg']]
    if result['unreachable']:
        for _uuid, _info in result['unreachable'].iteritems():
            _ip = _info['ansible_host']
            if _info.has_key('stderr') and _info['stderr']:
                try:
                    unreachable[_uuid][_ip].append(_info['stderr'])
                except:
                    if not unreachable.has_key(_uuid):
                        unreachable[_uuid] = {}
                    unreachable[_uuid][_ip] = [_info['stderr']]
            if _info.has_key('stdout') and _info['stdout']:
                try:
                    unreachable[_uuid][_ip].append(_info['stdout'])
                except:
                    if not unreachable.has_key(_uuid):
                        unreachable[_uuid] = {}
                    unreachable[_uuid][_ip] = [_info['stdout']]
            if _info.has_key('msg') and _info['msg']:
                try:
                    unreachable[_uuid][_ip].append(_info['msg'])
                except:
                    if not unreachable.has_key(_uuid):
                        unreachable[_uuid] = {}
                    unreachable[_uuid][_ip] = [_info['msg']]

    if not failed and not unreachable:
        return result

    msg = {}
    msg['failed'] = failed
    msg['unreachable'] = unreachable
    raise exceptions.TiOPSRuntimeError(msg, result, tp='ansible')
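
# On error, the TiOPSRuntimeError raised above carries a dict whose shape,
# as built by the aggregation logic in run_model, is roughly:
#
#   {'failed':      {<node uuid>: {<ansible_host ip>: ['stderr/stdout/msg', ...]}},
#    'unreachable': {<node uuid>: {<ansible_host ip>: ['stderr/stdout/msg', ...]}}}
#
# together with the raw `result` dict and tp='ansible'.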