def check_ntp():
    """Verify that ntpd is running and that an NTP server is configured."""
    check_service('ntpd')
    server = _get_ntpserver()
    if server:
        LOG.debug('ntpserver is %s' % server)
    else:
        LOG.error('Can not get ntp server, please check it.')
def check_node_online(host):
    """Return True when *host* answers a trivial ssh echo probe."""
    (out, err) = ssh_connect(host, 'echo "online test"')
    # a reachable node echoes the probe string on the first stdout line
    if out.split('\n')[0] == 'online test':
        LOG.debug('Node %s is online .' % host)
        return True
    return False
def check_selinux():
    """Verify SELinux is disabled, both at runtime and in its config file."""
    # Correct state [enforcing, permissive, disabled]
    correct_state = correct_conf = "disabled"
    # check current state
    (s, out) = commands.getstatusoutput('getenforce')
    current_state = out
    if s != 0:
        LOG.error('getenforce error, please check it')
    else:
        # getenforce reports a capitalized value, e.g. "Disabled"
        if current_state == correct_state.capitalize():
            LOG.debug('SELinux current state is: %s' % current_state)
        else:
            LOG.warn('SELinux current state is: %s' % current_state)
            LOG.error('SELinux state need to be %s '
                      % correct_state.capitalize())
    # check profile /etc/sysconfig/selinux
    current_conf = commands.getoutput(
        'grep "^SELINUX=" /etc/sysconfig/selinux | cut -d "=" -f 2')
    if current_conf == correct_conf:
        LOG.debug('SELinux current conf in profile is: %s' % current_conf)
    else:
        LOG.warn('SELinux current conf in profile is: %s' % current_conf)
        LOG.error('SELinux configuration in profile need to be %s '
                  % correct_conf)
def rsync_plugin_modules_on_node(host, plugin_name, fuel_node_ip):
    """Rsync a plugin's puppet modules from the fuel master onto *host*."""
    LOG.debug('Sync plugin modules on node %s .' % host)
    version = get_plugin_version(plugin_name)
    src = 'rsync://%s:/plugins/%s-%s/deployment_scripts/puppet/' % (
        fuel_node_ip, plugin_name, version)
    dst = '/etc/fuel/plugins/%s-%s/puppet' % (plugin_name, version)
    # make sure the destination directory exists before syncing
    (out, err) = ssh_connect(host, 'test -d %s || mkdir -p %s' % (dst, dst))
    if err != '':
        log_split_output(err, 'error')
    (out, err) = ssh_connect(host, 'rsync -vzrtopg %s %s' % (src, dst))
    if err != '':
        log_split_output(err, 'error')
def push_yaml_to_node(host, src_path, dst_file_name):
    """Copy *src_path* to /etc/hiera/<dst_file_name> on *host*."""
    (out, err) = ssh_connect2(host, 'test -d /etc/hiera || mkdir /etc/hiera')
    if err != '':
        LOG.error('Can not create "/etc/hiera/" on node %s .' % host)
        return
    LOG.debug('Push %s to node %s .' % (src_path, host))
    scp_connect(host, src_path, '/etc/hiera/%s' % dst_file_name)
def check_node_online(host):
    """Return True if *host* answers an ssh "echo" probe, else False."""
    online = False
    (out, err) = ssh_connect(host, 'echo "online test"')
    # first line of stdout echoes the probe string when ssh succeeded
    if out.split('\n')[0] == 'online test':
        LOG.debug('Node %s is online .' % host)
        online = True
    return online
def check_ntp():
    """Check that ntpd runs and that an upstream NTP server is configured."""
    check_service("ntpd")
    ntpserver = _get_ntpserver()
    if not ntpserver:
        LOG.error("Can not get ntp server, please check it.")
    else:
        LOG.debug("ntpserver is %s" % ntpserver)
def check_selinux():
    """Check that SELinux is disabled at runtime and in its profile."""
    # Desired value, one of [enforcing, permissive, disabled]
    expected = "disabled"
    # runtime state via getenforce
    (status, output) = commands.getstatusoutput('getenforce')
    if status != 0:
        LOG.error('getenforce error, please check it')
    elif output == expected.capitalize():
        LOG.debug('SELinux current state is: %s' % output)
    else:
        LOG.warn('SELinux current state is: %s' % output)
        LOG.error('SELinux state need to be %s ' % expected.capitalize())
    # persisted configuration in /etc/sysconfig/selinux
    configured = commands.getoutput(
        'grep "^SELINUX=" /etc/sysconfig/selinux | cut -d "=" -f 2')
    if configured == expected:
        LOG.debug('SELinux current conf in profile is: %s' % configured)
    else:
        LOG.warn('SELinux current conf in profile is: %s' % configured)
        LOG.error('SELinux configuration in profile need to be %s '
                  % expected)
def check_ntp():
    """Verify ntpd is running and report the configured NTP server."""
    check_service('ntpd')
    configured = _get_ntpserver()
    if configured:
        LOG.debug('ntpserver is %s' % configured)
    else:
        LOG.error('Can not get ntp server, please check it.')
def ping(peer, hostname, network_role):
    """Send one ICMP echo to *peer* and log reachability for *network_role*."""
    (status, out) = commands.getstatusoutput('ping -c 1 %s' % (peer))
    if status != 0:
        LOG.error('ping %s(%s) can not be reached --- %s network!'
                  % (peer, hostname, network_role))
    else:
        LOG.debug('ping %s(%s) reached --- %s network'
                  % (peer, hostname, network_role))
def check_node_profiles(role):
    """Check every config profile of every component assigned to *role*.

    The per-role and per-component helper functions are looked up by name
    in the module namespace instead of being built and run through eval(),
    which would execute arbitrary code if *role* were ever attacker
    controlled.
    """
    component_list = globals()['get_%s_component' % role]()
    for component in component_list:
        LOG.info('Checking "%s" Component' % component.capitalize())
        profile_list = globals()['get_%s_profiles' % component]()
        for profile in profile_list:
            LOG.debug('Profile: ' + profile)
            check_profile(profile, role)
def check_node_profiles(role):
    """Check the config profiles of every component belonging to *role*.

    NOTE(review): helper functions are resolved via eval() on the role and
    component names; safe only while *role* comes from trusted config.
    """
    component_list = eval('get_%s_component' % role)()
    for c in component_list:
        LOG.info('Checking "%s" Component' % c.capitalize())
        profile_list = eval('get_%s_profiles' % c)()
        for p in profile_list:
            LOG.debug('Profile: ' + p)
            check_profile(p, role)
def check_disk():
    """Warn when the root filesystem usage reaches the 85% limit."""
    limit = 85
    vfs = os.statvfs("/")
    # used blocks as a ceiling percentage of total blocks
    used_blocks = vfs.f_blocks - vfs.f_bavail
    used_percent = int(math.ceil((float(used_blocks) / float(vfs.f_blocks))
                                 * 100))
    if 0 <= used_percent < limit:
        LOG.debug('The "/" filesystem used %s%% space !' % used_percent)
    elif used_percent >= limit:
        LOG.warn('The "/" filesystem used %s%% space !' % used_percent)
def check_mysql_connect(server, user, pwd, dbname):
    """Probe MySQL connectivity by running SELECT VERSION() on *dbname*.

    Fixes: the bare ``except:`` (which also swallowed KeyboardInterrupt)
    is narrowed to Exception, the connection is closed even when the query
    fails, and the misspelled log messages are corrected.
    """
    try:
        db = MySQLdb.connect(server, user, pwd, dbname)
        try:
            cursor = db.cursor()
            cursor.execute('SELECT VERSION()')
            cursor.fetchone()
        finally:
            # close even if the query raised; original leaked the handle
            db.close()
        LOG.debug('Check Successfully.')
    except Exception:
        LOG.error('Check Failed.')
def check_mysql_connect(server, user, pwd, dbname):
    """Probe MySQL connectivity by running SELECT VERSION() on *dbname*."""
    try:
        db = MySQLdb.connect(server, user, pwd, dbname)
        cursor = db.cursor()
        cursor.execute('SELECT VERSION()')
        cursor.fetchone()
        db.close()
        LOG.debug('Check Sucessfully.')
    except:
        # NOTE(review): bare except hides the real failure reason, and the
        # connection is leaked when cursor()/execute() raise
        LOG.error('Check Faild.')
def check_component_availability(component, check_cmd):
    """Source /root/openrc and run *check_cmd* to probe an openstack service.

    *component* is kept for interface compatibility (used by callers for
    labelling).  Fixes the misspelled "Faild" log message and flattens the
    nesting with a guard clause.
    """
    ENV_FILE_PATH = '/root/openrc'
    if not os.path.exists(ENV_FILE_PATH):
        LOG.error('Can not load environment variables from "%s".'
                  % ENV_FILE_PATH)
        return
    (s, o) = commands.getstatusoutput('source %s;' % ENV_FILE_PATH
                                      + check_cmd)
    if s == 0:
        LOG.debug('Check Successfully.')
    else:
        LOG.error('Check Failed.')
        LOG.error(o)
def check_disk():
    """Log root filesystem usage; warn at or above the 85% limit."""
    limit = 85
    vfs = os.statvfs("/")
    # get "/" filesystem space used percent
    used_percent = int(
        math.ceil(
            (float(vfs.f_blocks - vfs.f_bavail) / float(vfs.f_blocks)) * 100))
    if used_percent >= 0 and used_percent < limit:
        LOG.debug('The "/" filesystem used %s%% space !' % used_percent)
    elif used_percent >= limit:
        LOG.warn('The "/" filesystem used %s%% space !' % used_percent)
def check_component_availability(component, check_cmd):
    """Source /root/openrc and run *check_cmd* to probe an openstack service."""
    ENV_FILE_PATH = '/root/openrc'
    if os.path.exists(ENV_FILE_PATH):
        # credentials must be sourced in the same shell as the check command
        (s, o) = commands.getstatusoutput('source %s;' % ENV_FILE_PATH
                                          + check_cmd)
        if s == 0:
            LOG.debug('Check Successfully.')
        else:
            LOG.error('Check Faild.')
            LOG.error(o)
    else:
        LOG.error('Can not load environment variables from "%s".'
                  % ENV_FILE_PATH)
def push_yaml_to_node(host, src_path, dst_file_name):
    """Copy *src_path* to /etc/hiera/<dst_file_name> on *host*."""
    # create the hiera data directory first
    (out, err) = ssh_connect2(host, 'test -d /etc/hiera || mkdir /etc/hiera')
    if err == '':
        LOG.debug('Push %s to node %s .' % (src_path, host))
        scp_connect(host, src_path, '/etc/hiera/%s' % dst_file_name)
    else:
        LOG.error('Can not create "/etc/hiera/" on node %s .' % host)
def check_mongodb_connect(uri):
    """Probe mongodb at *uri* by listing the ceilometer collections.

    Fixes: when MongoClient() itself raised, ``db`` was never bound and the
    ``finally: db.logout()`` raised NameError; the bare except is narrowed
    and the misspelled messages corrected.
    """
    from pymongo import MongoClient
    client = None
    try:
        client = MongoClient(uri)
        db = client.ceilometer
        collection_list = db.collection_names()
        if collection_list:
            LOG.debug('Check Successfully.')
    except Exception:
        LOG.error('Check Failed.')
    finally:
        # release the connection; guard against a failed constructor
        if client is not None:
            client.close()
def create_symbolic_links_on_openstack_node(nodes_info):
    """On every node, link /etc/hiera/astute.yaml to /etc/astute.yaml."""
    LOG.info('Create symbolic links on openstack node.')
    src_file = '/etc/astute.yaml'
    dst_file = '/etc/hiera/astute.yaml'
    # identical command on every node, so build it once
    cmd = 'test -h %s || ln -s %s %s' % (dst_file, src_file, dst_file)
    for node in nodes_info:
        # fuelweb_admin is "ip/prefix"; keep only the ip part
        host = str(node['fuelweb_admin'].split('/')[0])
        (out, err) = ssh_connect(host, cmd)
        if err == '':
            LOG.debug('Create symbolic links on node %s .' % host)
        else:
            LOG.error('Can not run command: %s on node %s .' % (cmd, host))
def check_mongodb_connect(uri):
    """Probe mongodb at *uri* by listing the ceilometer collections."""
    from pymongo import MongoClient
    try:
        client = MongoClient(uri)
        db = client.ceilometer
        collection_list = db.collection_names()
        if collection_list:
            LOG.debug('Check Sucessfully.')
    except:
        LOG.error('Check Faild.')
    finally:
        # NOTE(review): if MongoClient() itself raises, `db` is unbound and
        # this logout() raises NameError — worth fixing
        db.logout()
def create_symbolic_links_on_openstack_node(nodes_info):
    """Link /etc/hiera/astute.yaml -> /etc/astute.yaml on every node."""
    LOG.info('Create symbolic links on openstack node.')
    for node in nodes_info:
        # fuelweb_admin is a CIDR-style "ip/prefix"; keep only the ip
        host = str(node['fuelweb_admin'].split('/')[0])
        src_file = '/etc/astute.yaml'
        dst_file = '/etc/hiera/astute.yaml'
        cmd = 'test -h ' + dst_file + ' || ln -s ' + src_file + ' ' + dst_file
        (out, err) = ssh_connect(host, cmd)
        if err != '':
            LOG.error('Can not run command: %s on node %s .' % (cmd, host))
        else:
            LOG.debug('Create symbolic links on node %s .' % host)
def _generate_hiera_conf_file(file_path):
    """Write a minimal hiera.yaml using yaml data files under /etc/hiera.

    The commented-out duplicate of the dict construction has been removed.
    """
    hiera_conf = {':backends': ['yaml'],
                  ':hierarchy': ['globals', 'astute'],
                  ':yaml': {':datadir': '/etc/hiera'},
                  ':logger': ['noop']}
    LOG.debug('Generate %s' % file_path)
    with open(file_path, 'w') as hiera_yaml:
        hiera_yaml.write(yaml.dump(hiera_conf, default_flow_style=False))
def _generate_nailgun_repo_conf_file(file_path, fuel_node_ip):
    """Write a yum .repo file pointing at the nailgun repo on the fuel node."""
    repo_name = 'nailgun'
    baseurl = 'http://%s:8080/2014.2.2-6.0.1/centos/x86_64' % fuel_node_ip
    gpgcheck = 0
    lines = ['[%s]' % repo_name,
             'name=%s' % repo_name,
             'baseurl=%s' % baseurl,
             'gpgcheck=%s' % gpgcheck]
    LOG.debug('Generate %s' % file_path)
    with open(file_path, 'w') as repo_conf_file:
        repo_conf_file.write('\n'.join(lines))
def vrouter_check_one(rid):
    """Check every port of router *rid* on the l3 agent host serving it."""
    cmd = 'neutron router-port-list %s -f csv -F id -F name' % (rid)
    out = run_command(cmd)
    ports = []
    if out:
        ports = csv2dict(out)
    l3_host = vrouter_get_l3_host(rid)
    # without a hosting l3 agent there is nothing meaningful to check
    if l3_host:
        for port in ports:
            LOG.debug('start checking port %s[%s]'
                      % (port['name'], port['id']))
            port_check_one(port['id'], l3_host)
            LOG.debug('finish checking port %s[%s]'
                      % (port['name'], port['id']))
def _generate_nailgun_repo_conf_file(file_path, fuel_node_ip):
    """Write a yum .repo file for the nailgun repo served by the fuel node."""
    repo_name = 'nailgun'
    baseurl = 'http://' \
        + fuel_node_ip \
        + ':8080/2014.2.2-6.0.1/centos/x86_64'
    # repo metadata is unsigned, so gpg checking is disabled
    gpgcheck = 0
    repo_conf = '[' + repo_name + ']' + '\n' \
        + 'name=' + repo_name + '\n' \
        + 'baseurl=' + baseurl + '\n' \
        + 'gpgcheck=' + str(gpgcheck)
    LOG.debug('Generate %s' % file_path)
    with open(file_path, 'w') as repo_conf_file:
        repo_conf_file.write(repo_conf)
def check_service(name):
    """Check via systemctl that *name*.service is both active and enabled."""
    (_, out) = commands.getstatusoutput('systemctl is-active %s.service'
                                        % (name))
    if out != 'active':
        LOG.error('Service %s is not running ...' % name)
    else:
        LOG.debug('Service %s is running ...' % name)
    (_, out) = commands.getstatusoutput('systemctl is-enabled %s.service'
                                        % (name))
    if 'enabled' not in out:
        LOG.error('Service %s is not enabled ...' % name)
    else:
        LOG.debug('Service %s is enabled ...' % name)
def check_mongodb():
    """On mongo nodes, check the mongod service and its database stats."""
    if NODE_ROLE.is_mongo():
        role = NODE_ROLE.role
        # presumably search_service() is truthy when the unit is missing —
        # TODO confirm against its definition
        if search_service('mongod'):
            LOG.error('mongod service was not found on %s node,please fix it' \
                      % role)
        else:
            yaml_path = '/etc/astute.yaml'
            check_service('mongod')
            mongodb_stats = _get_mongodb_stats(yaml_path)
            # a dict result signals success; anything else is an error value
            if isinstance(mongodb_stats, dict):
                LOG.debug("mongod service is ok:%s" % mongodb_stats)
            else:
                LOG.error('mongod service is wrong:%s' % mongodb_stats)
def check_key_common(key, profile, template):
    """Compare option *key* between *profile* and the *template* file.

    An option listed in ``<template>.filter`` is skipped.  When the
    template has no value for the key the option is only reported; a real
    mismatch is logged as an error.

    Fixes: ``is ''`` relied on CPython string interning (and warns on
    modern interpreters) — replaced by an equality test; "faild" typo in
    the error message corrected.
    """
    current_value = get_value_common(key, profile)
    correct_value = get_value_common(key, template)
    filterfile = template + '.filter'
    if os.path.exists(filterfile) and \
            get_value_common(key, filterfile) == '':
        LOG.debug('"%s = %s" option in the filter file, skip check this option.'
                  % (key, current_value))
    elif not correct_value:
        LOG.warn('Can not check following option, please check it by yourself. ')
        fmt_print('%s=%s' % (key, current_value))
    elif current_value != correct_value:
        LOG.error('"%s" option check failed' % key)
        fmt_print('Current is "%s=%s"' % (key, current_value))
        fmt_print('Correct is "%s=%s"' % (key, correct_value))
def check_service(name):
    """Check via systemctl that *name*.service is active and enabled."""
    (_, out) = commands.getstatusoutput(
        'systemctl is-active %s.service' % (name))
    if out == 'active':
        LOG.debug('Service %s is running ...' % name)
    else:
        LOG.error('Service %s is not running ...' % name)
    (_, out) = commands.getstatusoutput(
        'systemctl is-enabled %s.service' % (name))
    # substring match also accepts variants such as "enabled-runtime"
    if 'enabled' in out:
        LOG.debug('Service %s is enabled ...' % name)
    else:
        LOG.error('Service %s is not enabled ...' % name)
def puppet_apply2(host_id, module_path, manifest):
    """Run `puppet apply` for *manifest* on node-<host_id> and log outcome."""
    log_file = '/var/log/lma_collector_deployment.log'
    name = os.path.basename(manifest)
    cmd = ('puppet apply --modulepath=%s -l %s --debug %s'
           % (module_path, log_file, manifest))
    LOG.debug('Apply manifest %s on node-%s .' % (name, host_id))
    (_, _, exitcode) = run_command_on_node(host_id, cmd)
    if exitcode != 0:
        LOG.error('Apply manifest %s on node-%s failed .'
                  'Please check %s on node-%s .'
                  % (name, host_id, log_file, host_id))
    else:
        LOG.debug('Apply manifest %s on node-%s successfully .'
                  % (name, host_id))
def check_key_common(key, profile, template):
    """Check one config option of *profile* against *template*.

    Options present (empty-valued) in ``<template>.filter`` are skipped;
    options with no template value are merely reported; real mismatches
    are errors.

    Fixes the ``is ''`` identity comparison (string interning dependent)
    and the "faild" typo in the error message.
    """
    current_value = get_value_common(key, profile)
    correct_value = get_value_common(key, template)
    filterfile = template + '.filter'
    filtered = (os.path.exists(filterfile)
                and get_value_common(key, filterfile) == '')
    if filtered:
        LOG.debug('"%s = %s" option in the filter file, skip check this option.'
                  % (key, current_value))
    elif not correct_value:
        LOG.warn('Can not check following option, please check it by yourself. ')
        fmt_print('%s=%s' % (key, current_value))
    elif current_value != correct_value:
        LOG.error('"%s" option check failed' % key)
        fmt_print('Current is "%s=%s"' % (key, current_value))
        fmt_print('Correct is "%s=%s"' % (key, correct_value))
def puppet_apply2(host_id, module_path, manifest):
    """Run `puppet apply` of *manifest* on node-<host_id> and log the result."""
    log_file = '/var/log/lma_collector_deployment.log'
    cmd = ('puppet apply --modulepath=%s -l %s --debug %s'
           % (module_path, log_file, manifest))
    LOG.debug('Apply manifest %s on node-%s .'
              % (os.path.basename(manifest), host_id))
    (_, _, exitcode) = run_command_on_node(host_id, cmd)
    if exitcode == 0:
        LOG.debug('Apply manifest %s on node-%s successfully .'
                  % (os.path.basename(manifest), host_id))
    else:
        LOG.error('Apply manifest %s on node-%s failed .'
                  'Please check %s on node-%s .'
                  % (os.path.basename(manifest), host_id, log_file, host_id))
def vrouter_check_one(rid):
    """Check every port attached to router *rid* on its hosting l3 agent."""
    # silence client INFO chatter while listing the router ports
    logging.disable(logging.INFO)
    ports = PythonClient().neutron_router_port_list(rid)
    logging.disable(logging.NOTSET)
    l3_host = vrouter_get_l3_host(rid)
    if not l3_host:
        return
    for port in ports:
        LOG.debug('start checking port %s(%s)' % (port['name'], port['id']))
        port_check_one(port['id'], l3_host)
        LOG.debug('finish checking port %s(%s)' % (port['name'], port['id']))
def port_check_one(pid, l3_host=None):
    """Check status/admin_state of port *pid*; for router gateway ports
    additionally ping the external gateway from the router namespace."""

    def port_log(device_owner, s):
        # a down gateway port is only a warning, anything else is an error
        if device_owner == 'network:router_gateway':
            LOG.warn(s)
        else:
            LOG.error(s)

    fmt = 'json'
    cmd = 'neutron port-show %s -f %s -F status -F admin_state_up '\
        '-F device_owner -F device_id' % (pid, fmt)
    out = run_command(cmd)
    if out:
        detail = port_result_to_json(out, fmt)
        device_owner = detail['device_owner']
        rid = detail['device_id']
        if l3_host is None:
            # if l3_host is None, it is certain that the function is called
            # via command line, rather than vrouter_check_one, so it's ok
            # to call vrouter_get_l3_host here.
            l3_host = vrouter_get_l3_host(rid)
        # 1) check status of gateway port and interface port
        if detail['status'] != 'ACTIVE':
            port_log(
                device_owner,
                "status of port %s[%s] on %s is down"
                % (device_owner, pid, l3_host))
        if not detail['admin_state_up']:
            port_log(
                device_owner,
                "admin_status of port %s[%s] on %s is down"
                % (device_owner, pid, l3_host))
        # 2) ping external gateway to check network status
        LOG.debug('check gateway for port on %s' % (l3_host))
        if device_owner == 'network:router_gateway':
            LOG.debug('this port is external port, check external gateway')
            gw = vrouter_get_gw_remote(l3_host, rid)
            if gw:
                LOG.debug("check external gateway %s on %s" % (gw, l3_host))
                cmd = "ip netns exec qrouter-%s ping -c 1 %s" % (rid, gw)
                out, err = ssh_connect(l3_host, cmd)
                if not err:
                    LOG.debug("external gateway is ok")
                else:
                    LOG.error("failed to connect external gateway on %s"
                              % (l3_host))
            else:
                LOG.error("failed to get external gateway on %s" % (l3_host))
        else:
            LOG.debug('this port is normal port, do not need to check gateway')
def push_repo_file_to_node(host, plugin_name, src_path, backup=False):
    """Push *src_path* as /etc/yum.repos.d/<plugin_name>.repo on *host*.

    With ``backup=True`` the existing .repo files are first moved into
    /etc/yum.repos.d/bak/; the push is skipped if that backup fails.

    Fixes: the backup-failure message contained a "%s" placeholder but was
    never given the host argument, so the literal "%s" was logged.
    """
    LOG.debug('Push %s to node %s .' % (src_path, host))
    if backup:
        ssh_connect2(
            host,
            'test -e /etc/yum.repos.d/bak || mkdir /etc/yum.repos.d/bak/')
        (out, err) = ssh_connect2(
            host, 'mv /etc/yum.repos.d/*.repo /etc/yum.repos.d/bak/')
        if err != '':
            LOG.error('Can not backup "/etc/yum.repos.d/*.repo" on node %s .'
                      % host)
            return
    scp_connect(host, src_path, '/etc/yum.repos.d/%s.repo' % plugin_name)
def check_rabbitmqrestart():
    """On controllers, detect whether rabbitmq was restarted since last run.

    Compares the process start time reported by _get_from_ps() with the one
    recorded in /.eayunstack/rabbitmq_start_time and re-records it when it
    changed (or on the first run).
    """
    if NODE_ROLE.is_controller():
        log_path = '/.eayunstack/rabbitmq_start_time'
        start_time = _get_from_ps()
        if os.path.exists(log_path):
            log_start_time = _get_from_log(log_path)
            if log_start_time == start_time:
                LOG.debug('service rabbitmq has never been restart')
            else:
                LOG.warn('service rabbitmq has been restart at %s'
                         % start_time)
                _log_time(log_path, start_time)
        else:
            # first run: no recorded start time yet
            LOG.debug('the log file is not found')
            _log_time(log_path, start_time)
def check_ntp():
    """Check ntpd and report the NTP server the node is synchronised to.

    Parses the server name out of the parenthesised field of `ntpstat`
    output.  Fixes the bare ``except:`` around the regex match (the only
    expected failure is a non-matching line, i.e. ``match`` is None).
    """
    check_service('ntpd')
    (s, out) = commands.getstatusoutput(
        'ntpstat | grep "synchronised to NTP server"')
    if s != 0:
        LOG.error('ntpstat error, please check it')
        return
    # server name appears in parentheses, e.g. "... (10.20.0.2) ..."
    match = re.compile(r'.+\((.+)\).+').match(out)
    if match:
        LOG.debug('ntpserver is %s' % match.groups()[0])
    else:
        LOG.error('except ntpstate error, please check it')
def push_repo_file_to_node(host, plugin_name, src_path, backup=False):
    """Push *src_path* as /etc/yum.repos.d/<plugin_name>.repo on *host*."""
    LOG.debug('Push %s to node %s .' % (src_path, host))
    if backup:
        # move existing repo files aside before installing the plugin repo
        ssh_connect2(host,
                     'test -e /etc/yum.repos.d/bak || mkdir /etc/yum.repos.d/bak/')
        (out, err) = ssh_connect2(host,
                                  'mv /etc/yum.repos.d/*.repo /etc/yum.repos.d/bak/')
        if err == '':
            scp_connect(host, src_path,
                        '/etc/yum.repos.d/%s.repo' % plugin_name)
        else:
            # NOTE(review): the %s placeholder is never filled in — the
            # literal "%s" is logged; should be given `% host`
            LOG.error('Can not backup "/etc/yum.repos.d/*.repo" on node %s .')
    else:
        scp_connect(host, src_path, '/etc/yum.repos.d/%s.repo' % plugin_name)
def vrouter_check_one(rid):
    """Check all ports of router *rid* via the neutron CLI (csv output)."""
    out = run_command('neutron router-port-list %s -f csv -F id -F name'
                      % (rid))
    ports = csv2dict(out) if out else []
    l3_host = vrouter_get_l3_host(rid)
    # nothing to do when no l3 agent hosts the router
    if not l3_host:
        return
    for port in ports:
        LOG.debug('start checking port %s[%s]' % (port['name'], port['id']))
        port_check_one(port['id'], l3_host)
        LOG.debug('finish checking port %s[%s]' % (port['name'], port['id']))
def check_cpu():
    """Check that the current CPU frequency lies inside the scaling-policy
    range reported by `cpupower frequency-info`.

    NOTE(review): the awk field numbers ($5..$11) assume a fixed English
    cpupower output layout — fragile across versions and locales.
    """
    if not intel_pstate_enabled():
        LOG.debug('kernel parameter "intel_pstate" was disabled.')
        return
    # "current policy" line, field 7: minimum frequency value
    (status, out) = commands.getstatusoutput(
        "cpupower frequency-info | grep \"current policy\" | awk \'{print $7}\'"
    )
    if status != 0:
        LOG.error('Can not get CPU min frequency !')
        return
    else:
        cpu_min_freq = out
    # field 8: unit of the minimum frequency
    (status, out) = commands.getstatusoutput(
        "cpupower frequency-info | grep \"current policy\" | awk \'{print $8}\'"
    )
    cpu_min_freq_unit = out
    # field 10: maximum frequency value
    (status, out) = commands.getstatusoutput(
        "cpupower frequency-info | grep \"current policy\" | awk \'{print $10}\'"
    )
    if status != 0:
        LOG.error('Can not get CPU max frequency !')
        return
    else:
        cpu_max_freq = out
    # field 11: unit of the maximum frequency
    (status, out) = commands.getstatusoutput(
        "cpupower frequency-info | grep \"current policy\" | awk \'{print $11}\'"
    )
    cpu_max_freq_unit = out
    # "current CPU frequency" line, field 5: current value
    (status, out) = commands.getstatusoutput(
        "cpupower frequency-info | grep \"current CPU frequency\" | awk \'{print $5}\'"
    )
    if status != 0:
        LOG.error('Can not get current CPU frequency !')
        return
    else:
        cpu_cur_freq = out
    # field 6: unit of the current frequency
    (status, out) = commands.getstatusoutput(
        "cpupower frequency-info | grep \"current CPU frequency\" | awk \'{print $6}\'"
    )
    cpu_cur_freq_unit = out
    # NOTE(review): units of min/max/current are compared numerically
    # without normalising (GHz vs MHz) — assumes cpupower reports all three
    # in the same unit
    if float(cpu_cur_freq) >= float(cpu_min_freq) and float(
            cpu_cur_freq) <= float(cpu_max_freq):
        LOG.debug('Current CPU Frequency: %s %s'
                  % (cpu_cur_freq, cpu_cur_freq_unit))
    else:
        LOG.error('Current CPU Frequency: %s %s. Not within %s %s and %s %s'
                  % (cpu_cur_freq, cpu_cur_freq_unit, cpu_min_freq,
                     cpu_min_freq_unit, cpu_max_freq, cpu_max_freq_unit))
def _generate_hiera_conf_file(file_path):
    """Write a minimal hiera.yaml pointing at yaml data under /etc/hiera.

    Removes the commented-out duplicate of the dict construction that was
    left behind in the original.
    """
    hiera_conf = {
        ':backends': ['yaml'],
        ':hierarchy': ['globals', 'astute'],
        ':yaml': {':datadir': '/etc/hiera'},
        ':logger': ['noop'],
    }
    LOG.debug('Generate %s' % file_path)
    with open(file_path, 'w') as hiera_yaml:
        hiera_yaml.write(yaml.dump(hiera_conf, default_flow_style=False))
def _network_check_local(local_inf, nic_status):
    """Verify every physical NIC required by this node's network roles is
    link-up according to *nic_status* (a {nic: "yes"/"no"} map)."""
    # 1) check if nic we need link is ok
    if NODE_ROLE.is_mongo():
        # mongo nodes do not use the storage/private bridges
        local_inf = [i for i in local_inf
                     if i["name"] not in ["br-storage", "br-prv"]]
    if NODE_ROLE.is_ceph_osd():
        # ceph-osd nodes do not use the private bridge
        local_inf = [i for i in local_inf if i["name"] != "br-prv"]
    nic_need = [i["phy_port"] for i in local_inf]
    for nic in set(nic_need):
        # if two network roles use same nic, e.g. br-mgmt and br-fw-admin
        # use eno1, we can ignore it since we just want physic network nic
        # NOTE(review): filter(...)[0] is Python 2 only (filter returns an
        # iterator on Python 3)
        inf = filter(lambda inf: inf["phy_port"] == nic, local_inf)[0]
        if nic_status[nic].lower() != "yes":
            LOG.error("Network card %s(%s) is not connected"
                      % (nic, inf["name"]))
        else:
            LOG.debug("Network card %s(%s) connected" % (nic, inf["name"]))
def check_network():
    """Check NIC link status and inter-node connectivity for this node."""
    nic_status = _network_get_nic_status()
    if NODE_ROLE.is_fuel():
        # the fuel master only reports its NIC states; astute.yaml checks
        # do not apply to it
        LOG.debug('Network card information:')
        for i in nic_status.keys():
            LOG.debug('%s: %s' % (i, nic_status[i]))
        return
    # `file()` / unsafe yaml.load are Python 2 era; input is a local
    # deployment file, not untrusted data
    cfg = yaml.load(file('/etc/astute.yaml'))
    # check node's nic status
    local_inf = _network_local_network_inf(cfg)
    _network_check_local(local_inf, nic_status)
    # check if node can connect to other node
    remote_inf = _network_remote_network_inf(cfg)
    _network_check_remote(remote_inf)
def check_network():
    """Run the local NIC checks and the remote reachability checks."""
    nic_status = _network_get_nic_status()
    if NODE_ROLE.is_fuel():
        # on the fuel master just dump NIC states and stop
        LOG.debug('Network card information:')
        for i in nic_status.keys():
            LOG.debug('%s: %s' % (i, nic_status[i]))
        return
    cfg = yaml.load(file('/etc/astute.yaml'))
    # check node's nic status
    local_inf = _network_local_network_inf(cfg)
    _network_check_local(local_inf, nic_status)
    # check if node can connect to other node
    remote_inf = _network_remote_network_inf(cfg)
    _network_check_remote(remote_inf)
def port_check_one(pid, l3_host=None):
    """Check status/admin_state of port *pid* and, for gateway ports, ping
    the external gateway from inside the router namespace."""

    def port_log(device_owner, s):
        # a down gateway port is only a warning, anything else is an error
        if device_owner == 'network:router_gateway':
            LOG.warn(s)
        else:
            LOG.error(s)

    fmt = 'json'
    cmd = 'neutron port-show %s -f %s -F status -F admin_state_up '\
        '-F device_owner -F device_id' % (pid, fmt)
    out = run_command(cmd)
    if out:
        detail = port_result_to_json(out, fmt)
        device_owner = detail['device_owner']
        rid = detail['device_id']
        if l3_host is None:
            # if l3_host is None, it is certain that the function is called
            # via command line, rather than vrouter_check_one, so it's ok
            # to call vrouter_get_l3_host here.
            l3_host = vrouter_get_l3_host(rid)
        # 1) check status of gateway port and interface port
        if detail['status'] != 'ACTIVE':
            port_log(device_owner,
                     "status of port %s[%s] on %s is down"
                     % (device_owner, pid, l3_host))
        if not detail['admin_state_up']:
            port_log(device_owner,
                     "admin_status of port %s[%s] on %s is down"
                     % (device_owner, pid, l3_host))
        # 2) ping external gateway to check network status
        LOG.debug('check gateway for port on %s' % (l3_host))
        if device_owner == 'network:router_gateway':
            LOG.debug('this port is external port, check external gateway')
            gw = vrouter_get_gw_remote(l3_host, rid)
            if gw:
                LOG.debug("check external gateway %s on %s" % (gw, l3_host))
                cmd = "ip netns exec qrouter-%s ping -c 1 %s" % (rid, gw)
                out, err = ssh_connect(l3_host, cmd)
                if not err:
                    LOG.debug("external gateway is ok")
                else:
                    LOG.error("failed to connect external gateway on %s"
                              % (l3_host))
            else:
                LOG.error("failed to get external gateway on %s" % (l3_host))
        else:
            LOG.debug('this port is normal port, do not need to check gateway')
def _puppet_apply(host, module_path, manifest):
    """Apply *manifest* on *host* over ssh; return True on success.

    NOTE(review): the command appends `|| echo $?`, so any stdout (the
    echoed exit code) signals failure; puppet's own output goes to
    *log_file* via -l.
    """
    success = False
    log_file = '/var/log/deployment_influxdb.log'
    cmd = ('puppet apply --modulepath=%s -l %s --debug %s || echo $?'
           % (module_path, log_file, manifest))
    LOG.info('Apply manifest %s on node %s .'
             % (os.path.basename(manifest), host))
    (out, err) = ssh_connect(host, cmd)
    if out != '':
        LOG.error('Apply manifest %s on node %s failed .'
                  'Please check %s on node %s .'
                  % (os.path.basename(manifest), host, log_file, host))
    else:
        success = True
        LOG.debug('Apply manifest %s on node-%s successfully .'
                  % (os.path.basename(manifest), host))
    return success
def _puppet_apply(host, module_path, manifest):
    """Run `puppet apply` of *manifest* on *host*; True when it succeeded.

    Failure is detected by non-empty stdout: the trailing `|| echo $?`
    only prints when puppet exits non-zero (its regular output is routed
    to *log_file* by -l).
    """
    success = False
    log_file = '/var/log/deployment_influxdb.log'
    cmd = ('puppet apply --modulepath=%s -l %s --debug %s || echo $?'
           % (module_path, log_file, manifest))
    LOG.info('Apply manifest %s on node %s .'
             % (os.path.basename(manifest), host))
    (out, err) = ssh_connect(host, cmd)
    if out != '':
        LOG.error('Apply manifest %s on node %s failed .'
                  'Please check %s on node %s .'
                  % (os.path.basename(manifest), host, log_file, host))
    else:
        success = True
        LOG.debug('Apply manifest %s on node-%s successfully .'
                  % (os.path.basename(manifest), host))
    return success
def check_memory():
    """Warn when used physical memory (excluding buffers/cache) exceeds 85%.

    Fixes the fragile ``str.strip('kB')`` parsing: strip() removes the
    characters 'k'/'B' from both ends rather than a " kB" suffix, so the
    numeric field of each /proc/meminfo value is taken explicitly instead.
    """
    limit = 85
    meminfo = OrderedDict()
    with open('/proc/meminfo') as f:
        for line in f:
            key, _, value = line.partition(':')
            meminfo[key] = value.strip()

    def _mib(name):
        # values look like "16384 kB"; first field is the number
        return int(meminfo[name].split()[0]) / 1024.0

    Total = _mib('MemTotal')
    UseMemory = Total - _mib('MemFree') - _mib('Buffers') - _mib('Cached')
    mem_per = UseMemory / Total * 100
    if mem_per >= 0 and mem_per < limit:
        LOG.debug('The system memory has been used %.2f%%!' % mem_per)
    elif mem_per >= limit:
        LOG.error('The system memory has been used %.2f%%!' % mem_per)
def deployment_openstack_node(node, plugin_version):
    """Apply the lma_collector puppet manifests matching the node's roles."""
    host_id = str(node['id'])
    LOG.debug('Deploy lma_collector on node-%s .' % host_id)
    plugin_dir = '/etc/fuel/plugins/lma_collector-%s/puppet/' % plugin_version
    manifest_path = plugin_dir + 'manifests/'
    module_path = plugin_dir + 'modules/'
    # every node gets the environment check and the base manifest first
    manifests = ['check_environment_configuration.pp', 'base.pp']
    per_role = {'controller': ['controller.pp'],
                'compute': ['compute.pp'],
                'ceph-osd': ['ceph_osd.pp']}
    for role in node['roles']:
        manifests += per_role.get(role, [])
    for manifest in manifests:
        puppet_apply2(host_id, module_path, manifest_path + manifest)
def _generate_astute_conf_file(file_path):
    """Write the astute yaml for the influxdb_grafana (base-os) role."""
    # secrets default to masked placeholders; _get_param presumably
    # returns the configured value or this default — TODO confirm
    influxdb_conf_def = {
        'influxdb_dbname': 'lma',
        'influxdb_username': '******',
        'influxdb_userpass': '******',
        'influxdb_rootpass': '******'
    }
    influxdb_conf = {}
    for key in influxdb_conf_def:
        influxdb_conf[key] = _get_param(key, influxdb_conf_def[key])
    influxdb_conf['data_dir'] = '/opt/influxdb'
    influxdb_conf['node_name'] = 'influxdb'
    astute_conf = {'influxdb_grafana': influxdb_conf,
                   'user_node_name': 'influxdb',
                   'roles': ['base-os'],
                   'use_neutron': True}
    LOG.debug('Generate %s' % file_path)
    with open(file_path, 'w') as astute_yaml:
        astute_yaml.write(yaml.dump(astute_conf, default_flow_style=False))
def _generate_globals_conf_file(file_path, internal_address):
    """Write the globals yaml consumed by the lma_collector manifests.

    Fixes: the astute/common config files were opened and never closed;
    both are now read inside ``with`` blocks.
    """
    with open(ASTUTE_CONF_FILEPATH) as f:
        astute = yaml.load(f)
    with open(COMMON_CONF_FILEPATH) as ff:
        common = yaml.load(ff)
    influxdb = astute['influxdb_grafana']
    lma_conf = {
        'environment_label': 'EayunStack',
        'influxdb_mode': 'remote',
        'influxdb_database': influxdb['influxdb_dbname'],
        'influxdb_address': common['influxdb_address'],
        'influxdb_user': influxdb['influxdb_username'],
        'influxdb_password': influxdb['influxdb_userpass'],
        'elasticsearch_mode': 'disabled',
    }
    globals_conf = {
        'lma_collector': lma_conf,
        'internal_address': internal_address,
    }
    LOG.debug('Generate %s' % file_path)
    with open(file_path, 'w') as globals_yaml:
        globals_yaml.write(yaml.dump(globals_conf, default_flow_style=False))
def _generate_globals_conf_file(file_path, internal_address):
    """Write the globals yaml consumed by the lma_collector manifests."""
    # NOTE(review): both config files are opened but never closed
    f = open(ASTUTE_CONF_FILEPATH)
    astute = yaml.load(f)
    ff = open(COMMON_CONF_FILEPATH)
    common = yaml.load(ff)
    lma_conf = {
        'environment_label': 'EayunStack',
        'influxdb_mode': 'remote',
        'influxdb_database': astute['influxdb_grafana']['influxdb_dbname'],
        'influxdb_address': common['influxdb_address'],
        'influxdb_user': astute['influxdb_grafana']['influxdb_username'],
        'influxdb_password': astute['influxdb_grafana']['influxdb_userpass'],
        'elasticsearch_mode': 'disabled',
    }
    globals_conf = {}
    globals_conf['lma_collector'] = lma_conf
    globals_conf['internal_address'] = internal_address
    LOG.debug('Generate %s' % file_path)
    with open(file_path, 'w') as globals_yaml:
        globals_yaml.write(yaml.dump(globals_conf, default_flow_style=False))
def check_cpu():
    """Verify the current CPU frequency is within the policy min/max range.

    NOTE(review): parsing relies on fixed awk field positions in English
    cpupower output — fragile across versions/locales.
    """
    if not intel_pstate_enabled():
        LOG.debug('kernel parameter "intel_pstate" was disabled.')
        return
    # "current policy" line, field 7: minimum frequency value
    (status, out) = commands.getstatusoutput("cpupower frequency-info | grep \"current policy\" | awk '{print $7}'")
    if status != 0:
        LOG.error("Can not get CPU min frequency !")
        return
    else:
        cpu_min_freq = out
    # field 8: unit of the minimum frequency
    (status, out) = commands.getstatusoutput("cpupower frequency-info | grep \"current policy\" | awk '{print $8}'")
    cpu_min_freq_unit = out
    # field 10: maximum frequency value
    (status, out) = commands.getstatusoutput("cpupower frequency-info | grep \"current policy\" | awk '{print $10}'")
    if status != 0:
        LOG.error("Can not get CPU max frequency !")
        return
    else:
        cpu_max_freq = out
    # field 11: unit of the maximum frequency
    (status, out) = commands.getstatusoutput(
        "cpupower frequency-info | grep \"current policy\" | awk '{print $11}'"
    )
    cpu_max_freq_unit = out
    # "current CPU frequency" line, field 5: current value
    (status, out) = commands.getstatusoutput(
        "cpupower frequency-info | grep \"current CPU frequency\" | awk '{print $5}'"
    )
    if status != 0:
        LOG.error("Can not get current CPU frequency !")
        return
    else:
        cpu_cur_freq = out
    # field 6: unit of the current frequency
    (status, out) = commands.getstatusoutput(
        "cpupower frequency-info | grep \"current CPU frequency\" | awk '{print $6}'"
    )
    cpu_cur_freq_unit = out
    # NOTE(review): comparison assumes min/max/current share the same unit
    if float(cpu_cur_freq) >= float(cpu_min_freq) and float(cpu_cur_freq) <= float(cpu_max_freq):
        LOG.debug("Current CPU Frequency: %s %s" % (cpu_cur_freq, cpu_cur_freq_unit))
    else:
        LOG.error(
            "Current CPU Frequency: %s %s. Not within %s %s and %s %s"
            % (cpu_cur_freq, cpu_cur_freq_unit, cpu_min_freq,
               cpu_min_freq_unit, cpu_max_freq, cpu_max_freq_unit)
        )
def deployment_openstack_node(node, plugin_version):
    """Apply lma_collector puppet manifests appropriate to the node roles."""
    host_id = str(node['id'])
    host_roles = node['roles']
    LOG.debug('Deploy lma_collector on node-%s .' % host_id)
    plugin_dir = '/etc/fuel/plugins/lma_collector-' \
        + plugin_version \
        + '/puppet/'
    manifest_path = plugin_dir + 'manifests/'
    module_path = plugin_dir + 'modules/'
    # manifests applied on every node, in order
    manifest_list = ['check_environment_configuration.pp', 'base.pp']
    # extra manifests selected per node role
    manifest_mapping = {
        'controller': ['controller.pp'],
        'compute': ['compute.pp'],
        'ceph-osd': ['ceph_osd.pp']
    }
    for role in host_roles:
        manifest_list.extend(manifest_mapping.get(role, []))
    for manifest in manifest_list:
        puppet_apply2(host_id, module_path, manifest_path + manifest)
def check_memory():
    '''
    Return the information in /proc/meminfo as a dictionary
    '''
    # warn threshold as a percentage of total memory
    limit = 85
    meminfo = OrderedDict()
    with open('/proc/meminfo') as f:
        for line in f:
            meminfo[line.split(':')[0]] = line.split(':')[1].strip()
    # NOTE(review): str.strip('kB') strips the characters 'k'/'B' from the
    # ends, not a " kB" suffix; it happens to work for values like
    # "16384 kB" but is fragile
    Total = int((meminfo['MemTotal']).strip('kB')) / 1024.0
    # used = total - free - buffers - cached, all converted to MiB
    UseMemory = Total - (int(
        (meminfo['MemFree']).strip('kB'))) / 1024.0 - (int(
            (meminfo['Buffers']).strip('kB'))) / 1024.0 - (int(
                (meminfo['Cached']).strip('kB'))) / 1024.0
    mem_per = UseMemory / Total * 100
    if mem_per >= 0 and mem_per < limit:
        LOG.debug('The system memory has been used %.2f%%!' % mem_per)
    elif mem_per >= limit:
        LOG.error('The system memory has been used %.2f%%!' % mem_per)
def _network_check_local(local_inf, nic_status):
    """Verify that every physical NIC backing this node's bridges is
    connected according to *nic_status* ({nic: "yes"/"no"})."""
    # 1) check if nic we need link is ok
    if NODE_ROLE.is_mongo():
        # mongo nodes do not use the storage/private bridges
        local_inf = [
            i for i in local_inf
            if i['name'] not in ['br-storage', 'br-prv']
        ]
    if NODE_ROLE.is_ceph_osd():
        # ceph-osd nodes do not use the private bridge
        local_inf = [i for i in local_inf if i['name'] != 'br-prv']
    nic_need = []
    for inf in local_inf:
        # phy_port holds the list of physical ports backing the bridge
        nic_need.extend(inf['phy_port'])
    for nic in set(nic_need):
        # if two network roles use same nic, e.g. br-mgmt and br-fw-admin
        # use eno1, we can ignore it since we just want physic network nic
        # NOTE(review): filter(...)[0] is Python 2 only (not subscriptable
        # on Python 3)
        inf = filter(lambda inf: nic in inf['phy_port'], local_inf)[0]
        if nic_status[nic].lower() != 'yes':
            LOG.error('Network card %s(%s) is not connected' %
                      (nic, inf['name']))
        else:
            LOG.debug('Network card %s(%s) connected' % (nic, inf['name']))
def _network_check_local(local_inf, nic_status):
    """Check that each physical NIC needed by the node's bridges is link-up."""
    # 1) check if nic we need link is ok
    if NODE_ROLE.is_mongo():
        # storage/private bridges are not used on mongo nodes
        local_inf = [i for i in local_inf
                     if i['name'] not in ['br-storage', 'br-prv']]
    if NODE_ROLE.is_ceph_osd():
        # the private bridge is not used on ceph-osd nodes
        local_inf = [i for i in local_inf if i['name'] != 'br-prv']
    nic_need = []
    for inf in local_inf:
        nic_need.extend(inf['phy_port'])
    for nic in set(nic_need):
        # if two network roles use same nic, e.g. br-mgmt and br-fw-admin
        # use eno1, we can ignore it since we just want physic network nic
        # NOTE(review): filter(...)[0] relies on Python 2 list semantics
        inf = filter(lambda inf: nic in inf['phy_port'], local_inf)[0]
        if nic_status[nic].lower() != 'yes':
            LOG.error('Network card %s(%s) is not connected'
                      % (nic, inf['name']))
        else:
            LOG.debug('Network card %s(%s) connected' % (nic, inf['name']))
def check_key(section, key, profile, template):
    """Compare option [*section*] *key* between *profile* and *template*.

    Returns (True, current_value) when the option cannot be compared
    (section or key missing from the template) so the caller can report
    it, and (False, None) otherwise (filtered, matched, or mismatched).
    """
    pp = ConfigParser.ConfigParser()
    pp.read(profile)
    pt = ConfigParser.ConfigParser()
    pt.read(template)
    current_value = dict(pp.items(section))[key]
    filterfile = template + '.filter'
    if os.path.exists(filterfile):
        pf = ConfigParser.ConfigParser()
        pf.read(filterfile)
        try:
            # presence of the key in the filter file means "skip this option"
            dict(pf.items(section))[key]
            LOG.debug('[%s] ==> "%s = %s" option in the filter file, skip check this option.' % (section, key, current_value))
            return False, None
        # NOTE(review): bare except also swallows NoSectionError etc.
        except:
            pass
    current_value = dict(pp.items(section))[key]
    try:
        correct_value = dict(pt.items(section))[key]
    # there is no this section in the template file
    except ConfigParser.NoSectionError:
        correct_value = current_value
        return True, current_value
    # there is no this key in the section
    except KeyError:
        correct_value = current_value
        return True, current_value
    # if the key in profile and template didn't matched, check faild
    if current_value != correct_value:
        LOG.error('[%s] ' % section)
        LOG.error('"%s" option check faild' % key)
        fmt_print('Current is "%s=%s"' % (key, current_value))
        fmt_print('Correct is "%s=%s"' % (key, correct_value))
        return False, None
    else:
        return False, None