def kinit(user, pswd, realm):
    """Obtain a Kerberos ticket for ``user@REALM`` by piping a password to kinit.

    When config.use_salt() reports that salt is in use, the command is run
    on every minion ('*') through salt's ``cmd.run_all`` and each node that
    reports a non-zero ``retcode`` contributes an error message. Otherwise
    the command is run locally via command.get_command_output with
    ``shell=True``.

    Args:
        user: Principal name, without the realm.
        pswd: Password that is echoed into kinit's stdin.
        realm: Kerberos realm; it is upper-cased before use.

    Returns:
        (True, None) on success, or (False, error_message) on any failure.
    """
    try:
        # shlex.quote exists on Python 3; pipes.quote is the Python 2 spelling.
        try:
            from shlex import quote as shell_quote
        except ImportError:
            from pipes import quote as shell_quote

        # The command is interpreted by a shell in both branches (it relies
        # on a pipe), so every interpolated value is quoted. Without this a
        # password containing '"', '$' or a backtick is mangled or executed.
        principal = '%s@%s' % (user, realm.upper())
        cmd_to_run = 'echo %s | kinit %s' % (
            shell_quote(pswd), shell_quote(principal))

        use_salt, err = config.use_salt()
        if err:
            raise Exception(err)

        if use_salt:
            import salt.client
            errors = []
            client = salt.client.LocalClient()
            r1 = client.cmd('*', 'cmd.run_all', [cmd_to_run])
            if r1:
                for node, ret in r1.items():
                    if ret["retcode"] != 0:
                        msg = "Error initiating kerberos on GRIDCell %s" % node
                        if "stderr" in ret:
                            msg += " : %s" % ret["stderr"]
                        errors.append(msg)
            if errors:
                raise Exception(' '.join(errors))
        else:
            lines, err = command.get_command_output(cmd_to_run, shell=True)
            if err:
                raise Exception(err)
    except Exception as e:
        return False, 'Error initializing kerberos : %s' % str(e)
    else:
        # Mirror the (status, error) shape of the failure return.
        return True, None
def reload_configuration():
    """Ask samba to reload its configuration.

    With salt enabled (per config.use_salt()), ``smbcontrol all
    reload-config`` is run on every minion through ``cmd.run_all`` and each
    node with a non-zero ``retcode`` is reported. Without salt, the same
    command is run locally and the winbind service is then restarted via
    services_management.update_service_status.

    Returns:
        (True, None) on success, or (False, error_message) on any failure.
    """
    reload_cmd = 'smbcontrol all reload-config'
    try:
        use_salt, err = config.use_salt()
        if err:
            raise Exception(err)

        if use_salt:
            import salt.client
            client = salt.client.LocalClient()
            r1 = client.cmd('*', 'cmd.run_all', [reload_cmd])
            failures = []
            if r1:
                for node, ret in r1.items():
                    if ret["retcode"] != 0:
                        failures.append(
                            "Error reloading samba on node %s " % node)
            if failures:
                raise Exception(''.join(failures))
        else:
            lines, err = command.get_command_output(reload_cmd)
            if err:
                raise Exception(err)
            ret, err = services_management.update_service_status(
                'winbind', 'restart')
            if err:
                raise Exception(err)
    except Exception as e:
        return False, 'Error reloading CIFS configuration : %s' % str(e)
    else:
        # Mirror the (status, error) shape of the failure return.
        return True, None
def delete_local_user(username):
    """Delete a local system user and, if present, its samba account.

    The user is first looked up with get_local_user(). With salt enabled
    the account is removed on every minion through ``user.delete``;
    otherwise ``userdel`` is run locally. If the looked-up record has a
    truthy ``smb_user`` entry, the samba account is removed with
    ``pdbedit -x``.

    Args:
        username: Login name of the user to delete; must be non-empty.

    Returns:
        (True, None) on success, or (False, error_message) on any failure.
    """
    try:
        if not username:
            raise Exception('No username specified')

        d, err = get_local_user(username)
        if not d:
            if err:
                raise Exception('Error locating user : %s' % err)
            raise Exception('Error locating user')

        use_salt, err = config.use_salt()
        if err:
            raise Exception(err)

        if use_salt:
            import salt.client
            client = salt.client.LocalClient()
            rc = client.cmd('*', 'user.delete', [username])
            # An empty result or any falsy per-host status counts as failure.
            if not rc or not all(rc.values()):
                raise Exception("Error deleting the system user")
        else:
            cmd_to_run = 'userdel %s' % (username)
            lines, err = command.get_command_output(cmd_to_run)
            if err:
                raise Exception(err)

        if d['smb_user']:
            lines, err = command.get_command_output(
                r'pdbedit -d 1 -x %s' % username)
            if err:
                raise Exception(err)
    except Exception as e:
        return False, 'Error deleting local user : %s' % str(e)
    else:
        # Mirror the (status, error) shape of the failure return.
        return True, None
def create_local_group(grpname, gid=None):
    """Create a local system group, refusing to overwrite an existing one.

    The existing groups are fetched with get_local_groups(). With salt
    enabled the group is added on every minion through ``group.add``;
    otherwise ``groupadd`` is run locally.

    Args:
        grpname: Name of the group to create.
        gid: Optional numeric group id; omitted from the command when falsy.

    Returns:
        (True, None) on success, or (False, error_message) on any failure.
    """
    try:
        # Refuse to create a group that already exists.
        gl, err = get_local_groups()
        if gl:
            for gd in gl:
                if gd["grpname"] == grpname:
                    raise Exception(
                        "Error creating group. The group \"%s\" already exists. " % grpname)
        elif err:
            raise Exception("Error retrieving group list : %s" % err)

        use_salt, err = config.use_salt()
        if err:
            raise Exception(err)

        if use_salt:
            import salt.client
            client = salt.client.LocalClient()
            salt_args = [grpname, gid] if gid else [grpname]
            rc = client.cmd('*', 'group.add', salt_args)
            # An empty result or any falsy per-host status counts as failure.
            if not rc or not all(rc.values()):
                raise Exception('Group creation failed')
        else:
            if gid:
                cmd_to_run = 'groupadd -g %s %s' % (gid, grpname)
            else:
                cmd_to_run = 'groupadd %s' % (grpname)
            lines, err = command.get_command_output(cmd_to_run)
            if err:
                raise Exception(err)
    except Exception as e:
        return False, 'Error creating a local group : %s' % str(e)
    else:
        # Mirror the (status, error) shape of the failure return.
        return True, None
def display_status(): try: hostname = socket.gethostname() use_salt, err = config.use_salt() if err: raise Exception(err) if use_salt: print "Salt master service status :", (r, rc), err = command.execute_with_rc('service salt-master status') if err: raise Exception(err) l, err = command.get_output_list(r) if err: raise Exception(err) if l: print '\n'.join(l) else: l, err = command.get_error_list(r) if err: raise Exception(err) if l: print '\n'.join(l) print "Salt minion service status :", (r, rc), err = command.execute_with_rc('service salt-minion status') if err: raise Exception(err) l, err = command.get_output_list(r) if err: raise Exception(err) if l: print '\n'.join(l) else: l, err = command.get_error_list(r) if err: raise Exception(err) print l if l: print '\n'.join(l) print "Samba service status :", (r, rc), err = command.execute_with_rc('service smb status') if err: raise Exception(err) l, err = command.get_output_list(r) if err: raise Exception(err) if l: print '\n'.join(l) else: l, err = command.get_error_list(r) if err: raise Exception(err) if l: print '\n'.join(l) print "Winbind service status :", (r, rc), err = command.execute_with_rc('service winbind status') if err: raise Exception(err) l, err = command.get_output_list(r) if err: raise Exception(err) if l: print '\n'.join(l) else: l, err = command.get_error_list(r) if err: raise Exception(err) if l: print '\n'.join(l) except Exception, e: print "Error displaying system status : %s" % e return -1
def configure_interface():
    # Interactive console dialog that reconfigures one network interface.
    #
    # Lists the non-loopback interfaces, asks which one to configure, asks
    # for DHCP or static addressing (and IP / netmask / gateway for static),
    # applies the change through networking.update_interface_ip() and
    # optionally restarts networking (and the salt minion when salt is in
    # use).
    #
    # Returns 0 when the user made no changes, -1 on any error or when the
    # salt-minion restart reports a non-zero exit code. The remaining paths
    # fall off the end of the function and return None.
    #
    # NOTE(review): this block was reconstructed from a whitespace-collapsed
    # source; the nesting below follows the control-flow logic but should be
    # confirmed against the original layout.
    try:
        os.system('clear')
        interfaces, err = networking.get_interfaces()
        if err:
            raise Exception(
                'Error retrieving interface information : %s' % err)
        if not interfaces:
            raise Exception('No interfaces detected')
        print
        print
        print 'IntegralSTOR interface configuration'
        print '--------------------------------------------'
        print
        print
        print 'Current network interfaces : '
        print
        for if_name, iface in interfaces.items():
            # Loopback interfaces are never offered for configuration.
            if if_name.startswith('lo'):
                continue
            print '- %s' % if_name
        print
        # Keep asking until a known, non-loopback interface name is entered.
        valid_input = False
        while not valid_input:
            ifname = raw_input(
                'Enter the name of the interface that you wish to configure : ')
            if ifname not in interfaces or ifname.startswith('lo'):
                print 'Invalid interface name'
            else:
                valid_input = True
        print
        ip_info, err = networking.get_ip_info(ifname)
        # The bare string below is disabled code: the error from get_ip_info
        # is deliberately not checked, and a missing ip_info is treated as
        # "nothing configured yet".
        ''' if err: raise Exception('Error retrieving interface information : %s'%err) '''
        if ip_info:
            ip = ip_info["ipaddr"]
            netmask = ip_info["netmask"]
            if "default_gateway" in ip_info:
                gateway = ip_info["default_gateway"]
            else:
                gateway = None
        else:
            ip = None
            netmask = None
            gateway = None
        old_boot_proto, err = networking.get_interface_bootproto(ifname)
        if err:
            raise Exception(
                'Error retrieving interface information : %s' % err)
            # NOTE(review): as placed here this sleep follows the raise and
            # never runs; the collapsed source does not show whether it
            # belongs one level out - confirm.
            time.sleep(5)
        # Tracks whether anything the user entered differs from the current
        # settings; nothing is written unless this becomes True.
        config_changed = False
        str_to_print = "Configure for DHCP or static addressing (dhcp/static)? : "
        valid_input = False
        while not valid_input:
            input = raw_input(str_to_print)
            if input:
                if input.lower() in ['static', 'dhcp']:
                    valid_input = True
                    boot_proto = input.lower()
                    if boot_proto != old_boot_proto:
                        config_changed = True
            if not valid_input:
                print "Invalid value. Please try again."
        print
        if boot_proto == 'static':
            # For each of IP, netmask and gateway: an empty answer keeps the
            # current value when there is one, otherwise the prompt repeats
            # until a value that passes validation is entered.
            if ip:
                str_to_print = "Enter IP address (currently %s, press enter to retain current value) : " % ip
            else:
                str_to_print = "Enter IP address (currently not set) : "
            valid_input = False
            while not valid_input:
                input = raw_input(str_to_print)
                if input:
                    ok, err = networking.validate_ip(input)
                    if err:
                        raise Exception('Error validating IP : %s' % err)
                    if ok:
                        valid_input = True
                        ip = input
                        config_changed = True
                elif ip:
                    valid_input = True
                if not valid_input:
                    print "Invalid value. Please try again."
            print
            if netmask:
                str_to_print = "Enter netmask (currently %s, press enter to retain current value) : " % netmask
            else:
                str_to_print = "Enter netmask (currently not set) : "
            valid_input = False
            while not valid_input:
                input = raw_input(str_to_print)
                if input:
                    ok, err = networking.validate_netmask(input)
                    if err:
                        raise Exception('Error validating netmask : %s' % err)
                    if ok:
                        valid_input = True
                        netmask = input
                        config_changed = True
                elif netmask:
                    valid_input = True
                if not valid_input:
                    print "Invalid value. Please try again."
            print
            if gateway:
                str_to_print = "Enter gateway (currently %s, press enter to retain current value) : " % gateway
            else:
                str_to_print = "Enter gateway (currently not set) : "
            valid_input = False
            while not valid_input:
                input = raw_input(str_to_print)
                if input:
                    ok, err = networking.validate_ip(input)
                    if err:
                        raise Exception('Error validating gateway : %s' % err)
                    if ok:
                        valid_input = True
                        gateway = input
                        config_changed = True
                elif gateway:
                    valid_input = True
                if not valid_input:
                    print "Invalid value. Please try again."
            print
        if config_changed:
            # Settings handed to networking.update_interface_ip(); the
            # address fields are only included for static addressing.
            d = {}
            d['addr_type'] = boot_proto
            if boot_proto == 'static':
                d['ip'] = ip
                d['netmask'] = netmask
                d['default_gateway'] = gateway
            ret, err = networking.update_interface_ip(ifname, d)
            if not ret:
                if err:
                    raise Exception(
                        'Error changing interface address : %s' % err)
                else:
                    raise Exception('Error changing interface address')
            restart = False
            print
            print
            valid_input = False
            while not valid_input:
                str_to_print = 'Restart network services now (y/n) :'
                print
                input = raw_input(str_to_print)
                if input:
                    if input.lower() in ['y', 'n']:
                        valid_input = True
                        if input.lower() == 'y':
                            restart = True
                if not valid_input:
                    print "Invalid value. Please try again."
            print
            if restart:
                ret, err = networking.restart_networking()
                if not ret:
                    if err:
                        raise Exception(err)
                    else:
                        raise Exception("Couldn't restart.")
                # The salt minion is restarted after networking is restarted,
                # but only when salt is in use.
                use_salt, err = config.use_salt()
                if err:
                    raise Exception(err)
                if use_salt:
                    (r, rc), err = command.execute_with_rc(
                        'service salt-minion restart')
                    if err:
                        raise Exception(err)
                    if rc == 0:
                        print "Salt minion service restarted succesfully."
                    else:
                        print "Error restarting salt minion services."
                        raw_input('Press enter to return to the main menu')
                        return -1
        else:
            print
            print
            raw_input(
                'No changes have been made to the configurations. Press enter to return to the main menu.')
            return 0
    except Exception, e:
        print "Error configuring network settings : %s" % e
        return -1
def replace_disk(request):
    """Django view driving the multi-step "replace a disk in a ZFS pool" flow.

    Only POST is accepted. Without a ``conf`` field the initial confirmation
    page is rendered. With ``conf`` the ``step`` field selects the stage:

    * ``replace_method`` - render the replacement-method page.
    * ``select_replacement_disk`` - list free disks (zfs.get_free_disks).
    * ``offline_disk`` - run ``zpool offline`` for the old disk and audit it.
    * ``scan_for_new_disk`` - look for a serial number that is not in the
      loaded system config and ask for confirmation.
    * ``online_new_disk`` - schedule the ``zpool replace`` / ``zpool online``
      / manifest regeneration commands as a task and audit it.

    Any exception renders ``logged_in_error.html`` with the error details.

    NOTE(review): ``pool``, ``old_id`` and ``new_id`` come straight from the
    POST data and are interpolated into zpool command strings; consider
    validating them against the loaded system config.
    """
    return_dict = {}
    try:
        si, err = system_info.load_system_config()
        if err:
            raise Exception(err)
        if not si:
            raise Exception('Error loading system config')
        return_dict['system_config_list'] = si
        template = 'logged_in_error.html'

        use_salt, err = config.use_salt()
        if err:
            raise Exception(err)

        if request.method == "GET":
            raise Exception("Incorrect access method. Please use the menus")

        if 'node' in request.POST:
            node = request.POST["node"]
        else:
            # No node supplied: fall back to the first configured node.
            node = list(si.keys())[0]
        # .get() so that a missing field reaches the explicit validation
        # below instead of raising a bare KeyError here.
        serial_number = request.POST.get("serial_number")

        if "conf" in request.POST:
            if "node" not in request.POST or "serial_number" not in request.POST:
                raise Exception(
                    "Incorrect access method. Please use the menus")
            if request.POST["node"] not in si:
                raise Exception("Unknown node. Please use the menus")
            if "step" not in request.POST:
                raise Exception("Incomplete request. Please use the menus")
            if request.POST["step"] not in [
                    "replace_method", "select_replacement_disk",
                    "offline_disk", "scan_for_new_disk", "online_new_disk"]:
                raise Exception("Incomplete request. Please use the menus")

            step = request.POST["step"]

            if step == "offline_disk":
                # Offline the old disk in its pool, then ask the user to
                # swap it (or carry on with the already selected disk).
                if 'replacement_method' not in request.POST or request.POST[
                        'replacement_method'] not in [
                            'use_existing_disk', 'swap_out_disk']:
                    raise Exception('Invalid request')
                return_dict['replacement_method'] = request.POST[
                    'replacement_method']
                if request.POST['replacement_method'] == 'use_existing_disk':
                    # The replacement disk was already chosen in the
                    # previous step, so record its details.
                    if 'new_serial_number' not in request.POST:
                        raise Exception(
                            'Incomplete request. Please try again')
                    new_serial_number = request.POST['new_serial_number']
                    all_disks, err = disks.get_disk_info_all()
                    if err:
                        raise Exception(err)
                    if not all_disks or new_serial_number not in all_disks:
                        raise Exception('Invalid disk selection')
                    return_dict['new_serial_number'] = new_serial_number
                    return_dict['new_id'] = all_disks[new_serial_number]['id']

                pool = None
                disk_id = None
                if serial_number in si[node]["disks"]:
                    disk = si[node]["disks"][serial_number]
                    if "pool" in disk:
                        pool = disk["pool"]
                    disk_id = disk["id"]
                if not pool:
                    raise Exception(
                        "Could not find the storage pool on that disk. Please use the menus")

                cmd_to_run = 'zpool offline %s %s' % (pool, disk_id)
                ret, err = command.get_command_output(cmd_to_run)
                if err:
                    raise Exception(err)
                audit_str = "Replace disk - Disk with serial number %s brought offline" % serial_number
                audit.audit("replace_disk_offline_disk", audit_str, request)
                return_dict["serial_number"] = serial_number
                return_dict["node"] = node
                return_dict["pool"] = pool
                return_dict["old_id"] = disk_id
                template = "replace_disk_offlined_conf.html"

            elif step == "replace_method":
                return_dict["node"] = node
                return_dict["serial_number"] = serial_number
                template = "replace_disk_method.html"

            elif step == "select_replacement_disk":
                if 'replacement_method' not in request.POST or request.POST[
                        'replacement_method'] not in [
                            'use_existing_disk', 'swap_out_disk']:
                    raise Exception('Invalid request')
                return_dict['replacement_method'] = request.POST[
                    'replacement_method']
                return_dict["node"] = node
                return_dict["serial_number"] = serial_number
                free_disks, err = zfs.get_free_disks()
                if err:
                    raise Exception(err)
                if not free_disks:
                    raise Exception('There are no unused disks presently')
                return_dict['free_disks'] = free_disks
                template = "replace_disk_choose_disk.html"

            elif step == "scan_for_new_disk":
                # The disk has been swapped: find a serial number that was
                # not in the loaded config and ask the user to confirm it.
                pool = request.POST["pool"]
                old_id = request.POST["old_id"]
                return_dict["node"] = node
                return_dict["serial_number"] = serial_number
                return_dict["pool"] = pool
                return_dict["old_id"] = old_id
                old_disks = si[node]["disks"].keys()
                new_disks, err = manifest_status.get_disk_info_and_status()
                if err:
                    raise Exception(err)
                for disk in (new_disks or {}):
                    if disk not in old_disks:
                        return_dict["inserted_disk_serial_number"] = disk
                        return_dict["new_id"] = new_disks[disk]["id"]
                        break
                # An empty scan result is reported the same way as "no new
                # disk found" rather than rendering a blank error page.
                if "inserted_disk_serial_number" not in return_dict:
                    raise Exception(
                        "Could not detect any new disk. Please check the new disk is inserted and give the system a few seconds to detect the drive and refresh the page to try again.")
                template = "replace_disk_confirm_new_disk.html"

            elif step == "online_new_disk":
                pool = request.POST["pool"]
                old_id = request.POST["old_id"]
                new_id = request.POST["new_id"]
                new_serial_number = request.POST["new_serial_number"]
                common_python_scripts_path, err = config.get_common_python_scripts_path()
                if err:
                    raise Exception(err)
                # Each entry maps a task description to the command to run.
                cmd_list = [
                    {'Replace old disk':
                        'zpool replace -f %s %s %s' % (pool, old_id, new_id)},
                    {'Online the new disk':
                        'zpool online -e %s %s' % (pool, new_id)},
                    {'Regenerate the system configuration':
                        '%s/generate_manifest.py' % common_python_scripts_path},
                ]
                ret, err = scheduler_utils.create_task(
                    'Disk replacement', cmd_list, task_type_id=1, attempts=1)
                if err:
                    raise Exception(err)
                if not ret:
                    raise Exception('Error scheduling disk replacement tasks')
                audit_str = "Replace disk - Scheduled a task for replacing the old disk with serial number %s with the new disk with serial number %s" % (
                    serial_number, new_serial_number)
                audit.audit("replace_disk_replaced_disk", audit_str, request)
                return_dict["node"] = node
                return_dict["old_serial_number"] = serial_number
                return_dict["new_serial_number"] = new_serial_number
                template = "replace_disk_success.html"
        else:
            if "serial_number" not in request.POST:
                raise Exception(
                    "Incorrect access method. Please use the menus")
            # Covers both the posted node and the fallback node; previously
            # the fallback was computed but never handed to the template.
            return_dict["node"] = node
            return_dict["serial_number"] = request.POST["serial_number"]
            template = "replace_disk_conf.html"

        return django.shortcuts.render_to_response(
            template, return_dict,
            context_instance=django.template.context.RequestContext(request))
    except Exception as e:
        return_dict['base_template'] = "storage_base.html"
        return_dict["page_title"] = 'Replace a disk in a ZFS pool'
        return_dict['tab'] = 'view_zfs_pools_tab'
        return_dict["error"] = 'Error replacing a disk in a ZFS pool'
        return_dict["error_details"] = str(e)
        return django.shortcuts.render_to_response(
            "logged_in_error.html", return_dict,
            context_instance=django.template.context.RequestContext(request))
def create_local_user(username, name, pswd, gid=None, smb_user=True):
    """Create a local system user and, optionally, a matching samba user.

    Existing users are fetched with get_local_users() and a duplicate
    username is rejected. With salt enabled the account is created on every
    minion (``user.add``, ``shadow.set_password``, ``user.chfullname``);
    otherwise ``useradd`` is run locally. When ``smb_user`` is true the
    samba account is added with ``pdbedit``, feeding the password twice on
    its input.

    Args:
        username: Login name to create.
        name: Display name; stored as ``integralstor_user_<name>``.
        pswd: Clear-text password.
        gid: Optional primary group id.
        smb_user: Also create the samba account when true.

    Returns:
        (True, None) on success, or (False, error_message) on any failure.
    """
    try:
        # Refuse to create a user that already exists.
        ul, err = get_local_users()
        if ul:
            for ud in ul:
                if ud["username"] == username:
                    raise Exception(
                        "Error creating user. The user \"%s\" already exists. " % username)
        elif err:
            raise Exception("Error retrieving user list : %s" % err)

        # NOTE(review): a fixed two-character salt selects the legacy DES
        # crypt scheme; left unchanged here because it alters stored hashes.
        enc_pswd = crypt.crypt(pswd, "28")

        use_salt, err = config.use_salt()
        if err:
            raise Exception(err)

        if use_salt:
            import salt.client
            client = salt.client.LocalClient()
            error_list = []

            if gid:
                rc = client.cmd('*', 'user.add', [username, None, gid])
            else:
                rc = client.cmd('*', 'user.add', [username])
            if not rc:
                # No minion answered, so there is nothing to configure.
                raise Exception("Error creating the username")
            for hostname, status in rc.items():
                if not status:
                    error_list.append(
                        "Error creating the username on node %s" % hostname)

            rc = client.cmd('*', 'shadow.set_password',
                            [username, enc_pswd])
            for hostname, status in (rc or {}).items():
                if not status:
                    error_list.append(
                        "Error setting the password for username on GRIDCell %s" % hostname)

            rc = client.cmd('*', 'user.chfullname',
                            [username, "integralstor_user_%s" % name])
            for hostname, status in (rc or {}).items():
                if not status:
                    error_list.append(
                        "Error setting the name for username on node %s" % hostname)

            # Surface the per-node failures instead of dropping them.
            if error_list:
                raise Exception(' '.join(error_list))
        else:
            if gid:
                cmd_to_run = 'useradd -g %s -p %s -c integralstor_user_%s %s' % (
                    gid, enc_pswd, name, username)
            else:
                cmd_to_run = 'useradd -p %s -c integralstor_user_%s %s' % (
                    enc_pswd, name, username)
            lines, err = command.get_command_output(cmd_to_run)
            if err:
                raise Exception(err)

        if smb_user:
            # pdbedit -t reads the password (and its confirmation) from
            # its input, hence the doubled value.
            (ret, rc), err = command.execute_with_conf_and_rc(
                r'/usr/bin/pdbedit -d 1 -t -a -u %s -f %s' % (username, name),
                "%s\n%s" % (pswd, pswd))
            if err:
                raise Exception(err)
            if rc == 0:
                lines, er = command.get_output_list(ret)
                if er:
                    raise Exception(er)
            else:
                # Report everything the command wrote to stdout and stderr.
                err = ''
                tl, er = command.get_output_list(ret)
                if er:
                    raise Exception(er)
                if tl:
                    err = ','.join(tl)
                tl, er = command.get_error_list(ret)
                if er:
                    raise Exception(er)
                if tl:
                    err = err + ','.join(tl)
                raise Exception(err)
    except Exception as e:
        return False, 'Error creating local user : %s' % str(e)
    else:
        # Mirror the (status, error) shape of the failure return.
        return True, None
def generate_status_info(path):
    """Build a per-node status dictionary by comparing live status to a manifest.

    The live status comes from salt (``integralstor.status`` on every
    minion) when config.use_salt() is true, otherwise from get_status()
    keyed by the local FQDN. The manifest is the JSON file at ``path``.
    For every manifest node the disks, services, interfaces, memory, load
    average, IPMI sensors and ZFS pool components are checked and any
    problem is added to that node's ``errors`` list.

    ``node_status`` codes: 0 healthy, 1 degraded, 2 new hardware detected,
    -1 no status received for the node.

    Args:
        path: Path of the JSON manifest file.

    Returns:
        (status_dict, None) on success, or (None, error_message) on failure.
    """
    status_dict = None
    try:
        # First load the status
        fqdn = socket.getfqdn()
        use_salt, err = config.use_salt()
        if err:
            raise Exception(err)
        if use_salt:
            import salt.client
            local = salt.client.LocalClient()
            sd = local.cmd('*', 'integralstor.status')
        else:
            tmpsd, err = get_status()
            if err:
                raise Exception(err)
            sd = {fqdn: tmpsd}
        if not sd:
            raise Exception('Did not get a response from salt')

        # Load the manifest to check for discrepancies
        try:
            with open(path, 'r') as f:
                md = json.load(f)
        except Exception as e:
            raise Exception('Error reading the manifest file : %s' % str(e))

        status_dict = {}
        # Match the status against the manifest entries for discrepancies
        for hostname, manifest in md.items():
            temp_d = {}
            temp_d["errors"] = []
            node_status = 0
            if hostname not in sd:
                node_status = -1
            else:
                node_sd = sd[hostname]
                if node_sd and 'hardware_specific_dict' in node_sd:
                    temp_d['hardware_specific_dict'] = node_sd[
                        'hardware_specific_dict']

                # Process disk information
                for disk_sn, disk in manifest["disks"].items():
                    if disk_sn in node_sd["disks"]:
                        disk_status = node_sd["disks"][disk_sn]["status"]
                        if disk_status:
                            if 'hw_raid' in disk and (not disk['hw_raid']):
                                # Non hardware-RAID disks report SMART status.
                                if disk_status not in ['PASSED', 'OK']:
                                    node_status = 1
                                    temp_d["errors"].append(
                                        "Disk with serial number %s is reporting SMART errors." % disk_sn)
                            elif disk_status.lower() != 'ok':
                                node_status = 1
                                temp_d["errors"].append(
                                    "Disk with serial number %s has errors." % disk_sn)
                    else:
                        # Keep the manifest entry so the missing disk still
                        # shows up in the reported disk list.
                        node_sd['disks'][disk_sn] = disk
                        node_sd['disks'][disk_sn]['status'] = 'Disk missing'
                        node_status = 1
                        temp_d["errors"].append(
                            "Disk with serial number %s seems to be missing." % disk_sn)
                for td in node_sd["disks"].keys():
                    if td not in manifest["disks"]:
                        temp_d["errors"].append(
                            "New disk detected. Disk with serial number %s seems to be new." % td)
                        node_status = 2
                temp_d["disks"] = node_sd['disks']

                if node_sd['services']:
                    for service_name, service_info in node_sd[
                            'services'].items():
                        if service_info[0] != 0:
                            temp_d['errors'].append(
                                'Service %s seems to have failed.' % service_name)
                            node_status = 1
                temp_d["services"] = node_sd['services']

                # Process interface information
                interfaces = {}
                for ifname in manifest["interfaces"]:
                    # Check for all initially present interfaces
                    if ifname in node_sd["interfaces"]:
                        if_status, err = _convert_to_status_interface_dict(
                            node_sd["interfaces"][ifname])
                        if err:
                            raise Exception(err)
                    else:
                        if_status = {"status": "Interface Missing"}
                        node_status = 1
                        temp_d["errors"].append(
                            "Interface with number %s seems to be missing." % ifname)
                    interfaces[ifname] = if_status
                for ifname, ifinfo in node_sd["interfaces"].items():
                    # Check for all newly created interfaces - bonds, vlans, etc
                    if ifname not in manifest["interfaces"]:
                        if_status, err = _convert_to_status_interface_dict(
                            ifinfo)
                        if err:
                            raise Exception(err)
                        interfaces[ifname] = if_status
                temp_d["interfaces"] = interfaces
                for ifname, if_status in interfaces.items():
                    if ('ip_configured' in if_status or 'slave_to' in if_status) and if_status["status"] != 'up':
                        node_status = 1
                        temp_d["errors"].append(
                            "Interface %s is not up." % ifname)

                if "memory" in node_sd:
                    # Report memory in MB; // keeps the integer result the
                    # original Python 2 division produced.
                    for mem_key in ("mem_total", "mem_free"):
                        mem_item = node_sd["memory"][mem_key]
                        if mem_item["unit"] == "kB":
                            mem_item["value"] = str(
                                int(mem_item["value"]) // 1024)
                            mem_item["unit"] = "MB"
                    temp_d["memory"] = node_sd["memory"]
                if "disk_usage" in node_sd:
                    temp_d["disk_usage"] = node_sd["disk_usage"]
                if "pools" in node_sd:
                    temp_d["pools"] = node_sd["pools"]
                if "load_avg" in node_sd:
                    # To get around a django quirk of not recognising hyphens
                    # in dicts
                    load_avg = node_sd["load_avg"]
                    for period in ("15", "5", "1"):
                        load_avg["%s_min" % period] = load_avg[
                            "%s-min" % period]
                        load_avg.pop("%s-min" % period, None)
                    temp_d["load_avg"] = load_avg
                if "cpu_model" in manifest:
                    temp_d["cpu_model"] = manifest["cpu_model"]
                if "fqdn" in manifest:
                    temp_d["fqdn"] = manifest["fqdn"]
                if 'ipmi_status' in node_sd:
                    temp_d["ipmi_status"] = node_sd["ipmi_status"]

                # load_avg is optional in the status, so only check it when
                # it was actually reported.
                if "load_avg" in temp_d:
                    load_avg = temp_d["load_avg"]
                    if load_avg['15_min'] >= load_avg['cpu_cores']:
                        temp_d["errors"].append(
                            "The 15-minute load average (%.2f) has been high." % load_avg['15_min'])
                        # 1 == degraded; the string "Degraded" would not
                        # match any code in the mapping below.
                        node_status = 1
                    if load_avg['5_min'] >= load_avg['cpu_cores']:
                        temp_d["errors"].append(
                            "The 5-minute load average (%.2f) has been high." % load_avg['5_min'])

                if 'ipmi_status' in temp_d:
                    for status_item in temp_d['ipmi_status']:
                        if status_item["status"] not in ['ok', 'nr']:
                            temp_d["errors"].append(
                                'The %s of the %s is reporting errors' %
                                (status_item["parameter_name"],
                                 status_item["component_name"]))

                # pools is optional in the status as well.
                if "pools" in temp_d:
                    component_status_dict, err = zfs.get_all_components_status(
                        temp_d["pools"])
                    if err:
                        raise Exception(err)
                    if component_status_dict:
                        for pool_name, component_status_list in component_status_dict.items():
                            msg = None
                            for component in component_status_list:
                                if 'status' in component and 'state' in component[
                                        'status'] and component['status'][
                                            'state'] != 'ONLINE':
                                    if not msg:
                                        msg = "The ZFS pool '%s' has the following issue(s) : " % pool_name
                                    msg += "The component %s of type '%s' has a state of '%s'. " % (
                                        component['name'], component['type'],
                                        component['status']['state'])
                            if msg:
                                temp_d['errors'].append(msg)

                temp_d['zfs_version'] = node_sd['zfs_version']
                temp_d['os_version'] = node_sd['os_version']

            temp_d["node_status"] = node_status
            if node_status == 0:
                temp_d["node_status_str"] = "Healthy"
            elif node_status == 1:
                temp_d["node_status_str"] = "Degraded"
            elif node_status == 2:
                temp_d["node_status_str"] = "New on-node hardware detected"
            elif node_status == -1:
                temp_d["node_status_str"] = "No response. Down?"
                temp_d['errors'].append('Node %s seems to be down' % hostname)
            status_dict[hostname] = temp_d
    except Exception as e:
        return None, 'Error generating the status dictionary : %s' % str(e)
    else:
        return status_dict, None
def generate_manifest_info():
    """Generate a dictionary containing all manifest information.

    Will be dumped into the master.manifest file in a json format.

    With salt enabled, one entry is created per minion key known to the
    salt master and filled from the ``roles`` grain and the
    ``integralstor.status`` call. Without salt, a single entry keyed by the
    local FQDN is filled from get_status(). Transitory details (disk
    ``pool`` and ``status``, interface ``up`` and ``inet``, ``mem_free``)
    are stripped so that only hardware information remains.

    Returns:
        (manifest_dict, None) on success, or (None, error_message) on
        failure.
    """
    manifest_dict = {}
    try:
        use_salt, err = config.use_salt()
        if err:
            raise Exception(err)
        fqdn = socket.getfqdn()

        if use_salt:
            import salt.modules.network
            import salt.modules.ps
            import salt.modules.status
            import salt.client
            import salt.wheel
            import salt.config
            local = salt.client.LocalClient()
            cfg, err = config.get_salt_master_config()
            if err:
                raise Exception(err)
            opts = salt.config.master_config(cfg)
            wheel = salt.wheel.Wheel(opts)
            keys = wheel.call_func('key.list_all')
            if not keys:
                raise Exception('No GRIDCells found!')
            for node in keys['minions']:
                manifest_dict[node] = {}

            roles = local.cmd('*', 'grains.item', ['roles'])
            for node, info in roles.items():
                manifest_dict.setdefault(node, {})['roles'] = info['roles']

            ret = local.cmd('*', 'integralstor.status')
            for node, info in ret.items():
                entry = manifest_dict.setdefault(node, {})
                entry['cpu_model'] = info['cpu_model']
                entry['disks'] = info['disks']
                for dv in entry['disks'].values():
                    dv.pop('pool', None)
                entry['interfaces'] = info['interfaces']
                entry['memory'] = info['memory']
                entry['fqdn'] = info['fqdn']
                if 'hardware_specific_dict' in info:
                    entry['hardware_specific_dict'] = info[
                        'hardware_specific_dict']
        else:
            # Single node so get the info using a direct call and just bung it
            # into the fqdn key!
            status_dict, err = get_status()
            if err:
                raise Exception(err)
            entry = manifest_dict[fqdn] = {}
            entry['cpu_model'] = status_dict['cpu_model']
            entry['disks'] = status_dict['disks']
            for dv in entry['disks'].values():
                dv.pop('pool', None)
            entry['interfaces'] = status_dict['interfaces']
            entry['memory'] = status_dict['memory']
            entry['fqdn'] = fqdn

        # Remove transitory info and only keep the actual hardware info
        for entry in manifest_dict.values():
            if 'interfaces' in entry:
                for interface in entry['interfaces'].values():
                    interface.pop('up', None)
                    interface.pop('inet', None)
            if 'disks' in entry:
                for diskinfo in entry['disks'].values():
                    diskinfo.pop('status', None)
            if 'memory' in entry:
                entry['memory'].pop('mem_free', None)

        if not manifest_dict:
            raise Exception('Error getting manifest information')
    except Exception as e:
        return None, 'Error generating the manifest dictionary : %s' % str(e)
    else:
        # Previously the dictionary was built but never handed back.
        return manifest_dict, None
def generate_status_info(path):
    """Build a per-node health report by checking live status against the manifest.

    The live status comes from the salt 'integralstor.status' call when salt
    is enabled, otherwise from get_status() for this host only. The manifest
    is the JSON file at `path`, keyed by node name. For every node in the
    manifest the report records the errors found (missing/new/failing disks,
    failed services, missing or down interfaces, high load average, IPMI
    and ZFS pool component problems) together with the live information
    that was available.

    node_status codes written to each entry:
        0  -> "Healthy"
        1  -> "Degraded"
        2  -> "New on-node hardware detected"
        -1 -> "No response. Down?"

    The per-node status dictionaries obtained from salt / get_status() are
    modified in place (missing disks are added, memory is converted to MB
    and the load average keys are renamed).

    Args:
        path: location of the manifest JSON file.

    Returns:
        (status_dict, None) on success, (None, error_message) on failure.
    """
    status_dict = None
    status_labels = {
        0: "Healthy",
        1: "Degraded",
        2: "New on-node hardware detected",
        -1: "No response. Down?",
    }
    try:
        # First load the status
        fqdn = socket.getfqdn()
        use_salt, err = config.use_salt()
        if err:
            raise Exception(err)
        if use_salt:
            import salt.client
            local = salt.client.LocalClient()
            sd = local.cmd('*', 'integralstor.status')
        else:
            tmpsd, err = get_status()
            if err:
                raise Exception(err)
            sd = {fqdn: tmpsd}
        if not sd:
            raise Exception('Did not get a response from salt')

        # Load the manifest to check for discrepancies
        try:
            with open(path, 'r') as f:
                md = json.load(f)
        except Exception as e:
            raise Exception('Error reading the manifest file : %s' % str(e))

        status_dict = {}

        # Match the status against the manifest entries for discrepancies
        for hostname, manifest in md.items():
            temp_d = {"errors": []}
            errors = temp_d["errors"]
            node_status = 0
            node_info = sd.get(hostname)

            if not node_info:
                # No entry, or an empty one, for this node: nothing below
                # can be evaluated, so report it as not responding.
                node_status = -1
            else:
                if 'hardware_specific_dict' in node_info:
                    temp_d['hardware_specific_dict'] = node_info[
                        'hardware_specific_dict']

                # Process disk information
                live_disks = node_info["disks"]
                for disk_sn, disk in manifest["disks"].items():
                    if disk_sn not in live_disks:
                        # Keep the manifest's description of the disk in the
                        # report, flagged as missing. A copy is used so the
                        # manifest entry itself is not altered.
                        missing_disk = dict(disk)
                        missing_disk['status'] = 'Disk missing'
                        live_disks[disk_sn] = missing_disk
                        node_status = 1
                        errors.append(
                            "Disk with serial number %s seems to be missing." % disk_sn)
                        continue
                    disk_state = live_disks[disk_sn]["status"]
                    if not disk_state:
                        continue
                    if 'hw_raid' in disk and (not disk['hw_raid']):
                        # Not behind a hardware RAID controller
                        if disk_state not in ['PASSED', 'OK']:
                            node_status = 1
                            errors.append(
                                "Disk with serial number %s is reporting SMART errors." % disk_sn)
                    elif disk_state.lower() != 'ok':
                        node_status = 1
                        errors.append(
                            "Disk with serial number %s has errors." % disk_sn)

                for disk_sn in live_disks:
                    if disk_sn not in manifest["disks"]:
                        errors.append(
                            "New disk detected. Disk with serial number %s seems to be new." % disk_sn)
                        node_status = 2
                temp_d["disks"] = live_disks

                # Process service information
                services = node_info['services']
                if services:
                    for service_name, service_info in services.items():
                        if service_info[0] != 0:
                            errors.append(
                                'Service %s seems to have failed.' % service_name)
                            node_status = 1
                temp_d["services"] = services

                # Process interface information
                live_interfaces = node_info["interfaces"]
                interfaces = {}
                for ifname in manifest["interfaces"]:
                    # Check for all initially present interfaces
                    if ifname in live_interfaces:
                        if_status, err = _convert_to_status_interface_dict(
                            live_interfaces[ifname])
                        if err:
                            raise Exception(err)
                    else:
                        if_status = {"status": "Interface Missing"}
                        node_status = 1
                        errors.append(
                            "Interface with number %s seems to be missing." % ifname)
                    interfaces[ifname] = if_status
                for ifname, ifinfo in live_interfaces.items():
                    # Check for all newly created interfaces - bonds, vlans,
                    # etc
                    if ifname not in manifest["interfaces"]:
                        if_status, err = _convert_to_status_interface_dict(
                            ifinfo)
                        if err:
                            raise Exception(err)
                        interfaces[ifname] = if_status
                temp_d["interfaces"] = interfaces
                for ifname, if_status in interfaces.items():
                    in_use = 'ip_configured' in if_status or 'slave_to' in if_status
                    if in_use and if_status["status"] != 'up':
                        node_status = 1
                        errors.append("Interface %s is not up." % ifname)

                if "memory" in node_info:
                    memory = node_info["memory"]
                    for field in ("mem_total", "mem_free"):
                        if memory[field]["unit"] == "kB":
                            # Floor division keeps the whole-number result
                            # regardless of the interpreter's '/' semantics.
                            memory[field]["value"] = str(
                                int(memory[field]["value"]) // 1024)
                            memory[field]["unit"] = "MB"
                    temp_d["memory"] = memory
                if "disk_usage" in node_info:
                    temp_d["disk_usage"] = node_info["disk_usage"]
                if "pools" in node_info:
                    temp_d["pools"] = node_info["pools"]
                if "load_avg" in node_info:
                    # To get around a django quirk of not recognising hyphens
                    # in dicts
                    load_avg = node_info["load_avg"]
                    for hyphenated in ("15-min", "5-min", "1-min"):
                        load_avg[hyphenated.replace('-', '_')] = load_avg.pop(
                            hyphenated)
                    temp_d["load_avg"] = load_avg
                if "cpu_model" in manifest:
                    temp_d["cpu_model"] = manifest["cpu_model"]
                if "fqdn" in manifest:
                    temp_d["fqdn"] = manifest["fqdn"]
                if 'ipmi_status' in node_info:
                    temp_d["ipmi_status"] = node_info["ipmi_status"]

                # Only evaluate the load when the node reported one; it is
                # copied into temp_d conditionally above.
                if "load_avg" in temp_d:
                    load_avg = temp_d["load_avg"]
                    if load_avg['15_min'] >= load_avg['cpu_cores']:
                        errors.append(
                            "The 15-minute load average (%.2f) has been high." % load_avg['15_min'])
                        # Must be the integer code: a string here never
                        # matched the label table below.
                        node_status = 1
                    if load_avg['5_min'] >= load_avg['cpu_cores']:
                        errors.append(
                            "The 5-minute load average (%.2f) has been high." % load_avg['5_min'])

                if 'ipmi_status' in temp_d:
                    for status_item in temp_d['ipmi_status']:
                        if status_item["status"] not in ['ok', 'nr']:
                            errors.append('The %s of the %s is reporting errors' % (
                                status_item["parameter_name"], status_item["component_name"]))

                # Pools are likewise optional in the node data.
                if "pools" in temp_d:
                    component_status_dict, err = zfs.get_all_components_status(
                        temp_d["pools"])
                    if err:
                        raise Exception(err)
                    if component_status_dict:
                        for pool_name, component_status_list in component_status_dict.items():
                            msg = None
                            for component in component_status_list:
                                if ('status' in component
                                        and 'state' in component['status']
                                        and component['status']['state'] != 'ONLINE'):
                                    if not msg:
                                        msg = "The ZFS pool '%s' has the following issue(s) : " % pool_name
                                    msg += "The component %s of type '%s' has a state of '%s'. " % (
                                        component['name'], component['type'], component['status']['state'])
                            if msg:
                                errors.append(msg)

                temp_d['zfs_version'] = node_info['zfs_version']
                temp_d['os_version'] = node_info['os_version']

            temp_d["node_status"] = node_status
            temp_d["node_status_str"] = status_labels[node_status]
            if node_status == -1:
                errors.append('Node %s seems to be down' % hostname)
            status_dict[hostname] = temp_d
    except Exception as e:
        # The block previously had no handler and no return at all; follow
        # the (value, error) convention used by the other functions here.
        return None, 'Error generating the status dictionary : %s' % str(e)
    else:
        return status_dict, None
def generate_manifest_info():
    """Generate a dictionary containing all manifest information.

    Will be dumped into the master.manifest file in a json format.

    The result is keyed by node name. With salt enabled, every accepted
    minion gets an entry that is filled from the 'roles' grain and from the
    'integralstor.status' call; without salt a single entry keyed by this
    host's fqdn is filled from get_status(). Transitory fields (disk
    'pool'/'status', interface 'up'/'inet', memory 'mem_free') are removed
    so that only hardware information remains.

    Note that the dictionaries returned by salt / get_status() are stored
    by reference and stripped in place.

    NOTE(review): an earlier definition of generate_manifest_info() exists
    in this file; this later one replaces it when the module is loaded.
    Consider deleting one of the two.

    Returns:
        (manifest_dict, None) on success, (None, error_message) on failure.
    """
    manifest_dict = {}
    try:
        use_salt, err = config.use_salt()
        if err:
            raise Exception(err)
        fqdn = socket.getfqdn()
        if use_salt:
            # NOTE(review): the three salt.modules imports are not referenced
            # below; kept as-is in case something depends on them being
            # loaded - confirm and drop.
            import salt.modules.network
            import salt.modules.ps
            import salt.modules.status
            import salt.client
            import salt.wheel
            import salt.config
            local = salt.client.LocalClient()
            cfg, err = config.get_salt_master_config()
            if err:
                raise Exception(err)
            opts = salt.config.master_config(cfg)
            wheel = salt.wheel.Wheel(opts)
            keys = wheel.call_func('key.list_all')
            if not keys:
                raise Exception('No GRIDCells found!')
            # Seed an entry for every accepted minion so that nodes which do
            # not answer the calls below still show up in the manifest.
            for node in keys['minions']:
                manifest_dict[node] = {}

            roles = local.cmd('*', 'grains.item', ['roles'])
            for node, info in roles.items():
                manifest_dict.setdefault(node, {})['roles'] = info['roles']

            ret = local.cmd('*', 'integralstor.status')
            for node, info in ret.items():
                entry = manifest_dict.setdefault(node, {})
                for key in ('cpu_model', 'disks', 'interfaces', 'memory',
                            'fqdn'):
                    entry[key] = info[key]
                if 'hardware_specific_dict' in info:
                    entry['hardware_specific_dict'] = info[
                        'hardware_specific_dict']
        else:
            # Single node so get the info using a direct call and just bung
            # it into the fqdn key!
            status_dict, err = get_status()
            if err:
                raise Exception(err)
            entry = {}
            for key in ('cpu_model', 'disks', 'interfaces', 'memory'):
                entry[key] = status_dict[key]
            entry['fqdn'] = fqdn
            manifest_dict[fqdn] = entry

        # Remove transitory info and only keep the actual hardware info
        for entry in manifest_dict.values():
            if 'interfaces' in entry:
                for interface in entry['interfaces'].values():
                    interface.pop('up', None)
                    interface.pop('inet', None)
            if 'disks' in entry:
                for diskinfo in entry['disks'].values():
                    diskinfo.pop('pool', None)
                    diskinfo.pop('status', None)
            if 'memory' in entry:
                entry['memory'].pop('mem_free', None)

        if not manifest_dict:
            raise Exception('Error getting manifest information')
    except Exception as e:
        return None, 'Error generating the manifest dictionary : %s' % str(e)
    else:
        # Without this the function fell off the end and returned a bare
        # None on success, which callers cannot unpack as (value, err).
        return manifest_dict, None