def run_and_check(self, cmd, expectation=True):
    """
    Run a command on the PDU and check the reply.

    The PDU connection is opened for the duration of the command and is
    closed again even when running the command raises.

    A reply that does not contain ``E000: Success`` is inspected for the
    known error markers: ``E101: Command Not Found`` anywhere in the reply,
    or a reply starting with ``E102: Parameter Error``.  Any other reply
    leaves the result status at True.

    :param cmd: command string to send to the PDU
    :param expectation: when True (the default) a failed command raises;
        when False the ReturnCode is returned whatever its status
    :return: ReturnCode whose message is the reply from the PDU
    :raises ApplianceError: on an E101 reply when expectation is True
    :raises ApplianceUsage: on an E102 reply when expectation is True
    """
    result = ReturnCode(True)
    logger.info(cmd + " called")
    self.pdu.connect()
    try:
        result.message = self.pdu.run(cmd)
    finally:
        # never leave the PDU session open, even if run() blew up
        self.pdu.disconnect()

    error = Exception()
    # str.find returns -1 (truthy) when the text is absent and 0 (falsy)
    # when it is at the start, so `not find(...)` tested the wrong thing;
    # use a membership test to express "reply is not a success".
    if 'E000: Success' not in result.message:
        if 'E101: Command Not Found' in result.message:
            logger.error(result.message)
            result.status = False
            error = ApplianceError(cmd + " failed")
        elif result.message.startswith('E102: Parameter Error'):
            logger.critical(result.message)
            result.status = False
            error = ApplianceUsage(cmd + " failed")

    if not expectation:
        return result
    if not result.status:
        raise error
    return result
def pool_is_empty(vsx, pool):
    """
    Verify that a pool has no extents allocated.

    Runs ``pools -a <pool>`` on the VSX and scans the output for non-zero
    total/free/unique extent counts and for PV or LV entries.  Every
    finding is logged as an error and appended to the returned message.

    :param vsx: a Vsx instance
    :param pool: name of the pool to inspect
    :return: ReturnCode; status is False when vsx is not a Vsx, when the
        pools command failed, or when any finding was reported
    """
    if not isinstance(vsx, Vsx):
        rejected = ReturnCode(False)
        rejected.message = "object is not a Vsx instance"
        return rejected

    shelf = vsx.shelf
    # TODO: teach this that pools has -a
    ret = vsx.run_and_check("pools -a %s" % pool)
    if not ret.status:
        return ret
    output = ret.message
    ret = ReturnCode(True)

    def report(text):
        # log one finding and fold it into the result
        logger.error(text)
        ret.status = False
        ret.message += "\n%s" % text

    # (pattern capturing the count, message used when the count is non-zero)
    extent_checks = (
        (r"Total[ \t]+Exts[ \t]+:[ \t]+([0-9]+)[ \t]+",
         "Empty pool %s on %s has %d total extents:\n%s"),
        (r"Free[ \t]+Exts[ \t]+:[ \t]+([0-9]+)[ \t]+",
         "Empty pool %s on %s has %d free extents:\n%s"),
        (r"Unique[ \t]+Exts[ \t]+:[ \t]+([0-9]+)[ \t]+",
         "Empty pool %s on %s has %d unique extents:\n%s"),
    )
    for pattern, template in extent_checks:
        found = re.search(pattern, output)
        # a line that is missing from the output counts as zero
        count = int(found.group(1)) if found else 0
        if count:
            report(template % (pool, shelf, count, output))

    if re.search(r"PVs[ \t]+:[ \t]+[0-9]+\.[0-9]+", output):
        report("Empty pool %s on %s has PVs:\n%s" % (pool, shelf, output))

    if re.search(r"LVs[ \t]+:[ \t]+(.*)[ \t]+", output):
        report("Empty pool %s on %s has LVs:\n%s" % (pool, shelf, output))

    return ret
def save_sos(sr, loc='.'):
    """
    Save a copy of sos output into a local file.

    :param sr: an srx object
    :param loc: file destination directory
    :return: ReturnCode; on success status is True and the message names
        the file written, on IOError status is False and the message holds
        the exception object

    filename will be in the following format::

        sos_1521_15_1387393873.113084.txt
    """
    result = ReturnCode(False)
    sos_output = sr.sos
    # File name will be a combination of model+shelfID+time
    file_name = 'sos_%s_%s_%s.txt' % (sr.model, sr.shelf, str(time()))
    # full path + file name where sos will be saved.
    sos_file = '%s/%s' % (loc, file_name)
    try:
        # the context manager closes the file even when a write fails
        with open(sos_file, 'w') as f:
            # sos output uses CRLF line ends; store it with plain newlines
            f.writelines('%s\n' % line for line in sos_output.split('\r\n'))
    except IOError as e:
        result.message = e
    else:
        result.status = True
        result.message = 'sos output stores in %s' % sos_file
    return result
def is_failed(sr, lun):
    """
    Is this lun failed?

    For sr.version >= 7 the lun's own state is examined; otherwise the
    state of every entry in the lun's 'raids' list is examined.

    :param sr: an srx object
    :param lun: key of the lun in ``sr.list``
    :return: ReturnCode; status is True when a state containing 'failed'
        was seen, with that state as message (the last one seen wins)
    :raises ApplianceError: when ``sr.list`` has no entry for the lun
    """
    verdict = ReturnCode(False)
    entry = sr.list.get(lun)
    if not entry:
        raise ApplianceError('trying to query status of a non-existing lun: %s' % lun)

    # either the lun record itself or each of its raid components
    if sr.version >= 7:
        candidates = [entry]
    else:
        candidates = entry.get('raids')

    for item in candidates:
        state = item['state']
        if 'failed' in state:
            verdict.status = True
            verdict.message = state
    return verdict
def add_ssd_to_fc(sr, nssds=1):
    """
    Add a number of SSDs to the flashcache pool.

    :param sr: an srx object
    :param nssds: number of drives to add
    :return: ReturnCode; a failure with a message when fewer than nssds
        available SSDs were found, otherwise whatever the last
        ``sr.fcconfig`` call returned
    """
    outcome = ReturnCode(False)
    free_slots = [
        "%s.%s" % (sr.shelf, slot)
        for slot in sr.disks.keys()
        if slot_is_available(sr, slot, cache=True)
    ]
    candidates = [disk for disk in free_slots if is_ssd(sr, disk)]
    if not candidates or len(candidates) < nssds:
        outcome.message = 'No more ssds to add to flashcache'
        outcome.status = False
        return outcome

    # hand the drives out in the order they were discovered
    pending = iter(candidates)
    for _ in range(nssds):
        outcome = sr.fcconfig(next(pending))
        if not outcome:
            # stop at the first drive that could not be added
            break
    return outcome
def add_ssd_to_fc(sr, nssds=1):
    """
    Add a number of SSDs to the flashcache pool.

    :param sr: an srx object
    :param nssds: number of drives to add
    :return: ReturnCode; a failure with a message when fewer than nssds
        available SSDs were found, otherwise whatever the last
        ``sr.fcconfig`` call returned
    """
    outcome = ReturnCode(False)
    free_slots = [
        "%s.%s" % (sr.shelf, slot)
        for slot in sr.disks.keys()
        if slot_is_available(sr, slot, cache=True)
    ]
    candidates = [disk for disk in free_slots if is_ssd(sr, disk)]
    if not candidates or len(candidates) < nssds:
        outcome.message = 'No more ssds to add to flashcache'
        outcome.status = False
        return outcome

    # hand the drives out in the order they were discovered
    pending = iter(candidates)
    for _ in range(nssds):
        outcome = sr.fcconfig(next(pending))
        if not outcome:
            # stop at the first drive that could not be added
            break
    return outcome
def is_failed(sr, lun):
    """
    Is this lun failed?

    For sr.version >= 7 the lun's own state is examined; otherwise the
    state of every entry in the lun's 'raids' list is examined.

    :param sr: an srx object
    :param lun: key of the lun in ``sr.list``
    :return: ReturnCode; status is True when a state containing 'failed'
        was seen, with that state as message (the last one seen wins)
    :raises ApplianceError: when ``sr.list`` has no entry for the lun
    """
    verdict = ReturnCode(False)
    entry = sr.list.get(lun)
    if not entry:
        raise ApplianceError(
            'trying to query status of a non-existing lun: %s' % lun)

    # either the lun record itself or each of its raid components
    if sr.version >= 7:
        candidates = [entry]
    else:
        candidates = entry.get('raids')

    for item in candidates:
        state = item['state']
        if 'failed' in state:
            verdict.status = True
            verdict.message = state
    return verdict
def run(self, cmd, timeout=None, bufsize=-1):
    """
    Run a command on the connected host.

    The command is prefixed with ``VAR=value`` pairs from
    ``self.environmentals`` and with ``cd <cwd> &&`` when ``self.cwd`` is
    set, then handed to ``self.exec_command``.

    :param cmd: command to run on remote host
    :type cmd: str
    :param timeout: timeout on blocking read/write operations
        when exceeded socket error will be raised
    :type timeout: float
    :param bufsize: byte size of the buffer for the filehandle returned
    :type bufsize: int
    :rtype: ReturnCode
    """
    ret = ReturnCode(False)
    if not self.connected:
        raise ConnectionError("Run was called on an unconnected host. Did you check the result of connect()?")

    try:
        if self.environmentals:
            env_prefix = "".join(
                "%s=%s " % (var, value)
                for var, value in self.environmentals.items()
            )
            cmd = "%s%s" % (env_prefix, cmd)
        if self.cwd:
            cmd = "cd %s && %s" % (self.cwd, cmd)
        self._log(logging.DEBUG, 'running command: "%s"' % cmd)
        stdin, stdout, stderr = self.exec_command(command=cmd, timeout=timeout, bufsize=bufsize)
    except paramiko.SSHException as e:
        failure = "Couldn't complete the command: %s" % str(e)
        logger.critical(failure)
        ret.message = failure
        return ret

    # we must read stderr _before_ stdout
    # otherwise paramiko loses the stdout data
    try:
        ret.raw = Data(ret.raw.status, ret.raw.stdout, stderr.read())
    except socket.timeout:
        ret.message = "Timeout"
        return ret

    status = stdout.channel.recv_exit_status()
    ret.raw = Data(status, stdout.read(), ret.raw.stderr)
    if status == 0:
        ret.status = True
        ret.message = ret.raw.stdout
    else:
        # a non-zero exit reports whatever the command wrote to stderr
        ret.message = ret.raw.stderr
    stdin.close()
    return ret
def lv_is_empty(vsx, lv):
    """
    Verify there are no snap extents left on this LV.

    :param vsx: passed through to get_snap_used
    :param lv: name of the LV to inspect
    :return: ReturnCode; status is True only when get_snap_used reports
        exactly "0.000", otherwise the message describes the stray extents
    """
    snap_used = get_snap_used(vsx, lv)
    verdict = ReturnCode(False)
    if snap_used != "0.000":
        verdict.message = "Stray snap extents on LV %s: %s" % (lv, snap_used)
        logger.error(verdict.message)
        return verdict
    verdict.status = True
    return verdict
def run_and_check(self, cmd, expectation=True, force=False):
    """
    Run a command and check the result.

    A reply starting with ``Error:`` or ``Usage:`` marks the result as
    failed.  If the caller cares about failure and the command fails we
    raise an exception.

    :param cmd: command string to run
    :param expectation: when True (the default) a failed command raises;
        when False the ReturnCode is returned whatever its status
    :param force: when True the command is run with the prompt temporarily
        replaced by the confirmation pattern, and then answered with 'y'
    :return: ReturnCode whose message is the reply to the command (or to
        the 'y' confirmation when force is set)
    :raises ApplianceError: on an ``Error:`` reply when expectation is True
    :raises ApplianceUsage: on a ``Usage:`` reply when expectation is True
    """
    result = ReturnCode(True)
    confirm = re.compile("Enter[ \t]*\'y\'*.")
    logger.info(cmd + " called")
    if force:
        saved_prompt = self.prompt
        self.prompt = confirm
        try:
            self.run(cmd)
        finally:
            # put the real prompt back even when run() raises, otherwise
            # every later command would wait for the confirmation pattern
            self.prompt = saved_prompt
        result.message = self.run('y')
    else:
        result.message = self.run(cmd)

    error = Exception()
    if result.message.startswith('Error:'):
        logger.error(result.message)
        result.status = False
        error = ApplianceError(cmd + " failed: " + result.message)
    elif result.message.startswith('Usage:'):
        logger.critical(result.message)
        result.status = False
        error = ApplianceUsage(cmd + " failed: " + result.message)

    if not expectation:
        return result
    if not result.status:
        raise error
    return result
def pv_is_empty(vsx, pv=None):  # TODO: use pv to filter results
    """
    Check if most of the metadata is empty and that extents are
    correctly accounted for.

    The pv parameter is ignored for now.

    :param vsx: a Vsx instance
    :param pv: unused
    :return: ReturnCode; status is False when vsx is not a Vsx, when
        ``vsx.pvs`` reports failure, or when a check below fails
    """
    if not isinstance(vsx, Vsx):
        rejected = ReturnCode(False)
        rejected.message = "object is not a Vsx instance"
        return rejected

    # NOTE(review): these counters are initialised to zero and never
    # filled in from the pvs output, so the checks below run on zeros.
    total = free = dirty = meta = 0

    shelf = vsx.shelf
    ret = vsx.pvs
    if not ret.status:
        return ret
    output = ret.message
    ret = ReturnCode(True)

    used = calculate_metaext(total, 4096)
    if used != dirty:
        text = "pv has too many dirty extents, should be %d on VSX %s:\n%s" % (
            used, shelf, output)
        logger.error(text)
        ret.status = False
        ret.message = text

    # remaining checks append to the message rather than replace it
    later_checks = (
        (meta, "meta extents not zero on VSX %s:\n%s"),
        (total != free + dirty, "pv accounting error on VSX %s:\n%s"),
    )
    for failed, template in later_checks:
        if failed:
            text = template % (shelf, output)
            logger.error(text)
            ret.status = False
            ret.message += "\n%s" % text
    return ret
def run(self, cmd, timeout=None, bufsize=-1):
    """
    Run a command on the connected host.

    The command is prefixed with ``VAR=value`` pairs from
    ``self.environmentals`` and with ``cd <cwd> &&`` when ``self.cwd`` is
    set, then handed to ``self.exec_command``.

    :param cmd: command to run on remote host
    :type cmd: str
    :param timeout: timeout on blocking read/write operations
        when exceeded socket error will be raised
    :type timeout: float
    :param bufsize: byte size of the buffer for the filehandle returned
    :type bufsize: int
    :rtype: ReturnCode
    """
    ret = ReturnCode(False)
    if not self.connected:
        raise ConnectionError("Run was called on an unconnected host. Did you check the result of connect()?")

    try:
        if self.environmentals:
            env_prefix = "".join(
                "%s=%s " % (var, value)
                for var, value in self.environmentals.items()
            )
            cmd = "%s%s" % (env_prefix, cmd)
        if self.cwd:
            cmd = "cd %s && %s" % (self.cwd, cmd)
        self._log(logging.DEBUG, 'running command: "%s"' % cmd)
        stdin, stdout, stderr = self.exec_command(command=cmd, timeout=timeout, bufsize=bufsize)
    except paramiko.SSHException as e:
        failure = "Couldn't complete the command: %s" % str(e)
        logger.critical(failure)
        ret.message = failure
        return ret

    # we must read stderr _before_ stdout
    # otherwise paramiko loses the stdout data
    try:
        ret.raw = Data(ret.raw.status, ret.raw.stdout, stderr.read())
    except socket.timeout:
        ret.message = "Timeout"
        return ret

    status = stdout.channel.recv_exit_status()
    ret.raw = Data(status, stdout.read(), ret.raw.stderr)
    if status == 0:
        ret.status = True
        ret.message = ret.raw.stdout
    else:
        # a non-zero exit reports whatever the command wrote to stderr
        ret.message = ret.raw.stderr
    stdin.close()
    return ret
def pv_is_empty(vsx, pv=None):  # TODO: use pv to filter results
    """
    Check if most of the metadata is empty and that extents are
    correctly accounted for.

    The pv parameter is ignored for now.

    :param vsx: a Vsx instance
    :param pv: unused
    :return: ReturnCode; status is False when vsx is not a Vsx, when
        ``vsx.pvs`` reports failure, or when a check below fails
    """
    if not isinstance(vsx, Vsx):
        rejected = ReturnCode(False)
        rejected.message = "object is not a Vsx instance"
        return rejected

    # NOTE(review): these counters are initialised to zero and never
    # filled in from the pvs output, so the checks below run on zeros.
    total = free = dirty = meta = 0

    shelf = vsx.shelf
    ret = vsx.pvs
    if not ret.status:
        return ret
    output = ret.message
    ret = ReturnCode(True)

    used = calculate_metaext(total, 4096)
    if used != dirty:
        text = "pv has too many dirty extents, should be %d on VSX %s:\n%s" % (used, shelf, output)
        logger.error(text)
        ret.status = False
        ret.message = text

    # remaining checks append to the message rather than replace it
    later_checks = (
        (meta, "meta extents not zero on VSX %s:\n%s"),
        (total != free + dirty, "pv accounting error on VSX %s:\n%s"),
    )
    for failed, template in later_checks:
        if failed:
            text = template % (shelf, output)
            logger.error(text)
            ret.status = False
            ret.message += "\n%s" % text
    return ret
def is_recovering(sr, lun):
    """
    Is this lun recovering?

    For sr.version >= 7 the lun's own state is examined; otherwise the
    state of every entry in the lun's 'raids' list is examined.

    :param sr: an srx object
    :param lun: a lun number as str
    :return: returnCode with lun state as message (the last matching
        state wins when several components match)
    :raises ApplianceError: when ``sr.list`` has no entry for the lun
    """
    verdict = ReturnCode(False)
    entry = sr.list.get(lun)
    if not entry:
        raise ApplianceError('trying to query status of a non-existing lun: %s' % lun)

    # either the lun record itself or each of its raid components
    if sr.version >= 7:
        candidates = [entry]
    else:
        candidates = entry.get('raids')

    for item in candidates:
        state = item['state']
        if 'recovering' in state:
            verdict.status = True
            verdict.message = state
    return verdict
def is_recovering(sr, lun):
    """
    Is this lun recovering?

    For sr.version >= 7 the lun's own state is examined; otherwise the
    state of every entry in the lun's 'raids' list is examined.

    :param sr: an srx object
    :param lun: a lun number as str
    :return: returnCode with lun state as message (the last matching
        state wins when several components match)
    :raises ApplianceError: when ``sr.list`` has no entry for the lun
    """
    verdict = ReturnCode(False)
    entry = sr.list.get(lun)
    if not entry:
        raise ApplianceError(
            'trying to query status of a non-existing lun: %s' % lun)

    # either the lun record itself or each of its raid components
    if sr.version >= 7:
        candidates = [entry]
    else:
        candidates = entry.get('raids')

    for item in candidates:
        state = item['state']
        if 'recovering' in state:
            verdict.status = True
            verdict.message = state
    return verdict
def is_inited(sr, lun):
    """
    Check if a lun's parity is built.

    The lun's own state is looked at first, then the state of each entry
    in its 'raids' list; a component that is still 'initing' overrides the
    lun-level result and ends the scan.

    :param sr: an srx object
    :param lun: a lun number as int or str
    :return: return code with number of seconds until done in message field
    :raises ApplianceUsage: when ``sr.list`` has no entry for the lun
    """
    result = ReturnCode(False)
    # sr.list and sr.when are looked up with the string form of the lun
    lun = str(lun)
    l = sr.list.get(lun)
    if not l:
        raise ApplianceUsage("lun not found '%s'" % lun)
    if l.get('state') == 'initing':
        t = sr.when.get(lun)
        if t:
            t = t['time']  # mmmmm
            # remaining time, converted by get_sec, goes in the message
            result = ReturnCode(False, get_sec(t))
    if l.get('state') == 'normal':
        result.status = True
    for component in l.get('raids'):  # SRX 6
        if component.get('state') == 'initing':
            result.status = False
            # components are keyed in sr.when as "<lun>.<component number>"
            lc = "%s.%s" % (lun, component['number'])
            wt = sr.when.get(lc)
            # fall back to a zero duration when no time is reported
            if wt:
                t = wt['time'] or '0:0:0'
            else:
                t = '0:0:0'
            result.message = get_sec(t)
            break
        elif component.get('state') == 'normal':
            result.status = True
    return result
def check_margins(curr, prev, delta):
    """
    Compare current statistics against previous ones.

    For each of 'iops', 'bw' and 'lat', the 'deviation', 'average' and
    'median' entries of curr and prev are compared; the change is the
    absolute difference expressed as a percentage of the previous value.

    :param curr: mapping of value name -> stat name -> number
    :param prev: mapping with the same layout as curr
    :param delta: largest accepted percentage change
    :return: ReturnCode; status is False when any change exceeds delta,
        with the message describing the last offending comparison
    """
    ret = ReturnCode(True, 'All parameters are under the expected error margin of %s' % delta)
    value_names = ['iops', 'bw', 'lat']
    stat_names = ['deviation', 'average', 'median']
    for value, stat in [(v, s) for v in value_names for s in stat_names]:
        logger.info('value: %s stat: %s', value, stat)
        currstat = curr[value][stat]
        prevstat = prev[value][stat]
        change = (abs(currstat - prevstat) * 100) / prevstat
        logger.info('current: %f previous: %f delta: %f', currstat, prevstat, change)
        if change > delta:
            ret.status = False
            ret.message = 'Margin on %s: current: %f previous: %f outside of margin: %f' % \
                          (value, currstat, prevstat, delta)
            logger.info(ret.message)
    return ret
def target_is_available(self, target):
    """
    Check if a target is visible.

    With the "aoe" module a discover is issued and ``self.aoestat`` is
    searched; otherwise the result of ``self._uniqscsi(target)`` is.

    :param target: target identifier to look for
    :return: ReturnCode; status is True when an entry whose "target"
        field equals target was found, with that entry as message
    """
    if self.coraid_module == "aoe":
        self.aoediscover()
        stat = self.aoestat
    else:
        stat = self._uniqscsi(target)

    ret = ReturnCode(False)
    for key in stat:
        entry = stat[key]
        if entry["target"] == target:
            ret.message = entry
            ret.status = True
    return ret
def run_and_check(self, cmd, expectation=True):
    """
    Run a command and check the result.

    If the caller cares about failure and the command fails we raise a
    generic exception.

    :param cmd: command string to run
    :param expectation: when True (the default) a failed command raises;
        when False the ReturnCode is returned whatever its status
    :return: ReturnCode whose message is the reply to the command
    :raises ApplianceError: when the reply is flagged as a failure and
        expectation is True
    """
    result = ReturnCode(True)
    logger.info(cmd + " called")
    result.message = self.run(cmd)
    if 'Invalid command or parameters.' in result.message:
        # this will never work since the error is after the
        # prompt added except KeyError: to catch problems
        # below I suspect "expectation" might have a problem too.
        logger.error(result.message)
        result.status = False
    if expectation and not result.status:
        raise ApplianceError(cmd + " failed")
    return result
def make_lun(sr, luns, fcenable=False, cache=False):
    """
    Create one or a series of luns based on a dictionary containing their details.

    :param sr: an srx object
    :param luns: a dictionary mapping lun ids to lun descriptions
    :param fcenable: enable flashcache on the lun after making it
    :param cache: sent to slot_is_available to mean it is ok to only read the disks
        command once and assume they didn't change in this function. This
        rate limits log messages and is much faster, especially with cec.
    :return: when every lun is of type 'nil'/'fnil', the result of the last
        ``sr.make``/``sr.online`` call; otherwise a ReturnCode that is True
        once all luns were processed, or False with a message as soon as
        a lun needs more disks than are still available

    example::

        {
        '0':{'num_disks':2,'type':'raid1','size':'-c','version':'1','iomode':'random'},
        '1':{'num_disks':3,'type':'raid5','size':'2G','version':'0','clean':'False'},
        '2':{'num_disks':4,'type':'raid6rs','size':'-c','version':'0','iomode':'sequential'},
        '3':{'num_disks':4,'type':'raid10','size':'10G','version':'1'},
        '4':{'num_disks':2,'type':'raid0','size':'-c','version':'1'},
        '5':{'num_disks':1,'type':'jbod','size':'4G','version':'1'}
        }

    The dictionary should contain the details of the luns to create and the
    following categories are mandatory:

    - num_disks: how many disks in the lun
    - type: raid type, or jbod or raw are also valid
    - version: either 0 or 1 are valid

    The following categories are optional in the dictionary:

    - iomode: the default value is based on the lun type
    - clean : if you want the lun to avoid parity initialization, default is to do parity initialization
    - size: the usable size of the disk using the setsize command; if not specified, the current size,
      'setsize'd or not, of the disk will be used; -c is valid and will indicate restore the disk to its
      actual size.
    """
    # pylint: disable=R0912
    result = ReturnCode(False)

    all_nil = False not in [lun.get('type') in ['nil', 'fnil'] for lun in luns.values()]
    if all_nil:
        # nil luns need no disks: just make them and bring them online.
        # items must be *called*; iterating the bound method raises TypeError.
        for lunid, cfg in luns.items():
            result = sr.make(lunid, cfg.get('type'))
            if not result:
                break
            result = sr.online(lunid)
        return result

    disks = ["%s" % x for x in range(sr.slots)]
    # Lets get a list of drives available inside the shelf.
    available_disks = ["%s.%s" % (sr.shelf, x) for x in disks if slot_is_available(sr, x, cache=cache)]

    for lun, spec in luns.items():
        wanted = spec['num_disks']
        # Check if we have enough disk to create the lun
        if len(available_disks) < wanted:
            result.message = 'Not enough disks to complete creation of lun: %s' % lun
            result.status = False
            return result

        members = available_disks[:wanted]
        if 'size' in spec:
            # if size is provided, each disk on that lun will be resized to that size;
            # an empty size falls back to '-c'
            for d in members:
                sr.setsize(spec['size'] or '-c', d)

        # Should we skip or not the lun initialization
        skipInit = spec.get('clean', False)
        sr.make(lun, spec['type'], members, spec['version'], skipInit)

        # if iomode has been specified also, we set the mode next
        if 'iomode' in spec:
            sr.setiomode(spec['iomode'], lun)
        sr.online(lun)
        if fcenable is True:
            sr.fcenable(lun)

        # Remove used disks from the available list
        available_disks = available_disks[wanted:]

    result.status = True
    return result
def make_lun(sr, luns, fcenable=False, cache=False):
    """
    Create one or a series of luns based on a dictionary containing their details.

    :param sr: an srx object
    :param luns: a dictionary mapping lun ids to lun descriptions
    :param fcenable: enable flashcache on the lun after making it
    :param cache: sent to slot_is_available to mean it is ok to only read the disks
        command once and assume they didn't change in this function. This
        rate limits log messages and is much faster, especially with cec.
    :return: when every lun is of type 'nil'/'fnil', the result of the last
        ``sr.make``/``sr.online`` call; otherwise a ReturnCode that is True
        once all luns were processed, or False with a message as soon as
        a lun needs more disks than are still available

    example::

        {
        '0':{'num_disks':2,'type':'raid1','size':'-c','version':'1','iomode':'random'},
        '1':{'num_disks':3,'type':'raid5','size':'2G','version':'0','clean':'False'},
        '2':{'num_disks':4,'type':'raid6rs','size':'-c','version':'0','iomode':'sequential'},
        '3':{'num_disks':4,'type':'raid10','size':'10G','version':'1'},
        '4':{'num_disks':2,'type':'raid0','size':'-c','version':'1'},
        '5':{'num_disks':1,'type':'jbod','size':'4G','version':'1'}
        }

    The dictionary should contain the details of the luns to create and the
    following categories are mandatory:

    - num_disks: how many disks in the lun
    - type: raid type, or jbod or raw are also valid
    - version: either 0 or 1 are valid

    The following categories are optional in the dictionary:

    - iomode: the default value is based on the lun type
    - clean : if you want the lun to avoid parity initialization, default is to do parity initialization
    - size: the usable size of the disk using the setsize command; if not specified, the current size,
      'setsize'd or not, of the disk will be used; -c is valid and will indicate restore the disk to its
      actual size.
    """
    # pylint: disable=R0912
    result = ReturnCode(False)

    all_nil = False not in [
        lun.get('type') in ['nil', 'fnil'] for lun in luns.values()
    ]
    if all_nil:
        # nil luns need no disks: just make them and bring them online.
        # items must be *called*; iterating the bound method raises TypeError.
        for lunid, cfg in luns.items():
            result = sr.make(lunid, cfg.get('type'))
            if not result:
                break
            result = sr.online(lunid)
        return result

    disks = ["%s" % x for x in range(sr.slots)]
    # Lets get a list of drives available inside the shelf.
    available_disks = [
        "%s.%s" % (sr.shelf, x) for x in disks
        if slot_is_available(sr, x, cache=cache)
    ]

    for lun, spec in luns.items():
        wanted = spec['num_disks']
        # Check if we have enough disk to create the lun
        if len(available_disks) < wanted:
            result.message = 'Not enough disks to complete creation of lun: %s' % lun
            result.status = False
            return result

        members = available_disks[:wanted]
        if 'size' in spec:
            # if size is provided, each disk on that lun will be resized to that size;
            # an empty size falls back to '-c'
            for d in members:
                sr.setsize(spec['size'] or '-c', d)

        # Should we skip or not the lun initialization
        skipInit = spec.get('clean', False)
        sr.make(lun, spec['type'], members, spec['version'], skipInit)

        # if iomode has been specified also, we set the mode next
        if 'iomode' in spec:
            sr.setiomode(spec['iomode'], lun)
        sr.online(lun)
        if fcenable is True:
            sr.fcenable(lun)

        # Remove used disks from the available list
        available_disks = available_disks[wanted:]

    result.status = True
    return result