def check_all(self):
    """Run the detail check on every zpool and return the resulting state.

    Each key of ``self.details`` is handed to ``_check_zpool_details`` and
    the value it returns is fed into one shared ``Condition`` via ``set()``.
    The ``state`` of that condition is the return value.
    """
    self.log.debug('check_all() starting')
    overall = Condition()
    for pool_name in self.details.keys():
        overall.set(self._check_zpool_details(pool_name))
    self.log.debug('check_all() ending')
    return overall.state
def check_all(self):
    """Run the detail check on every array and return the resulting state.

    Each key of ``self.details`` is handed to ``_check_array_details`` and
    the value it returns is fed into one shared ``Condition`` via ``set()``.
    The ``state`` of that condition is the return value.
    """
    self.log.debug('check_all() starting')
    overall = Condition()
    for array_name in self.details.keys():
        overall.set(self._check_array_details(array_name))
    self.log.debug('check_all() ending')
    return overall.state
def check_all(self):
    """Run the detail check on every listed controller and return the state.

    Controllers are taken from the keys of ``self.ctrl_list``; each one is
    handed to ``_check_controller_details`` and the value it returns is fed
    into one shared ``Condition`` via ``set()``.  The ``state`` of that
    condition is the return value.
    """
    self.log.debug('check_all() starting')
    overall = Condition()
    for controller in self.ctrl_list.keys():
        overall.set(self._check_controller_details(controller))
    self.log.debug('check_all() ending')
    return overall.state
def _check_array_details(self, array):
    """Inspect one array's member disks and device counters.

    Starting from ``Condition(Condition.OK)``:

    * every disk whose ``'state'`` is not ``'active'`` flags an error;
    * a ``'Failed Devices'`` value other than ``'0'`` flags an error;
    * ``'Working Devices'`` differing from ``'Total Devices'`` flags a
      warning.

    Each finding is also logged.  Values are compared as strings.

    :param array: key into ``self.details`` naming the array to check.
    :returns: the ``state`` of the condition after all checks.
    """
    log = logging.getLogger('controller._check_array_details')
    cond = Condition(Condition.OK)

    # Physical drives first.
    for disk, disk_info in self.details[array]['disks'].items():
        disk_state = disk_info['state']
        if disk_state == 'active':
            continue
        cond.error()
        log.error('array %s disk %s is not ok with state %s'
                  % (array, disk, disk_state))

    # Then the array-level counters.
    array_info = self.details[array]

    failed = array_info['Failed Devices']
    if failed != '0':
        cond.error()
        log.error('array %s not ok with Failed Devices %s' % (array, failed))

    working = array_info['Working Devices']
    total = array_info['Total Devices']
    if working != total:
        cond.warning()
        log.warning(
            'array %s is not ok with Total Devices %s != Working Devices %s'
            % (array, total, working))

    return cond.state
def _check_controller_details(self, ctrl):
    """Inspect the raid units and volumes of one controller.

    Starting from ``Condition(Condition.OK)``, every raid unit and every
    volume whose ``'status'`` is not ``'Normal'`` flags an error and is
    logged.

    :param ctrl: key into ``self.details`` naming the controller to check.
    :returns: the ``state`` of the condition after all checks.
    """
    log = logging.getLogger('controller.checkdetails')
    cond = Condition(Condition.OK)

    # NOTE: disks are not inspected here -- the original author's note says
    # no per-disk status had been seen on areca yet.
    sections = (
        ('raids', 'controller %s raid unit %s is not ok with status %s'),
        ('volumes', 'controller %s volume %s is not ok with status %s'),
    )
    for section, template in sections:
        for name, info in self.details[ctrl][section].items():
            status = info['status']
            if status != 'Normal':
                cond.error()
                log.error(template % (ctrl, name, status))

    return cond.state
def check_all(self):
    """Check the controller list, then each controller, and return the state.

    The result of ``_check_controller_list`` and of
    ``_check_controller_details`` for every key of ``self.ctrl_list`` are
    fed, in that order, into one shared ``Condition`` via ``set()``.  The
    ``state`` of that condition is the return value.
    """
    # The logger is looked up but no message is emitted from this method.
    log = logging.getLogger('controller.threeware.check_all')
    overall = Condition()

    overall.set(self._check_controller_list())

    for controller in self.ctrl_list.keys():
        overall.set(self._check_controller_details(controller))

    return overall.state
def _check_controller_list(self):
    """Inspect the per-controller summary held in ``self.ctrl_list``.

    Starting from ``Condition(Condition.OK)``, for every controller:

    * a ``'notopt'`` value other than ``'0'`` flags an error;
    * a ``'bbu_status'`` other than ``'OK'`` flags an error.

    Each finding is also logged.

    :returns: the ``state`` of the condition after all checks.
    """
    log = logging.getLogger('controller.threeware._check_controller_list')
    cond = Condition(Condition.OK)

    for controller, summary in self.ctrl_list.items():
        if summary['notopt'] != '0':
            cond.error()
            log.error('%s controller has a raid in non-optimal state'
                      % controller)

        battery = summary['bbu_status']
        if battery != 'OK':
            cond.error()
            log.error('%s controller has a battery problem of "%s"'
                      % (controller, battery))

    return cond.state
def _check_controller_details(self, ctrl):
    """Inspect the units, ports and battery units of one controller.

    Starting from ``Condition(Condition.OK)``:

    * units: a ``'status'`` other than ``'OK'`` is an error; for units whose
      ``'type'`` is not ``'SPARE'``, ``'cache'`` not ``'ON'`` is an error and
      ``'auto_verify'`` not ``'ON'`` is a warning;
    * ports: a ``'status'`` other than ``'OK'`` is an error, but only when
      the port's ``'unit'`` is not ``'-'``;
    * bbus: ``'status'``, ``'temp'`` or ``'volt'`` not ``'OK'``,
      ``'bbuready'`` not ``'Yes'`` and ``'onlinestate'`` not ``'On'`` are
      errors; a ``'lastcaptest'`` of ``'xx-xxx-xxxx'`` is a warning.

    Each finding is also logged at the matching level.

    :param ctrl: key into ``self.details`` naming the controller to check.
    :returns: the ``state`` of the condition after all checks.
    """
    log = logging.getLogger(
        'controller.threeware._check_controller_details')
    cond = Condition(Condition.OK)

    # check the raid units themselves
    for unit in self.details[ctrl]['units'].keys():
        detail = self.details[ctrl]['units'][unit]
        if detail['status'] != 'OK':
            cond.error()
            log.error('%s raid unit %s is not ok with status %s'
                      % (ctrl, unit, detail['status']))
        # Units of type SPARE are exempt from the cache / auto-verify checks.
        if detail['cache'] != 'ON' and detail['type'] != 'SPARE':
            cond.error()
            log.error('%s raid unit %s has its cache turned off'
                      % (ctrl, unit))
        if detail['auto_verify'] != 'ON' and detail['type'] != 'SPARE':
            cond.warning()
            log.warning('%s raid unit %s does not have auto verify on'
                        % (ctrl, unit))

    for port in self.details[ctrl]['ports'].keys():
        detail = self.details[ctrl]['ports'][port]
        # Double check: a port with an issue that doesn't belong to an
        # active raid (unit '-') is not a problem - at most a warning - so
        # only ports attached to a unit raise an error.
        if detail['status'] != 'OK' and detail['unit'] != '-':
            cond.error()
            log.error('%s port %s has a status of %s'
                      % (ctrl, port, detail['status']))

    for bbu in self.details[ctrl]['bbus'].keys():
        detail = self.details[ctrl]['bbus'][bbu]
        if detail['status'] != 'OK':
            cond.error()
            log.error('%s bbu %s has a status of %s'
                      % (ctrl, bbu, detail['status']))
        if detail['bbuready'] != 'Yes':
            cond.error()
            log.error('%s bbu %s is not ready' % (ctrl, bbu))
        if detail['lastcaptest'] == 'xx-xxx-xxxx':
            cond.warning()
            # Logger.warn() is a deprecated alias; use warning() like the
            # rest of this method.
            log.warning('%s bbu %s has not been capacity tested'
                        % (ctrl, bbu))
        if detail['onlinestate'] != 'On':
            cond.error()
            log.error('%s bbu %s is not online' % (ctrl, bbu))
        if detail['temp'] != 'OK':
            cond.error()
            log.error('%s bbu %s has a temp problem (%s)'
                      % (ctrl, bbu, detail['temp']))
        if detail['volt'] != 'OK':
            cond.error()
            log.error('%s bbu %s has a voltage problem (%s)'
                      % (ctrl, bbu, detail['volt']))

    return cond.state
def _check_zpool_details(self, zpool):
    """Inspect one zpool's member disks and the pool's own state.

    Starting from ``Condition(Condition.OK)``:

    * a disk ``'state'`` other than ``'ONLINE'`` flags an error;
    * a disk ``'checksum_errors'``, ``'write_errors'`` or ``'read_errors'``
      value other than ``'0'`` flags a warning;
    * a pool ``'state'`` other than ``'ONLINE'`` flags an error.

    Each finding is also logged.  Values are compared as strings.

    :param zpool: key into ``self.details`` naming the pool to check.
    :returns: the ``state`` of the condition after all checks.
    """
    log = logging.getLogger('controller._check_zpool_details')
    cond = Condition(Condition.OK)

    # Per-disk error counters, checked in this order, each only a warning.
    counter_checks = (
        ('checksum_errors',
         'zpool %s disk %s is not ok with checksum_errors %s'),
        ('write_errors',
         'zpool %s disk %s is not ok with write_errors %s'),
        ('read_errors',
         'zpool %s disk %s is not ok with read_errors %s'),
    )

    # Physical drives first.
    for disk, disk_info in self.details[zpool]['disks'].items():
        disk_state = disk_info['state']
        if disk_state != 'ONLINE':
            cond.error()
            log.error('zpool %s disk %s is not ok with state %s'
                      % (zpool, disk, disk_state))

        for counter, template in counter_checks:
            count = disk_info[counter]
            if count != '0':
                cond.warning()
                log.warning(template % (zpool, disk, count))

    # Then the pool as a whole.
    pool_state = self.details[zpool]['state']
    if pool_state != 'ONLINE':
        cond.error()
        log.error('zpool %s not ok with state %s' % (zpool, pool_state))

    return cond.state
def check_all(self):
    """Run the detail check on every controller and return the state.

    Each key of ``self.details`` is handed to ``_check_controller_details``
    and the value it returns is fed into one shared ``Condition`` via
    ``set()``.  The ``state`` of that condition is the return value.
    """
    overall = Condition()
    for controller in self.details.keys():
        result = self._check_controller_details(controller)
        overall.set(result)
    return overall.state
def _check_controller_details(self, ctrl):
    """Inspect the disks, enclosures and volumes of one controller.

    Starting from ``Condition(Condition.OK)``:

    * disks: ``'Firmware state'`` not ``'Online'`` and a non-``'0'``
      ``'Predictive Failure Count'`` are warnings; a non-``'0'``
      ``'Last Predictive Failure Event Seq Number'`` or
      ``'Media Error Count'`` is an error;
    * enclosures: ``'Status'`` not ``'Normal'`` or a non-``'0'``
      ``'Number of Alarms'`` is an error;
    * volumes: ``'State'`` not ``'Optimal'`` is an error.

    Each finding is also logged at the level matching its severity.
    Values are compared as strings.

    :param ctrl: key into ``self.details`` naming the controller to check.
    :returns: the ``state`` of the condition after all checks.
    """
    log = logging.getLogger('controller.checkdetails')
    cond = Condition(Condition.OK)

    # check the disks (physical drives)
    for disk in self.details[ctrl]['disks'].keys():
        detail = self.details[ctrl]['disks'][disk]
        if detail['Firmware state'] != 'Online':
            cond.warning()
            log.warning(
                'controller %s disk %s is not ok with Firmware State %s'
                % (ctrl, disk, detail['Firmware state']))
        if detail['Last Predictive Failure Event Seq Number'] != '0':
            cond.error()
            log.error(
                'controller %s disk %s is not ok with Last Predictive Event Number %s'
                % (ctrl, disk,
                   detail['Last Predictive Failure Event Seq Number']))
        if detail['Media Error Count'] != '0':
            cond.error()
            log.error(
                'controller %s disk %s is not ok with Media Error Count %s'
                % (ctrl, disk, detail['Media Error Count']))
        if detail['Predictive Failure Count'] != '0':
            cond.warning()
            # Logged as a warning so the log level agrees with the
            # severity recorded on the condition.
            log.warning(
                'controller %s disk %s is not ok with Predictive Failure Count %s'
                % (ctrl, disk, detail['Predictive Failure Count']))

    for enclosure in self.details[ctrl]['enclosures'].keys():
        detail = self.details[ctrl]['enclosures'][enclosure]
        # The messages name the enclosure via the loop variable itself.
        if detail['Status'] != 'Normal':
            cond.error()
            log.error(
                'controller %s enclosure unit %s is not ok with status %s'
                % (ctrl, enclosure, detail['Status']))
        if detail['Number of Alarms'] != '0':
            cond.error()
            log.error(
                'controller %s enclosure unit %s is not ok with Number of Alarms %s'
                % (ctrl, enclosure, detail['Number of Alarms']))

    for volume in self.details[ctrl]['volumes'].keys():
        detail = self.details[ctrl]['volumes'][volume]
        if detail['State'] != 'Optimal':
            cond.error()
            log.error('controller %s volume %s is not ok with State %s'
                      % (ctrl, volume, detail['State']))

    return cond.state