Exemplo n.º 1
0
    def check_mddev(self, dev):
        """
        Underlying method to check the state of a MD device.

        Reads the attributes of the array below /sys/block/<dev>/md, sorts
        the member devices (the 'dev-*' subdirectories) into raid, spare and
        failed devices and derives a Nagios state and a status text from it.

        @raise NPReadTimeoutError: on timeout reading a particular file
                                   in sys filesystem
        @raise IOError: if a sysfs file or directory disappears since start
                        of this script

        @param dev: the name of the MD device to check (e.g. 'md0', 'md400')
        @type dev: str

        @return: a tuple of two values:
                    * the numeric (Nagios) state
                    * a textual description of the state
        @rtype: tuple of int and str

        """

        log.debug("Checking device %r ...", dev)

        # Define directories and files in sysfs
        # /sys/block/mdX
        base_dir = os.sep + os.path.join('sys', 'block', dev)
        # /sys/block/mdX/md
        base_mddir = os.path.join(base_dir, 'md')
        # /sys/block/mdX/md/array_state
        array_state_file = os.path.join(base_mddir, 'array_state')
        # /sys/block/mdX/md/degraded
        degraded_file = os.path.join(base_mddir, 'degraded')
        # /sys/block/mdX/md/raid_disks
        raid_disks_file = os.path.join(base_mddir, 'raid_disks')
        # /sys/block/mdX/md/level
        raid_level_file = os.path.join(base_mddir, 'level')
        # /sys/block/mdX/md/suspended
        suspended_file = os.path.join(base_mddir, 'suspended')
        # /sys/block/mdX/md/sync_action
        sync_action_file = os.path.join(base_mddir, 'sync_action')
        # /sys/block/mdX/md/sync_completed
        sync_completed_file = os.path.join(base_mddir, 'sync_completed')
        # /sys/block/mdX/md/dev-*
        slavedir_pattern = os.path.join(base_mddir, 'dev-*')

        for sys_dir in (base_dir, base_mddir):
            if not os.path.isdir(sys_dir):
                raise IOError(errno.ENOENT, "Directory doesn't exists.", sys_dir)

        state = RaidState(dev)

        # Array status
        state.array_state = self.read_file(array_state_file).strip()
        # RAID level
        state.raid_level = self.read_file(raid_level_file).strip()
        # degraded state, if available
        if os.path.exists(degraded_file):
            state.degraded = bool(int(self.read_file(degraded_file)))
        # number of raid disks
        state.nr_raid_disks = int(self.read_file(raid_disks_file))
        # suspended state, if available
        if os.path.exists(suspended_file):
            state.suspended = bool(int(self.read_file(suspended_file)))
        # state of synchronisation, if available
        if os.path.exists(sync_action_file):
            state.sync_action = self.read_file(sync_action_file).strip()

        # state of synchronisation process, if available
        if os.path.exists(sync_completed_file):
            sync_state = self.read_file(sync_completed_file).strip()
            match = re_sync_completed.search(sync_state)
            if match:
                state.sectors_synced = int(match.group(1))
                state.sectors_total = int(match.group(2))
                # avoid a division by zero on an empty total
                if state.sectors_total:
                    state.sync_completed = (
                        float(state.sectors_synced) / float(state.sectors_total))

        # Every expected slot starts out empty; slots still None after
        # scanning the slave directories are reported as failing below.
        for i in range(state.nr_raid_disks):
            state.raid_devices[i] = None

        if self.verbose > 3:
            log.debug(
                "Searching for slave dirs with pattern %r ...", slavedir_pattern)
        slavedirs = glob.glob(slavedir_pattern)
        if self.verbose > 2:
            log.debug("Found slave dirs: %r", slavedirs)

        for slave_dir in slavedirs:

            if self.verbose > 3:
                log.debug("Checking slave dir %r ...", slave_dir)

            # Defining some sysfs files
            # /sys/block/mdX/md/dev-XYZ/state
            slave_state_file = os.path.join(slave_dir, 'state')
            # /sys/block/mdX/md/dev-XYZ/slot
            slave_slot_file = os.path.join(slave_dir, 'slot')
            # /sys/block/mdX/md/dev-XYZ/block
            slave_block_file = os.path.join(slave_dir, 'block')

            # Reading some status files; a slot content which is not a
            # number means the device has no slot in the array.
            try:
                slave_slot = int(self.read_file(slave_slot_file))
            except ValueError:
                slave_slot = None
            slave_state = self.read_file(slave_state_file).strip()
            is_spare = (slave_state == 'spare')

            # /sys/block/mdX/md/rdN, only for devices owning a slot
            rd_link = None
            if slave_slot is not None:
                rd_link = os.path.join(base_mddir, 'rd%d' % (slave_slot))

            # Retrieving the slave block device: resolve the 'block' symlink
            # and map its basename to the device node below /dev
            block_target = os.readlink(slave_block_file)
            slave_block_device = os.path.normpath(os.path.join(
                os.path.dirname(slave_block_file), block_target))
            slave_bd_basename = os.path.basename(slave_block_device)
            slave_block_device = os.sep + os.path.join('dev', slave_bd_basename)

            slave = SlaveState(slave_slot, slave_dir)
            slave.block_device = slave_block_device
            slave.state = slave_state

            # Check existence of the rdX link
            slave.rdlink = rd_link
            slave.rdlink_exists = (
                rd_link is not None and os.path.exists(rd_link))

            # Assign slave as a raid, a spare or a failed device
            state.slaves.append(slave_bd_basename)
            if is_spare:
                state.spare_devices[slave_bd_basename] = slave
            elif rd_link is None or slave_state == 'faulty':
                state.failed_devices[slave_bd_basename] = slave
            else:
                state.raid_devices[slave_slot] = slave

        if self.verbose > 2:
            log.debug("Status results for %r:\n%s", dev, pp(state.as_dict()))

        # And evaluate the results ....
        state_id = nagios.state.ok

        # Check the array state
        state_msg = "%s - %s" % (dev, state.array_state)
        if state.array_state not in (
                'readonly', 'read-auto', 'clean', 'active', 'active-idle'):
            if state.array_state == 'write-pending':
                state_id = nagios.state.warning
            elif state.array_state in ('clear', 'inactive'):
                state_id = nagios.state.critical
            else:
                state_id = nagios.state.unknown

        if not self.spare_ok:
            # Check for existing spare devices
            spare_names = sorted(state.spare_devices.keys())
            if spare_names:
                # a sorted list keeps the message stable and readable
                state_msg += ", has spares %r" % (spare_names)
                state_id = max_state(state_id, nagios.state.warning)

        # Check degraded and synchronisation state
        if state.degraded:

            state_msg += ", degraded"

            if state.sync_action is None:
                state_id = max_state(state_id, nagios.state.critical)
                state_msg += ", unknown sync action"
            elif state.sync_action == 'idle':
                state_id = max_state(state_id, nagios.state.critical)
                state_msg += ", idle"
            elif state.sync_action in ('resync', 'recover', 'check', 'repair'):
                state_id = max_state(state_id, nagios.state.warning)
                state_msg += ", " + state.sync_action
            else:
                state_id = max_state(state_id, nagios.state.unknown)
                state_msg += ", sync " + state.sync_action

            # Add percentage of sync completed to output
            if state.sync_completed is not None:
                state_msg += " %.1f%%" % ((state.sync_completed * 100))

        # Check state of slave devices
        for i in state.raid_devices:
            log.debug("Evaluating state of raid_device[%r]", i)
            raid_device = state.raid_devices[i]
            if raid_device is None:
                # An empty slot is only a warning while the array is syncing
                if state.sync_action in ('resync', 'recover', 'check', 'repair'):
                    state_id = max_state(state_id, nagios.state.warning)
                else:
                    state_id = max_state(state_id, nagios.state.critical)
                state_msg += ", raid_device[%r] fails" % (i)
                continue
            if raid_device.state in ('in_sync', 'writemostly'):
                continue
            bd = os.path.basename(raid_device.block_device)
            state_msg += ", raid_device[%r]=%s %s" % (i, bd, raid_device.state)
            if not raid_device.rdlink_exists:
                state_msg += " failed"
                state_id = max_state(state_id, nagios.state.critical)

        failed_names = sorted(state.failed_devices.keys())
        if failed_names:
            state_msg += ", failed %r" % (failed_names)
            state_id = max_state(state_id, nagios.state.critical)

        return (state_id, state_msg)
Exemplo n.º 2
0
    def call(self):
        """
        Method to call the plugin directly.

        Calls self.megacli() with '-PdList', splits the output into one
        record per physical drive (a record starts at every
        'Enclosure Device ID' line), stores the records in self.drive and
        self.drive_list, evaluates the error counters and states of every
        drive, adds performance data and finishes through self.exit().
        """

        state = nagios.state.ok
        out = "State of physical drives of MegaRaid adapter %d seems to be okay." % (
            self.adapter_nr)

        # Enclosure Device ID: 0
        re_enc = re.compile(r'^\s*Enclosure\s+Device\s+ID\s*:\s*(\d+)', re.IGNORECASE)

        # Attribute lines inside a drive record:
        # (pattern, key in the drive dict, converter or None for plain text)
        field_parsers = (
            # Slot Number: 23
            (re.compile(r'^\s*Slot\s+Number\s*:\s*(\d+)', re.IGNORECASE),
                'slot', int),
            # Device Id: 6
            (re.compile(r'^\s*Device\s+Id\s*:\s*(\d+)', re.IGNORECASE),
                'dev_id', int),
            # Media Error Count: 0
            (re.compile(r'^\s*Media\s+Error\s+Count\s*:\s*(\d+)', re.IGNORECASE),
                'media_errors', int),
            # Other Error Count: 0
            (re.compile(r'^\s*Other\s+Error\s+Count\s*:\s*(\d+)', re.IGNORECASE),
                'other_errors', int),
            # Predictive Failure Count: 0
            (re.compile(
                r'^\s*Predictive\s+Failure\s+Count\s*:\s*(\d+)', re.IGNORECASE),
                'predictive_failures', int),
            # Firmware state: Online, Spun Up
            (re.compile(r'^\s*Firmware\s+state\s*:\s*(\S+.*)', re.IGNORECASE),
                'fw_state', None),
            # Foreign State: None
            (re.compile(r'^\s*Foreign\s+state\s*:\s*(\S+.*)', re.IGNORECASE),
                'foreign_state', None),
        )

        good_fw_states = (
            r'Online,\s+Spun\s+Up',
            r'Hotspare,\s+Spun\s+Up',
            r'Hotspare,\s+Spun\s+Down',
            r'Unconfigured\(good\),\s+Spun\s+Up',
            r'Unconfigured\(good\),\s+Spun\s+Down',
        )
        warn_fw_states = (
            r'Rebuild',
            r'Copyback',
        )
        good_fw_pattern = r'^\s*(?:' + r'|'.join(good_fw_states) + r')\s*$'
        warn_fw_pattern = r'^\s*(?:' + r'|'.join(warn_fw_states) + r')\s*$'
        re_good_fw_state = re.compile(good_fw_pattern, re.IGNORECASE)
        re_warn_fw_state = re.compile(warn_fw_pattern, re.IGNORECASE)

        def register_drive(dev):
            # Store a drive record under its '[enclosure:slot]' id; records
            # without a slot number cannot be identified and are dropped.
            if dev and ('enclosure' in dev) and ('slot' in dev):
                pd_id = '[%d:%d]' % (dev['enclosure'], dev['slot'])
                self.drive_list.append(pd_id)
                self.drive[pd_id] = dev

        drives_total = 0
        args = ('-PdList',)
        (stdoutdata, stderrdata, ret, exit_code) = self.megacli(args)
        if self.verbose > 3:
            log.debug("Output on StdOut:\n%s", stdoutdata)

        cur_dev = None

        for line in stdoutdata.splitlines():

            line = line.strip()

            m = re_enc.search(line)
            if m:
                # A new record begins: finish the previous one first.
                register_drive(cur_dev)
                drives_total += 1
                cur_dev = {
                    'enclosure': int(m.group(1)),
                    'media_errors': 0,
                    'other_errors': 0,
                    'predictive_failures': 0,
                    'fw_state': None,
                    'foreign_state': None,
                }
                continue

            for (regex, key, convert) in field_parsers:
                m = regex.search(line)
                if not m:
                    continue
                # Attribute lines before the first enclosure line are ignored.
                if cur_dev:
                    value = m.group(1)
                    if convert is not None:
                        value = convert(value)
                    cur_dev[key] = value
                break

        # The last record is not followed by another enclosure line.
        register_drive(cur_dev)

        media_errors = 0
        other_errors = 0
        predictive_failures = 0
        fw_state_wrong = 0
        foreign_state_wrong = 0
        errors = []

        for pd_id in self.drive_list:
            cur_dev = self.drive[pd_id]
            found_errors = False
            drv_desc = []
            disk_state = nagios.state.ok

            if cur_dev['media_errors']:
                disk_state = max_state(disk_state, nagios.state.critical)
                found_errors = True
                drv_desc.append("%d media errors" % (cur_dev['media_errors']))
                media_errors += 1
            if cur_dev['other_errors']:
                # reported, but does not raise the state of the drive
                found_errors = True
                drv_desc.append("%d other errors" % (cur_dev['other_errors']))
                other_errors += 1
            if cur_dev['predictive_failures']:
                disk_state = max_state(disk_state, nagios.state.critical)
                found_errors = True
                drv_desc.append("%d predictive failures" % (cur_dev['predictive_failures']))
                predictive_failures += 1

            # fw_state stays None if the record had no 'Firmware state'
            # line; match against '' then, so that the drive is reported
            # with a wrong state instead of raising a TypeError.
            fw_state = cur_dev['fw_state']
            fw_state_text = fw_state or ''
            if not re_good_fw_state.search(fw_state_text):
                if re_warn_fw_state.search(fw_state_text):
                    disk_state = max_state(disk_state, nagios.state.warning)
                else:
                    disk_state = max_state(disk_state, nagios.state.critical)
                found_errors = True
                drv_desc.append("wrong firmware state %r" % (fw_state))
                fw_state_wrong += 1

            # A missing 'Foreign State' line (None) is not treated as an
            # error; only an explicit value other than 'None' is.
            foreign_state = cur_dev['foreign_state']
            if foreign_state is not None and foreign_state.lower() != "none":
                disk_state = max_state(disk_state, nagios.state.critical)
                found_errors = True
                drv_desc.append("wrong foreign state %r" % (foreign_state))
                foreign_state_wrong += 1

            if found_errors:
                state = max_state(state, disk_state)
                dd = "drive %s has " % (pd_id)
                dd += ' and '.join(drv_desc)
                errors.append(dd)
            if found_errors or self.verbose > 1:
                log.debug(
                    "State of drive %s is %s.", pd_id,
                    nagios.plugin.functions.STATUS_TEXT[disk_state])

        log.debug("Found %d drives.", drives_total)
        if self.verbose > 2:
            log.debug("Found Pds:\n%s", self.drive_list)
            log.debug("Found Pd data:\n%s", self.drive)

        if errors:
            out = ', '.join(errors)

        self.add_perfdata(label='drives_total', value=drives_total, uom='')
        self.add_perfdata(label='media_errors', value=media_errors, uom='')
        self.add_perfdata(label='other_errors', value=other_errors, uom='')
        self.add_perfdata(label='predictive_failures', value=predictive_failures, uom='')
        self.add_perfdata(label='wrong_fw_state', value=fw_state_wrong, uom='')
        self.add_perfdata(label='wrong_foreign_state', value=foreign_state_wrong, uom='')

        self.exit(state, out)
Exemplo n.º 3
0
    def call(self):
        """
        Method to call the plugin directly.

        Calls self.megacli() with '-LdInfo -L <ld_number>', extracts RAID
        level, size, state, number of drives, span depth, cache flag and
        the progress of a running consistency check from the output and
        finishes through self.exit() with the resulting state and message.
        If the output says that the virtual drive does not exist,
        self.die() is called with that line.
        """

        state = nagios.state.ok

        # Adapter 0: Virtual Drive 55 Does not Exist.
        re_not_exists = re.compile(
            r'^.*Virtual\s+Drive\s+\d+\s+Does\s+not\s+Exist\.', re.IGNORECASE)
        # RAID Level          : Primary-1, Secondary-0, RAID Level Qualifier-0
        re_raid_level = re.compile(r'^\s*RAID\s+Level\s*:\s+Primary-(\d+)',
                                   re.IGNORECASE)
        # Size                : 2.728 TB
        re_size = re.compile(r'^\s*Size\s*:\s+(\d+(?:\.\d*)?)\s*(\S+)?',
                             re.IGNORECASE)
        # State               : Optimal
        re_state = re.compile(r'^\s*State\s*:\s+(\S+)', re.IGNORECASE)
        # Number Of Drives    : 2
        re_number = re.compile(r'^\s*Number\s+Of\s+Drives\s*:\s+(\d+)',
                               re.IGNORECASE)
        # Span Depth          : 1
        re_span = re.compile(r'^\s*Span\s+Depth\s*:\s+(\d+)', re.IGNORECASE)
        # Is VD Cached: Yes
        # Is VD Cached: No
        re_cached = re.compile(r'^\s*Is\s+VD\s+Cached\s*:\s+(\S+)',
                               re.IGNORECASE)
        # Check Consistency: Completed 95%, Taken 8 min
        re_consist = re.compile(
            r'Check\s+Consistency\s*:\s+Completed\s+(\d+)%,\s+Taken\s+(\d+)\s*min',
            re.IGNORECASE)

        raid_level = None
        size_val = None
        size_unit = None
        ld_state = None
        pd_number = None
        span_depth = None
        ld_cached = None
        consist_percent = None
        consist_min = None

        args = ('-LdInfo', '-L', ("%d" % (self.ld_number)))
        (stdoutdata, stderrdata, ret, exit_code) = self.megacli(args)
        if self.verbose > 2:
            log.debug("Output on StdOut:\n%s", stdoutdata)

        for line in stdoutdata.splitlines():

            line = line.strip()

            # Logical Drive not exists
            if re_not_exists.search(line):
                self.die(line)

            match = re_raid_level.search(line)
            if match:
                raid_level = int(match.group(1))
                continue

            match = re_size.search(line)
            if match:
                size_val = float(match.group(1))
                size_unit = match.group(2)
                continue

            match = re_state.search(line)
            if match:
                ld_state = match.group(1)
                continue

            match = re_number.search(line)
            if match:
                pd_number = int(match.group(1))
                continue

            match = re_span.search(line)
            if match:
                span_depth = int(match.group(1))
                continue

            match = re_cached.search(line)
            if match:
                ld_cached = match.group(1)
                continue

            match = re_consist.search(line)
            if match:
                consist_percent = int(match.group(1))
                consist_min = int(match.group(2))

        # A failed MegaCli call, a missing state and every state other than
        # 'Optimal' are critical.
        if exit_code or not ld_state or ld_state.lower() != 'optimal':
            state = nagios.state.critical
        if not ld_state:
            ld_state = 'unknown'

        consistency_out = ''
        if consist_percent is not None:
            if self.warn_on_consistency_check:
                state = max_state(state, nagios.state.warning)
            consistency_out = ", consistency check completed: %d%%, taken %d min." % (
                consist_percent, consist_min)

        cached_out = ', cached: No'
        if ld_cached:
            cached_out = ', cached: %s' % (ld_cached)
        if self.cached:
            # caching is demanded, but the LD is not cached
            if not ld_cached or ld_cached.lower() != 'yes':
                state = max_state(state, nagios.state.warning)

        # 9999 is shown if the number of drives could not be found
        pd_count = 9999
        if pd_number:
            pd_count = pd_number
            if span_depth and span_depth > 1:
                # spanned LD: all spans count, and e.g. level 1 is shown
                # as RAID-10
                pd_count = pd_number * span_depth
                if raid_level is not None and raid_level < 10:
                    raid_level *= 10

        # The RAID level is unknown if MegaCli gave no 'RAID Level' line
        # (e.g. on a failed call); formatting None with %d would raise.
        raid_out = 'unknown'
        if raid_level is not None:
            raid_out = "%d" % (raid_level)

        size_out = ''
        if size_val:
            if size_unit:
                size_out = ', %s %s' % (str(size_val), size_unit)
            else:
                size_out = ', %s' % (str(size_val))

        out = "State of LD %d of MegaRaid adapter %d (RAID-%s, %d drives%s%s%s): %s." % (
            self.ld_number, self.adapter_nr, raid_out, pd_count, size_out,
            cached_out, consistency_out, ld_state)

        self.exit(state, out)
Exemplo n.º 4
0
    def call(self):
        """
        Method to call the plugin directly.

        Calls self.megacli() with '-AdpBbuCmd -GetBbuStatus', extracts the
        battery type, the battery state and a number of status flags from
        the output, raises the plugin state for every flag not having its
        expected value and finishes through self.exit().
        """

        state = nagios.state.ok

        # Lines to extract from the MegaCli output:
        # (name, pattern, whether the found value is lowercased).
        # If a label occurs more than once, the last occurrence wins.
        line_patterns = (
            ('batt_type', r'^\s*BatteryType\s*:\s*(\S+.*)', False),
            ('batt_state', r'^\s*Battery\s*State\s*:\s*(\S+.*)', False),
            ('voltage', r'^\s*Voltage\s*:\s+(\S+)', True),
            ('temperature', r'^\s*Temperature\s*:\s+(\S+)', True),
            ('lc_req', r'^\s*Learn\s+Cycle\s+Requested\s*:\s+(\S+)', True),
            ('lc_act', r'^\s*Learn\s+Cycle\s+Active\s*:\s+(\S+)', True),
            ('lc_state', r'^\s*Learn\s+Cycle\s+Status\s*:\s+(\S+)', True),
            ('lc_timeout', r'^\s*Learn\s+Cycle\s+Timeout\s*:\s+(\S+)', True),
            ('i2c_err', r'^\s*I2c\s+Errors\s+Detected\s*:\s+(\S+)', True),
            ('bbu_miss', r'^\s*Battery\s+Pack\s+Missing\s*:\s+(\S+)', True),
            ('bbu_replace',
                r'^\s*Battery\s+Replacement\s+required\s*:\s+(\S+)', True),
            ('capac_low', r'^\s*Remaining\s+Capacity\s+Low\s*:\s+(\S+)', True),
            ('per_learn', r'^\s*Periodic\s+Learn\s+Required\s*:\s+(\S+)', True),
            ('trans_learn', r'^\s*Transparent\s+Learn\s*:\s+(\S+)', True),
            ('no_space',
                r'^\s*No\s+space\s+to\s+cache\s+offload\s*:\s+(\S+)', True),
            ('pack_fail',
                r'^\s*Pack\s+is\s+about\s+to\s+fail\s+.*:\s+(\S+)', True),
            ('micro_upd',
                r'^\s*Module\s+microcode\s+update\s+required\s*:\s+(\S+)', True),
        )
        parsers = [
            (name, re.compile(pattern, re.IGNORECASE), lowercase)
            for (name, pattern, lowercase) in line_patterns]

        args = ('-AdpBbuCmd', '-GetBbuStatus')
        (stdoutdata, stderrdata, ret, exit_code) = self.megacli(args)
        if self.verbose > 2:
            log.debug("Output on StdOut:\n%s", stdoutdata)

        # Everything not found in the output stays None and is not
        # evaluated below; only the battery type has a textual default.
        found = dict((name, None) for (name, pattern, lowercase) in line_patterns)
        found['batt_type'] = 'unknown'

        for line in stdoutdata.splitlines():

            line = line.strip()

            # the first matching pattern consumes the line
            for (name, regex, lowercase) in parsers:
                match = regex.search(line)
                if match:
                    value = match.group(1)
                    if lowercase:
                        value = value.lower()
                    found[name] = value
                    break

        # A failed MegaCli call, a missing battery state and every battery
        # state other than 'Optimal' are critical.
        batt_state = found['batt_state']
        if exit_code or not batt_state or batt_state.lower() != 'optimal':
            state = nagios.state.critical
        if not batt_state:
            batt_state = 'unknown'

        warning = nagios.state.warning
        critical = nagios.state.critical

        # (name, expected value, state to raise or None for information
        # only, message template)
        checks = (
            ('voltage', 'ok', critical, "Voltage is %r."),
            ('temperature', 'ok', warning, "Temperature is %r."),
            ('lc_req', 'no', None, "Learn Cycle Requested: %r."),
            ('lc_act', 'no', None, "Learn Cycle Active: %r."),
            ('lc_state', 'ok', warning, "Learn Cycle Status: %r."),
            ('lc_timeout', 'no', warning, "Learn Cycle Timeout: %r."),
            ('i2c_err', 'no', warning, "I2c Errors Detected %r."),
            ('bbu_miss', 'no', critical, "Battery Pack Missing: %r."),
            ('bbu_replace', 'no', critical, "Battery Replacement required: %r."),
            ('capac_low', 'no', warning, "Remaining Capacity Low: %r."),
            ('per_learn', 'no', warning, "Periodic Learn Required: %r."),
            ('trans_learn', 'no', warning, "Transparent Learn: %r."),
            ('no_space', 'no', warning, "No space to cache offload %r."),
            ('pack_fail', 'no', warning,
                "Pack is about to fail & should be replaced: %r."),
            ('micro_upd', 'no', warning, "Module microcode update required: %r."),
        )

        add_infos = []
        for (name, expected, severity, template) in checks:
            value = found[name]
            if not value or value == expected:
                continue
            if severity is not None:
                # Combine with the state found so far, so that a warning
                # never lowers an already critical state.
                state = max_state(state, severity)
            add_infos.append(template % (value,))

        add_info = ''
        if add_infos:
            add_info = '; ' + ', '.join(add_infos)

        out = "State of BBU of MegaRaid adapter %d (type %s): %s%s" % (
            self.adapter_nr, found['batt_type'], batt_state, add_info)

        self.exit(state, out)
Exemplo n.º 5
0
    def call(self):
        """
        Method to call the plugin directly.
        """

        state = nagios.state.ok
        out = "BBU of MegaRaid adapter %d seems to be okay." % (self.adapter_nr)

        re_batt_type = re.compile(r'^\s*BatteryType\s*:\s*(\S+.*)', re.IGNORECASE)
        re_batt_state = re.compile(r'^\s*Battery\s*State\s*:\s*(\S+.*)', re.IGNORECASE)
        re_voltage = re.compile(r'^\s*Voltage\s*:\s+(\S+)', re.IGNORECASE)
        re_temp = re.compile(r'^\s*Temperature\s*:\s+(\S+)', re.IGNORECASE)
        re_lc_req = re.compile(r'^\s*Learn\s+Cycle\s+Requested\s*:\s+(\S+)', re.IGNORECASE)
        re_lc_act = re.compile(r'^\s*Learn\s+Cycle\s+Active\s*:\s+(\S+)', re.IGNORECASE)
        re_lc_state = re.compile(r'^\s*Learn\s+Cycle\s+Status\s*:\s+(\S+)', re.IGNORECASE)
        re_lc_tout = re.compile(r'^\s*Learn\s+Cycle\s+Timeout\s*:\s+(\S+)', re.IGNORECASE)
        re_i2c_err = re.compile(r'^\s*I2c\s+Errors\s+Detected\s*:\s+(\S+)', re.IGNORECASE)
        re_bbu_miss = re.compile(r'^\s*Battery\s+Pack\s+Missing\s*:\s+(\S+)', re.IGNORECASE)
        re_bbu_replace = re.compile(
            r'^\s*Battery\s+Replacement\s+required\s*:\s+(\S+)', re.IGNORECASE)
        re_capac_low = re.compile(
            r'^\s*Remaining\s+Capacity\s+Low\s*:\s+(\S+)', re.IGNORECASE)
        re_per_learn = re.compile(
            r'^\s*Periodic\s+Learn\s+Required\s*:\s+(\S+)', re.IGNORECASE)
        re_trans_learn = re.compile(
            r'^\s*Transparent\s+Learn\s*:\s+(\S+)', re.IGNORECASE)
        re_no_space = re.compile(
            r'^\s*No\s+space\s+to\s+cache\s+offload\s*:\s+(\S+)', re.IGNORECASE)
        re_pack_fail = re.compile(
            r'^\s*Pack\s+is\s+about\s+to\s+fail\s+.*:\s+(\S+)', re.IGNORECASE)
        re_micro_upd = re.compile(
            r'^\s*Module\s+microcode\s+update\s+required\s*:\s+(\S+)', re.IGNORECASE)

        args = ('-AdpBbuCmd', '-GetBbuStatus')
        (stdoutdata, stderrdata, ret, exit_code) = self.megacli(args)
        if self.verbose > 2:
            log.debug("Output on StdOut:\n%s", stdoutdata)

        batt_type = 'unknown'
        batt_state = None       # optimal
        voltage = None          # ok
        temperature = None      # ok
        lc_req = None           # no
        lc_act = None           # no
        lc_state = None         # ok
        lc_timeout = None       # no
        i2c_err = None          # no
        bbu_miss = None         # no
        bbu_replace = None      # no
        capac_low = None        # no
        per_learn = None        # no
        trans_learn = None      # no
        no_space = None         # no
        pack_fail = None        # no
        micro_upd = None        # no

        for line in stdoutdata.splitlines():

            line = line.strip()

            match = re_batt_type.search(line)
            if match:
                batt_type = match.group(1)
                continue

            match = re_batt_state.search(line)
            if match:
                batt_state = match.group(1)
                continue

            match = re_voltage.search(line)
            if match:
                voltage = match.group(1).lower()
                continue

            match = re_temp.search(line)
            if match:
                temperature = match.group(1).lower()
                continue

            match = re_lc_req.search(line)
            if match:
                lc_req = match.group(1).lower()
                continue

            match = re_lc_act.search(line)
            if match:
                lc_act = match.group(1).lower()
                continue

            match = re_lc_state.search(line)
            if match:
                lc_state = match.group(1).lower()
                continue

            match = re_lc_tout.search(line)
            if match:
                lc_timeout = match.group(1).lower()
                continue

            match = re_i2c_err.search(line)
            if match:
                i2c_err = match.group(1).lower()
                continue

            match = re_bbu_miss.search(line)
            if match:
                bbu_miss = match.group(1).lower()
                continue

            match = re_bbu_replace.search(line)
            if match:
                bbu_replace = match.group(1).lower()
                continue

            match = re_capac_low.search(line)
            if match:
                capac_low = match.group(1).lower()
                continue

            match = re_per_learn.search(line)
            if match:
                per_learn = match.group(1).lower()
                continue

            match = re_trans_learn.search(line)
            if match:
                trans_learn = match.group(1).lower()
                continue

            match = re_no_space.search(line)
            if match:
                no_space = match.group(1).lower()
                continue

            match = re_pack_fail.search(line)
            if match:
                pack_fail = match.group(1).lower()
                continue

            match = re_micro_upd.search(line)
            if match:
                micro_upd = match.group(1).lower()
                continue

        add_infos = []
        if exit_code:
            state = nagios.state.critical
        elif not batt_state:
            state = nagios.state.critical
            batt_state = 'unknown'
        elif batt_state.lower() != 'optimal':
            state = nagios.state.critical

        if voltage and voltage != 'ok':
            state = max_state(max_state, nagios.state.critical)
            add_infos.append("Voltage is %r." % (voltage))

        if temperature and temperature != 'ok':
            state = max_state(max_state, nagios.state.warning)
            add_infos.append("Temperature is %r." % (temperature))

        if lc_req and lc_req != 'no':
            add_infos.append("Learn Cycle Requested: %r." % (lc_req))

        if lc_act and lc_act != 'no':
            add_infos.append("Learn Cycle Active: %r." % (lc_act))

        if lc_state and lc_state != 'ok':
            state = max_state(max_state, nagios.state.warning)
            add_infos.append("Learn Cycle Status: %r." % (lc_state))

        if lc_timeout and lc_timeout != 'no':
            state = max_state(max_state, nagios.state.warning)
            add_infos.append("Learn Cycle Timeout: %r." % (lc_timeout))

        if i2c_err and i2c_err != 'no':
            state = max_state(max_state, nagios.state.warning)
            add_infos.append("I2c Errors Detected %r." % (i2c_err))

        if bbu_miss and bbu_miss != 'no':
            state = max_state(max_state, nagios.state.critical)
            add_infos.append("Battery Pack Missing: %r." % (bbu_miss))

        if bbu_replace and bbu_replace != 'no':
            state = max_state(max_state, nagios.state.critical)
            add_infos.append("Battery Replacement required: %r." % (bbu_replace))

        if capac_low and capac_low != 'no':
            state = max_state(max_state, nagios.state.warning)
            add_infos.append("Remaining Capacity Low: %r." % (capac_low))

        if per_learn and per_learn != 'no':
            state = max_state(max_state, nagios.state.warning)
            add_infos.append("Periodic Learn Required: %r." % (per_learn))

        if trans_learn and trans_learn != 'no':
            state = max_state(max_state, nagios.state.warning)
            add_infos.append("Transparent Learn: %r." % (trans_learn))

        if no_space and no_space != 'no':
            state = max_state(max_state, nagios.state.warning)
            add_infos.append("No space to cache offload %r." % (no_space))

        if pack_fail and pack_fail != 'no':
            state = max_state(max_state, nagios.state.warning)
            add_infos.append("Pack is about to fail & should be replaced: %r." % (pack_fail))

        if micro_upd and micro_upd != 'no':
            state = max_state(max_state, nagios.state.warning)
            add_infos.append("Module microcode update required: %r." % (micro_upd))

        add_info = ''
        if add_infos:
            add_info = '; ' + ', '.join(add_infos)

        out = "State of BBU of MegaRaid adapter %d (type %s): %s%s" % (
            self.adapter_nr, batt_type, batt_state, add_info)

        self.exit(state, out)
Exemplo n.º 6
0
    def call(self):
        """
        Method to call the plugin directly.

        Calls self.megacli() with the arguments '-LdInfo -L <ld_number>',
        parses the properties of the logical drive (LD) from the returned
        standard output and hands the resulting Nagios state together with
        a one-line status message to self.exit().

        The state becomes critical, if the call returned a non-zero exit code,
        if no state of the LD was found in the output, or if this state
        is not 'Optimal'. It is raised to at least warning, if a consistency
        check is reported and self.warn_on_consistency_check is set, or if
        self.cached is set and the LD is not reported as cached.

        If the output states, that the LD does not exist, this output line
        is passed to self.die().
        """

        state = nagios.state.ok

        # Adapter 0: Virtual Drive 55 Does not Exist.
        re_not_exists = re.compile(
            r'^.*Virtual\s+Drive\s+\d+\s+Does\s+not\s+Exist\.', re.IGNORECASE)
        # RAID Level          : Primary-1, Secondary-0, RAID Level Qualifier-0
        re_raid_level = re.compile(
            r'^\s*RAID\s+Level\s*:\s+Primary-(\d+)', re.IGNORECASE)
        # Size                : 2.728 TB
        re_size = re.compile(
            r'^\s*Size\s*:\s+(\d+(?:\.\d*)?)\s*(\S+)?', re.IGNORECASE)
        # State               : Optimal
        re_state = re.compile(r'^\s*State\s*:\s+(\S+)', re.IGNORECASE)
        # Number Of Drives    : 2
        re_number = re.compile(
            r'^\s*Number\s+Of\s+Drives\s*:\s+(\d+)', re.IGNORECASE)
        # Span Depth          : 1
        re_span = re.compile(r'^\s*Span\s+Depth\s*:\s+(\d+)', re.IGNORECASE)
        # Is VD Cached: Yes
        # Is VD Cached: No
        re_cached = re.compile(
            r'^\s*Is\s+VD\s+Cached\s*:\s+(\S+)', re.IGNORECASE)
        # Check Consistency: Completed 95%, Taken 8 min
        re_consist = re.compile(
            r'Check\s+Consistency\s*:\s+Completed\s+(\d+)%,\s+Taken\s+(\d+)\s*min',
            re.IGNORECASE)

        # All values stay None, as long as the matching line was not found.
        raid_level = None
        size_val = None
        size_unit = None
        ld_state = None
        pd_number = None
        span_depth = None
        ld_cached = None
        consist_percent = None
        consist_min = None

        args = ('-LdInfo', '-L', ("%d" % (self.ld_number)))
        (stdoutdata, stderrdata, ret, exit_code) = self.megacli(args)
        if self.verbose > 2:
            log.debug("Output on StdOut:\n%s", stdoutdata)

        for line in stdoutdata.splitlines():

            line = line.strip()

            # Logical Drive does not exist
            if re_not_exists.search(line):
                self.die(line)

            match = re_raid_level.search(line)
            if match:
                raid_level = int(match.group(1))
                continue

            match = re_size.search(line)
            if match:
                size_val = float(match.group(1))
                size_unit = match.group(2)
                continue

            match = re_state.search(line)
            if match:
                ld_state = match.group(1)
                continue

            match = re_number.search(line)
            if match:
                pd_number = int(match.group(1))
                continue

            match = re_span.search(line)
            if match:
                span_depth = int(match.group(1))
                continue

            match = re_cached.search(line)
            if match:
                ld_cached = match.group(1)

            match = re_consist.search(line)
            if match:
                consist_percent = int(match.group(1))
                consist_min = int(match.group(2))

        # A failed call, a missing state and a non-optimal state are all
        # critical. A missing state is always shown as 'unknown', also if
        # the call itself failed.
        if not ld_state:
            state = nagios.state.critical
            ld_state = 'unknown'
        elif exit_code or ld_state.lower() != 'optimal':
            state = nagios.state.critical

        consistency_out = ''
        if consist_percent is not None:
            if self.warn_on_consistency_check:
                state = max_state(state, nagios.state.warning)
            consistency_out = ", consistency check completed: %d%%, taken %d min." % (
                consist_percent, consist_min)

        cached_out = ', cached: No'
        if ld_cached:
            cached_out = ', cached: %s' % (ld_cached)
        if self.cached:
            if not ld_cached or ld_cached.lower() != 'yes':
                state = max_state(state, nagios.state.warning)

        # 9999 is the placeholder shown, if the number of drives is unknown.
        pd_count = 9999
        if pd_number:
            pd_count = pd_number
            if span_depth and span_depth > 1:
                pd_count = pd_number * span_depth
                # With more than one span a single digit level is shown
                # multiplied by ten, e.g. RAID-1 as RAID-10. The level may be
                # unknown here, so it must not be compared or multiplied then.
                if raid_level is not None and raid_level < 10:
                    raid_level *= 10

        # Without a parsed RAID level a '%d' conversion would raise
        # a TypeError, so the level is rendered as text beforehand.
        raid_level_out = 'unknown'
        if raid_level is not None:
            raid_level_out = "%d" % (raid_level)

        size_out = ''
        if size_val:
            if size_unit:
                size_out = ', %s %s' % (str(size_val), size_unit)
            else:
                size_out = ', %s' % (str(size_val))

        out = "State of LD %d of MegaRaid adapter %d (RAID-%s, %d drives%s%s%s): %s." % (
            self.ld_number, self.adapter_nr, raid_level_out, pd_count,
            size_out, cached_out, consistency_out, ld_state)

        self.exit(state, out)
Exemplo n.º 7
0
    def call(self):
        """
        Method to call the plugin directly.

        Calls self.megacli() with the argument '-PdList' and parses the
        returned standard output into one dict per physical drive (PD).
        Every drive with a known enclosure and slot is stored in self.drive
        under the key '[<enclosure>:<slot>]', and this key is appended
        to self.drive_list.

        After that the error counters, the firmware state and the foreign
        state of all drives in self.drive_list are evaluated, performance
        data are added with self.add_perfdata(), and the resulting Nagios
        state and status message are handed to self.exit().
        """

        state = nagios.state.ok
        out = "State of physical drives of MegaRaid adapter %d seems to be okay." % (
            self.adapter_nr)

        # Enclosure Device ID: 0
        re_enc = re.compile(r'^\s*Enclosure\s+Device\s+ID\s*:\s*(\d+)',
                            re.IGNORECASE)
        # Slot Number: 23
        re_slot = re.compile(r'^\s*Slot\s+Number\s*:\s*(\d+)', re.IGNORECASE)
        # Device Id: 6
        re_dev_id = re.compile(r'^\s*Device\s+Id\s*:\s*(\d+)', re.IGNORECASE)
        # Media Error Count: 0
        re_media_errors = re.compile(r'^\s*Media\s+Error\s+Count\s*:\s*(\d+)',
                                     re.IGNORECASE)
        # Other Error Count: 0
        re_other_errors = re.compile(r'^\s*Other\s+Error\s+Count\s*:\s*(\d+)',
                                     re.IGNORECASE)
        # Predictive Failure Count: 0
        re_pred_failures = re.compile(
            r'^\s*Predictive\s+Failure\s+Count\s*:\s*(\d+)', re.IGNORECASE)
        # Firmware state: Online, Spun Up
        re_fw_state = re.compile(r'^\s*Firmware\s+state\s*:\s*(\S+.*)',
                                 re.IGNORECASE)
        # Foreign State: None
        re_foreign_state = re.compile(r'^\s*Foreign\s+state\s*:\s*(\S+.*)',
                                      re.IGNORECASE)

        good_fw_states = (
            r'Online,\s+Spun\s+Up',
            r'Hotspare,\s+Spun\s+Up',
            r'Hotspare,\s+Spun\s+Down',
            r'Unconfigured\(good\),\s+Spun\s+Up',
            r'Unconfigured\(good\),\s+Spun\s+Down',
        )
        warn_fw_states = (
            r'Rebuild',
            r'Copyback',
        )
        good_fw_pattern = r'^\s*(?:' + r'|'.join(good_fw_states) + r')\s*$'
        warn_fw_pattern = r'^\s*(?:' + r'|'.join(warn_fw_states) + r')\s*$'
        re_good_fw_state = re.compile(good_fw_pattern, re.IGNORECASE)
        re_warn_fw_state = re.compile(warn_fw_pattern, re.IGNORECASE)

        # Properties belonging to the current drive, tried in this order:
        # (pattern, key in the drive dict, whether the value is an integer)
        field_patterns = (
            (re_slot, 'slot', True),
            (re_dev_id, 'dev_id', True),
            (re_media_errors, 'media_errors', True),
            (re_other_errors, 'other_errors', True),
            (re_pred_failures, 'predictive_failures', True),
            (re_fw_state, 'fw_state', False),
            (re_foreign_state, 'foreign_state', False),
        )

        def register_drive(dev):
            """Store a parsed drive, if its enclosure and slot are known."""
            if dev and ('enclosure' in dev) and ('slot' in dev):
                drive_id = '[%d:%d]' % (dev['enclosure'], dev['slot'])
                self.drive_list.append(drive_id)
                self.drive[drive_id] = dev

        drives_total = 0
        args = ('-PdList', )
        # NOTE(review): exit_code is not evaluated in this method, although
        # the logical drive check turns critical on a non-zero exit code -
        # confirm whether this is intended.
        (stdoutdata, stderrdata, ret, exit_code) = self.megacli(args)
        if self.verbose > 3:
            log.debug("Output on StdOut:\n%s", stdoutdata)

        cur_dev = None

        for line in stdoutdata.splitlines():

            line = line.strip()

            # An enclosure line finishes the previous drive and starts
            # the next one.
            m = re_enc.search(line)
            if m:
                register_drive(cur_dev)
                drives_total += 1
                cur_dev = {
                    'enclosure': int(m.group(1)),
                    'media_errors': 0,
                    'other_errors': 0,
                    'predictive_failures': 0,
                    'fw_state': None,
                    'foreign_state': None,
                }
                continue

            for (pattern, key, is_number) in field_patterns:
                m = pattern.search(line)
                if not m:
                    continue
                # Properties found before the first enclosure line are dropped.
                if cur_dev is not None:
                    value = m.group(1)
                    if is_number:
                        value = int(value)
                    cur_dev[key] = value
                break

        # The last drive is not followed by another enclosure line.
        register_drive(cur_dev)

        media_errors = 0
        other_errors = 0
        predictive_failures = 0
        fw_state_wrong = 0
        foreign_state_wrong = 0
        errors = []

        for pd_id in self.drive_list:
            cur_dev = self.drive[pd_id]
            drv_desc = []
            disk_state = nagios.state.ok

            if cur_dev['media_errors']:
                disk_state = max_state(disk_state, nagios.state.critical)
                drv_desc.append("%d media errors" % (cur_dev['media_errors']))
                media_errors += 1

            # Other errors are reported, but they don't raise the state.
            if cur_dev['other_errors']:
                drv_desc.append("%d other errors" % (cur_dev['other_errors']))
                other_errors += 1

            if cur_dev['predictive_failures']:
                disk_state = max_state(disk_state, nagios.state.critical)
                drv_desc.append("%d predictive failures" %
                                (cur_dev['predictive_failures']))
                predictive_failures += 1

            # The firmware state stays None, if the drive section had no
            # 'Firmware state' line. Searching in None would raise
            # a TypeError, so this case is reported as critical instead.
            fw_state = cur_dev['fw_state']
            if fw_state is None:
                disk_state = max_state(disk_state, nagios.state.critical)
                drv_desc.append("unknown firmware state")
                fw_state_wrong += 1
            elif not re_good_fw_state.search(fw_state):
                if re_warn_fw_state.search(fw_state):
                    disk_state = max_state(disk_state, nagios.state.warning)
                else:
                    disk_state = max_state(disk_state, nagios.state.critical)
                drv_desc.append("wrong firmware state %r" % (fw_state))
                fw_state_wrong += 1

            # The foreign state stays None, if the drive section had no
            # 'Foreign State' line. Such a drive is not counted as wrong.
            # NOTE(review): confirm, that a missing foreign state should be
            # ignored rather than reported.
            foreign_state = cur_dev['foreign_state']
            if foreign_state is not None and foreign_state.lower() != "none":
                disk_state = max_state(disk_state, nagios.state.critical)
                drv_desc.append("wrong foreign state %r" % (foreign_state))
                foreign_state_wrong += 1

            found_errors = bool(drv_desc)
            if found_errors:
                state = max_state(state, disk_state)
                dd = "drive %s has " % (pd_id)
                dd += ' and '.join(drv_desc)
                errors.append(dd)
            if found_errors or self.verbose > 1:
                log.debug("State of drive %s is %s.", pd_id,
                          nagios.plugin.functions.STATUS_TEXT[disk_state])

        log.debug("Found %d drives.", drives_total)
        if self.verbose > 2:
            log.debug("Found Pds:\n%s", self.drive_list)
            log.debug("Found Pd data:\n%s", self.drive)

        if errors:
            out = ', '.join(errors)

        self.add_perfdata(label='drives_total', value=drives_total, uom='')
        self.add_perfdata(label='media_errors', value=media_errors, uom='')
        self.add_perfdata(label='other_errors', value=other_errors, uom='')
        self.add_perfdata(label='predictive_failures',
                          value=predictive_failures,
                          uom='')
        self.add_perfdata(label='wrong_fw_state', value=fw_state_wrong, uom='')
        self.add_perfdata(label='wrong_foreign_state',
                          value=foreign_state_wrong,
                          uom='')

        self.exit(state, out)