Exemplo n.º 1
0
    def write(self, value):
        """
        Push a value to the sensor through the configured write utility.

        The command line is produced by substituting int(value) into the
        write_source template. Nothing happens when no write source is
        configured. A response containing "Error", or any other exception
        raised while running the command, is logged as critical and not
        propagated; SystemExit is logged and re-raised.

        Arguments:
            value: value to be set to the sensor

        Return:
            N/A
        """
        template = self.write_source
        if template is None:
            return
        cmd = template % (int(value))
        Logger.debug("Setting value using cmd=%s" % cmd)
        response = ""
        try:
            pipe = Popen(cmd, shell=True, stdout=PIPE).stdout
            response = pipe.read().decode()
            if "Error" in response:
                # Raised only to reach the common error log below.
                raise Exception("Write failed with response=%s" % response)
        except SystemExit:
            Logger.debug("SystemExit from sensor write")
            raise
        except Exception:
            Logger.crit("Exception with cmd=%s response=%s" % (cmd, response))
Exemplo n.º 2
0
def get_sensor_tuples(fru_name, sensor_num, sensor_sources):
    """
    Walk through each of the sensor sources to build the tuples of the form
    'SensorValue'.

    A util-backed source returns every sensor in one read, so the first such
    source found replaces whatever has been collected so far and ends the
    walk. Sysfs-backed sources are read one at a time and stored under the
    key returned by get_sensor_tuple_sysfs. Any other source type is reported
    with a critical log and skipped.

    Arguments:
        fru_name: fru where the sensors should be read from
        sensor_num: sensor numbers passed to the util source read as 'num'
        sensor_sources: mapping of all sensor sources from fsc config
    Returns:
        The result of parse_all_sensors_util for a util source, otherwise a
        dict of the sysfs tuples collected (empty when there are no sources)
    """
    result = {}
    for key, sensor in sensor_sources.items():
        source = sensor.source
        if isinstance(source, FscSensorSourceUtil):
            result = parse_all_sensors_util(
                source.read(fru=fru_name, num=sensor_num)
            )
            break  # Hack: util reads all sensors
        elif isinstance(source, FscSensorSourceSysfs):
            # read() runs first so the counters passed along reflect the
            # outcome of this very read.
            symbolized_key, sensor_tuple = get_sensor_tuple_sysfs(
                key,
                source.read(),
                source.read_source_fail_counter,
                source.read_source_wrong_counter,
            )
            result[symbolized_key] = sensor_tuple
        else:
            Logger.crit("Unknown source type")
    return result
Exemplo n.º 3
0
def sensor_valid_check(board, sname, check_name, attribute):
    """
    Report whether a sensor reading should currently be trusted.

    When board is "all", the board name is taken from the part of sname
    before the first "_" and every "<board>_" occurrence is removed from
    sname. Only the "power_status" check type is supported; it is decided by
    bmc_read_power().

    Arguments:
        board: board name, or "all" to derive it from sname
        sname: sensor name
        check_name: not used by this implementation
        attribute: dict describing the check; its "type" selects the check

    Return:
        1 when bmc_read_power() returns 1, 0 when it returns anything else or
        when the check raised (logged as critical), -1 for an unknown check
        type. SystemExit is logged and re-raised.
    """
    cmd = ""
    data = ""

    if str(board) == "all":
        board = sname.split("_")[0]
        sname = sname.replace(board + "_", "")
    Logger.debug("board=%s sname=%s" % (board, sname))

    try:
        if attribute["type"] != "power_status":
            Logger.debug(
                "Sensor corresponding valid check funciton not found!")
            return -1
        # check power status first
        return 1 if bmc_read_power() == 1 else 0
    except SystemExit:
        Logger.debug("SystemExit from sensor read")
        raise
    except Exception as err:
        Logger.crit(
            "Exception with board=%s, sensor_name=%s, cmd=%s, response=%s, err=%s"
            % (board, sname, cmd, data, err))
    return 0
Exemplo n.º 4
0
    def read(self, **kwargs):
        '''
        Reads the sysfs source with 'cat' and returns the data read.

        read_source_fail_counter is reset to 0 after a read that printed
        nothing on stderr, and incremented when stderr is non-empty or the
        read raises.

        Arguments:
            kwargs: not used by this implementation

        Return:
            blob of data read from sysfs, as captured from the command's
            stdout ('' when the command could not be run)
        '''
        cmd = 'cat ' + self.read_source
        Logger.debug("Reading data with cmd=%s" % cmd)
        data = ''
        try:
            proc = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE)
            # communicate() drains stdout and stderr together and waits for
            # the child. Reading one pipe to EOF before the other can block
            # forever once the unread pipe fills up, and never reaps the
            # child process.
            # NOTE(review): the output is returned without decoding, as
            # before -- confirm callers expect that.
            data, err = proc.communicate()
            if err:
                self.read_source_fail_counter += 1
            else:
                self.read_source_fail_counter = 0
        except SystemExit:
            Logger.debug("SystemExit from sensor read")
            self.read_source_fail_counter += 1
            raise
        except Exception:
            Logger.crit("Exception with cmd=%s response=%s" % (cmd, data))
            self.read_source_fail_counter += 1
        return data
Exemplo n.º 5
0
def sensor_valid_check(board, sname, check_name, attribute):
    """
    Report whether a sensor reading should currently be trusted.

    Supported check types (attribute["type"]):
        "power_status": the result of bmc_read_power() is returned as is.
        "gpio": the shadow GPIO named by attribute["shadow"] is queried with
                gpiocli; the sensor is valid when the value after "=" is 0.

    Arguments:
        board: board name, only used in the error log
        sname: sensor name, only used in the error log
        check_name: not used by this implementation
        attribute: dict describing the check

    Return:
        The check result as described above, -1 for an unknown check type,
        0 when the check raised (logged as critical). SystemExit is logged
        and re-raised.
    """
    cmd = ""
    data = ""
    try:
        kind = attribute["type"]
        if kind == "power_status":
            return bmc_read_power()

        if kind == "gpio":
            cmd = ["gpiocli", "get-value", "--shadow", attribute["shadow"]]
            data = check_output(cmd).decode().split("=")
            return 1 if int(data[1]) == 0 else 0

        Logger.debug(
            "Sensor corresponding valid check funciton not found!")
        return -1
    except SystemExit:
        Logger.debug("SystemExit from sensor read")
        raise
    except Exception as err:
        Logger.crit(
            "Exception with board=%s, sensor_name=%s, cmd=%s, response=%s, err=%s"
            % (board, sname, cmd, data, err))
    return 0
Exemplo n.º 6
0
def board_host_actions(action="None", cause="None"):
    """
    Carry out a host action requested by the fan controller.

    When "host_shutdown" appears in action, the shutdown is logged as
    critical and the result of yamp_host_shutdown() is returned. Any other
    action is only logged as a warning and None is returned.
    """
    if "host_shutdown" not in action:
        Logger.warn("Host needs action '%s' and cause '%s'" %
                    (str(action), str(cause)))
        return None
    Logger.crit("Host is shutdown due to cause %s" % (str(cause), ))
    return yamp_host_shutdown()
Exemplo n.º 7
0
    def write(self, value):
        """
        Write a value to the sysfs location using echo:
        echo <scaled value> > sysfs_path

        The value written is value * max_duty_register / 100. Nothing
        happens when no write source is configured. Exceptions raised while
        running the command are logged as critical and not propagated;
        SystemExit is logged and re-raised.

        Arguments:
            value: value to be set to the sensor

        Return:
            N/A
        """
        if self.write_source is None:
            return
        # NOTE(review): under Python 3 this division yields a float, so the
        # text written may look like "127.5" -- confirm the sysfs node
        # accepts that.
        scaled = value * self.max_duty_register / 100
        cmd = " ".join(["echo", str(scaled), ">", self.write_source])
        Logger.debug("Setting value using cmd=%s" % cmd)
        response = ""
        try:
            pipe = Popen(cmd, shell=True, stdout=PIPE).stdout
            response = pipe.read().decode()
        except SystemExit:
            Logger.debug("SystemExit from sensor write")
            raise
        except Exception:
            Logger.crit("Exception with cmd=%s response=%s" % (cmd, response))
Exemplo n.º 8
0
    def read(self, **kwargs):
        """
        Run the util and return everything it printed.

        The command line depends on the keyword arguments:
            no 'fru':                 'util'
            'fru' only:               'util <fru name>'
            'fru' and non-empty 'num': one 'util <fru name> <sensor number>;'
                                      per entry of 'num', run back to back

        Arguments:
            kwargs: optional 'fru' (fru name) and 'num' (sensor numbers)

        Return:
            blob of data read from util ('' when running it raised; the
            failure is logged as critical). SystemExit is logged and
            re-raised.
        """
        cmd = self.read_source
        if "fru" in kwargs:
            fru = kwargs["fru"]
            nums = kwargs["num"] if "num" in kwargs else ()
            if len(nums):
                cmd = "".join(
                    self.read_source + " " + fru + " " + num + ";"
                    for num in nums)
            else:
                cmd = cmd + " " + fru
        Logger.debug("Reading data with cmd=%s" % cmd)
        data = ""
        try:
            pipe = Popen(cmd, shell=True, stdout=PIPE).stdout
            data = pipe.read().decode()
        except SystemExit:
            Logger.debug("SystemExit from sensor read")
            raise
        except Exception:
            Logger.crit("Exception with cmd=%s response=%s" % (cmd, data))
        return data
Exemplo n.º 9
0
def sensor_valid_check(board, sname, check_name, attribute):
    """
    Report whether a sensor reading should currently be trusted.

    Supported check types (attribute['type']):
        "power_BIC_status": valid when power-util reports the fru as ON and
            the GPIO given by attribute['number'] reads 0.
        "gpio_power_nvme": valid when the GPIO reads 0, power-util reports
            the fru as ON and the cache file
            /tmp/cache_store/M2_<attribute['nvme']>_NVMe contains exactly "1".

    Arguments:
        board: board name, only used in the error log
        sname: sensor name, only used in the error log
        check_name: not used by this implementation
        attribute: dict describing the check ('type', 'fru', 'number' and,
                   for the NVMe check, 'nvme')

    Return:
        1 when the sensor is valid, 0 otherwise -- including unknown check
        types and any failure while running the checks (logged as critical).
        SystemExit is logged and re-raised.
    """
    cmd = ''
    data = ''
    try:
        if attribute['type'] == "power_BIC_status":
            cmd = "/usr/local/bin/power-util %s status" % attribute['fru']
            data = Popen(cmd, shell=True, stdout=PIPE).stdout.read().decode()
            result = data.split(": ")
            if match(r'ON', result[1]) is None:
                return 0
            cmd = "cat /sys/class/gpio/gpio%s/value" % attribute['number']
            # Reset so a failure of the next command is not logged together
            # with the previous command's output.
            data = ''
            data = Popen(cmd, shell=True, stdout=PIPE).stdout.read().decode()
            return 1 if int(data) == 0 else 0

        elif attribute['type'] == "gpio_power_nvme":
            cmd = "cat /sys/class/gpio/gpio%s/value" % attribute['number']
            data = Popen(cmd, shell=True, stdout=PIPE).stdout.read().decode()
            if int(data) != 0:
                return 0
            cmd = "/usr/local/bin/power-util %s status" % attribute['fru']
            data = ''
            data = Popen(cmd, shell=True, stdout=PIPE).stdout.read().decode()
            result = data.split(": ")
            if match(r'ON', result[1]) is None:
                return 0
            cmd = "/tmp/cache_store/M2_%s_NVMe" % attribute['nvme']
            data = ''
            if not os.path.isfile(cmd):
                return 0
            # The context manager closes the cache file on every path; the
            # previous code left the handle open.
            with open(cmd, "r") as cache_file:
                data = cache_file.read()
            return 1 if data == "1" else 0
        else:
            Logger.debug(
                "Sensor corresponding valid check funciton not found!")
            return 0
    except SystemExit:
        Logger.debug("SystemExit from sensor read")
        raise
    except Exception:
        Logger.crit(
            "Exception with board=%s, sensor_name=%s, cmd=%s, response=%s" %
            (board, sname, cmd, data))
    return 0
Exemplo n.º 10
0
def board_host_actions(action="None", cause="None"):
    """
    Board-specific handling of host actions requested by the fan controller:
    - handling host power off
    - alarming/syslogging criticals

    When "host_shutdown" appears in action, the shutdown is logged as
    critical and the result of host_shutdown() is returned. Any other action
    is only logged as a warning and None is returned.
    """
    if "host_shutdown" not in action:
        Logger.warn(
            "Host needs action '%s' and cause '%s'" % (str(action), str(cause))
        )
        return None
    Logger.crit("Host is shutdown due to cause %s" % (str(cause),))
    return host_shutdown()
Exemplo n.º 11
0
def sensor_valid_check(board, sname, check_name, attribute):
    """
    Report whether a sensor reading should currently be trusted.

    Supported check types (attribute["type"]):
        "power_BIC_status": valid when power-util reports the fru as ON and
            the GPIO given by attribute["number"] reads 0.
        "gpio_power_nvme": when the GPIO reads 0 and power-util reports the
            fru as ON, the integer stored under the key
            M2_<attribute["nvme"]>_NVMe (read with kv_get) is returned; a
            failure to read or convert that key yields 0.

    Arguments:
        board: board name, only used in the error log
        sname: sensor name, only used in the error log
        check_name: not used by this implementation
        attribute: dict describing the check

    Return:
        The check result as described above; 0 for unknown check types and
        for any failure while running the checks (logged as critical).
        SystemExit is logged and re-raised.
    """
    cmd = ""
    data = ""
    try:
        kind = attribute["type"]

        if kind == "power_BIC_status":
            cmd = "/usr/local/bin/power-util %s status" % attribute["fru"]
            data = Popen(cmd, shell=True, stdout=PIPE).stdout.read().decode()
            if match(r"ON", data.split(": ")[1]) is None:
                return 0
            cmd = "cat /sys/class/gpio/gpio%s/value" % attribute["number"]
            # Cleared so a failing command is not logged with stale output.
            data = ""
            data = Popen(cmd, shell=True, stdout=PIPE).stdout.read().decode()
            return 1 if int(data) == 0 else 0

        if kind == "gpio_power_nvme":
            cmd = "cat /sys/class/gpio/gpio%s/value" % attribute["number"]
            data = Popen(cmd, shell=True, stdout=PIPE).stdout.read().decode()
            if int(data) != 0:
                return 0
            cmd = "/usr/local/bin/power-util %s status" % attribute["fru"]
            data = ""
            data = Popen(cmd, shell=True, stdout=PIPE).stdout.read().decode()
            if match(r"ON", data.split(": ")[1]) is None:
                return 0
            key = "M2_%s_NVMe" % attribute["nvme"]
            try:
                return int(kv_get(key))
            except Exception:
                return 0

        Logger.debug(
            "Sensor corresponding valid check funciton not found!")
        return 0
    except SystemExit:
        Logger.debug("SystemExit from sensor read")
        raise
    except Exception:
        Logger.crit(
            "Exception with board=%s, sensor_name=%s, cmd=%s, response=%s" %
            (board, sname, cmd, data))
    return 0
Exemplo n.º 12
0
def board_host_actions(action="None", cause="None"):
    """
    Board-specific handling of host actions requested by the fan controller:
    - handling host power off
    - alarming/syslogging criticals

    A "host_shutdown" action whose cause contains "All fans are bad" is
    skipped (returning False, with a warning) when
    check_if_all_fantrays_ok() returns a false value. Every other
    "host_shutdown" is logged as critical and returns the result of
    host_shutdown(). Any other action is only logged as a warning and None
    is returned.
    """
    if "host_shutdown" not in action:
        Logger.warn("Host needs action '%s' and cause '%s'" %
                    (str(action), str(cause)))
        return None

    if "All fans are bad" in cause and not check_if_all_fantrays_ok():
        Logger.warn("Host action %s not performed for cause %s" %
                    (str(action), str(cause)))
        return False

    Logger.crit("Host is shutdown due to cause %s" % (str(cause), ))
    return host_shutdown()
Exemplo n.º 13
0
    def read(self, **kwargs):
        """
        Run the util and return everything it printed.

        The command line depends on the keyword arguments:
            no 'fru':            'util'
            'fru' and 'inf':     'util <fru name> --filter <name> ...' with
                                 one name per entry of inf["ext_vars"] of the
                                 form '<board>:<name>' whose board equals fru
            'fru' and non-empty 'num' (no 'inf'):
                                 one 'util <fru name> <sensor number>;' per
                                 entry of 'num', run back to back
            'fru' only:          'util <fru name>'

        Arguments:
            kwargs: optional 'fru', 'inf' and 'num' as described above

        Return:
            blob of data read from util ('' when running it raised; the
            failure is logged as critical). SystemExit is logged and
            re-raised.
        """
        cmd = self.read_source
        if "fru" in kwargs:
            fru = kwargs["fru"]
            inf = kwargs.get("inf")
            if inf is not None:
                cmd += " " + fru + " --filter"
                for name in inf["ext_vars"]:
                    fields = name.split(":")
                    # Only variables that belong to this fru are requested.
                    if fields[0] == fru:
                        cmd += " " + fields[1]
            elif "num" in kwargs and len(kwargs["num"]):
                cmd = "".join(
                    self.read_source + " " + fru + " " + num + ";"
                    for num in kwargs["num"])
            else:
                cmd = cmd + " " + fru
        Logger.debug("Reading data with cmd=%s" % cmd)
        data = ""
        try:
            pipe = Popen(cmd, shell=True, stdout=PIPE).stdout
            data = pipe.read().decode()
        except SystemExit:
            Logger.debug("SystemExit from sensor read")
            raise
        except Exception:
            Logger.crit("Exception with cmd=%s response=%s" % (cmd, data))
        return data
Exemplo n.º 14
0
def sensor_valid_check(board, sname, check_name, attribute):
    """
    Report whether a sensor reading should currently be trusted.

    Supported check types (attribute["type"]):
        "power_status": 0 unless bmc_read_power() returns 1; otherwise the
            answer of lpal_hndl.pal_sensor_is_valid for this board/sensor.
        "prsnt": the answer of lpal_hndl.pal_sensor_is_valid for this
            board/sensor.
        "gpio": the shadow GPIO named by attribute["shadow"] is queried with
            gpiocli; the sensor is valid when the value after "=" is 0.

    Arguments:
        board: board (fru) name
        sname: sensor name
        check_name: not used by this implementation
        attribute: dict describing the check

    Return:
        The check result as described above, -1 for an unknown check type,
        0 when the check raised (logged as critical). SystemExit is logged
        and re-raised.
    """
    cmd = ""
    data = ""
    try:
        kind = attribute["type"]

        # check power status first
        if kind == "power_status" and bmc_read_power() != 1:
            return 0

        if kind in ("power_status", "prsnt"):
            fru_arg = c_char_p(board.encode("utf-8"))
            sensor_arg = c_char_p(sname.encode("utf-8"))
            return int(lpal_hndl.pal_sensor_is_valid(fru_arg, sensor_arg))

        if kind == "gpio":
            cmd = ["gpiocli", "get-value", "--shadow", attribute["shadow"]]
            data = check_output(cmd).decode().split("=")
            return 1 if int(data[1]) == 0 else 0

        Logger.debug(
            "Sensor corresponding valid check funciton not found!")
        return -1
    except SystemExit:
        Logger.debug("SystemExit from sensor read")
        raise
    except Exception as err:
        Logger.crit(
            "Exception with board=%s, sensor_name=%s, cmd=%s, response=%s, err=%s"
            % (board, sname, cmd, data, err))
    return 0
Exemplo n.º 15
0
    def read_fans(self, fans):
        """
        Read the speed of every fan.

        Each fan is read through its own source: util-backed sources are
        parsed with parse_fan_util, sysfs-backed ones with parse_fan_sysfs.
        A fan with any other source type is reported with a critical log and
        left out of the result.

        Arguments:
            fans: mapping of all fan sensor sources from fsc config

        Returns:
            dict mapping each fan object (the values of fans) to its parsed
            speed
        """
        Logger.debug("Read all fan speeds")
        speeds = {}
        for fan in list(fans.values()):
            source = fan.source
            if isinstance(source, FscSensorSourceUtil):
                speeds[fan] = parse_fan_util(source.read())
            elif isinstance(source, FscSensorSourceSysfs):
                speeds[fan] = parse_fan_sysfs(source.read())
            else:
                Logger.crit("Unknown source type")
        return speeds
Exemplo n.º 16
0
    def write(self, value):
        """
        Write a value to the sysfs location using echo:
        echo <value> > sysfs_path

        Nothing happens when no write source is configured. Exceptions
        raised while running the command are logged as critical and not
        propagated; SystemExit is logged and re-raised.

        Arguments:
            value: value to be set to the sensor

        Return:
            N/A
        """
        if self.write_source is None:
            return
        cmd = " ".join(["echo", str(value), ">", self.write_source])
        Logger.debug("Setting value using cmd=%s" % cmd)
        response = ""
        try:
            pipe = Popen(cmd, shell=True, stdout=PIPE).stdout
            response = pipe.read().decode()
        except SystemExit:
            Logger.debug("SystemExit from sensor write")
            raise
        except Exception:
            Logger.crit("Exception with cmd=%s response=%s" % (cmd, response))
Exemplo n.º 17
0
def host_shutdown():
    """
    Run the SCM power-off command, wait five seconds, then run the switch
    power-off command that matches the board type.

    The board type comes from pal_get_board_type(). For a type other than
    "Wedge400" or "Wedge400C" two critical messages are logged and only the
    SCM command is run.

    Returns:
        stdout of the last command that was run
    """
    SCM_POWER_COMMAND = "/usr/local/bin/wdtcli kick &> /dev/null; /usr/local/bin/wedge_power.sh off"
    TH_SWITCH_POWER_COMMAND = "source /usr/local/bin/openbmc-utils.sh; echo 0 > $SMBCPLD_SYSFS_DIR/th3_turn_on"
    GB_SWITCH_POWER_COMMAND = "source /usr/local/bin/openbmc-utils.sh; echo 0 > $SMBCPLD_SYSFS_DIR/gb_turn_on"
    switch_commands = {
        "Wedge400": TH_SWITCH_POWER_COMMAND,
        "Wedge400C": GB_SWITCH_POWER_COMMAND,
    }

    brd_type = pal_get_board_type()
    switch_poweroff_cmd = switch_commands.get(brd_type, "")
    if not switch_poweroff_cmd:
        Logger.crit("Cannot identify board type: %s" % brd_type)
        Logger.crit("Switch won't be resetting!")

    Logger.info("host_shutdown() executing {}".format(SCM_POWER_COMMAND))
    scm_pipe = Popen(SCM_POWER_COMMAND, shell=True, stdout=PIPE).stdout
    response = scm_pipe.read()
    time.sleep(5)
    if switch_poweroff_cmd:
        Logger.info("host_shutdown() executing {}".format(switch_poweroff_cmd))
        switch_pipe = Popen(switch_poweroff_cmd, shell=True, stdout=PIPE).stdout
        response = switch_pipe.read()

    return response
Exemplo n.º 18
0
def sensor_valid_check(board, sname, check_name, attribute):
    """
    Report whether a sensor reading should currently be trusted.

    Supported check types (attribute['type']):
        "power_status": 0 unless bmc_read_power() returns 1; otherwise the
            answer of lpal_hndl.pal_sensor_is_valid for this board/sensor.
        "gpio": the GPIO given by attribute['number'] is read from sysfs;
            the sensor is valid when it reads 0.

    Arguments:
        board: board (fru) name
        sname: sensor name
        check_name: not used by this implementation
        attribute: dict describing the check

    Return:
        The check result as described above, -1 for an unknown check type,
        0 when the check raised (logged as critical). SystemExit is logged
        and re-raised.
    """
    cmd = ''
    data = ''
    try:
        kind = attribute['type']

        if kind == "power_status":
            # check power status first
            if bmc_read_power() != 1:
                return 0
            fru_arg = c_char_p(board.encode('utf-8'))
            sensor_arg = c_char_p(sname.encode('utf-8'))
            return int(lpal_hndl.pal_sensor_is_valid(fru_arg, sensor_arg))

        if kind == "gpio":
            cmd = "cat /sys/class/gpio/gpio%s/value" % attribute['number']
            data = Popen(cmd, shell=True, stdout=PIPE).stdout.read().decode()
            return 1 if int(data) == 0 else 0

        Logger.debug(
            "Sensor corresponding valid check funciton not found!")
        return -1
    except SystemExit:
        Logger.debug("SystemExit from sensor read")
        raise
    except Exception as err:
        Logger.crit(
            "Exception with board=%s, sensor_name=%s, cmd=%s, response=%s, err=%s"
            % (board, sname, cmd, data, err))
    return 0
Exemplo n.º 19
0
    def read(self, **kwargs):
        '''
        Reads the sysfs source with 'cat' and returns the decoded data read.

        read_source_fail_counter is reset to 0 after a read that printed
        nothing on stderr, and incremented when stderr is non-empty or the
        read raises.

        Arguments:
            kwargs: not used by this implementation

        Return:
            blob of data read from sysfs ('' when the command could not be
            run or its output could not be decoded)
        '''

        # If read_source has hwmon* then determine what is the hwmon device
        # and use that for reading
        readsysfs = self.read_source
        if "hwmon*" in self.read_source:
            readsysfs = self.get_hwmon_source()

        cmd = 'cat ' + readsysfs
        Logger.debug("Reading data with cmd=%s" % cmd)
        data = ''
        try:
            proc = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE)
            # communicate() drains stdout and stderr together and waits for
            # the child. Reading one pipe to EOF before the other can block
            # forever once the unread pipe fills up, and never reaps the
            # child process.
            out, err = proc.communicate()
            data = out.decode()
            err = err.decode()
            if err:
                self.read_source_fail_counter += 1
            else:
                self.read_source_fail_counter = 0
        except SystemExit:
            Logger.debug("SystemExit from sensor read")
            self.read_source_fail_counter += 1
            raise
        except Exception:
            Logger.crit("Exception with cmd=%s response=%s" % (cmd, data))
            self.read_source_fail_counter += 1
        return data
Exemplo n.º 20
0
    def update_dead_fans(self, dead_fans):
        """
        Check for dead and recovered fans.

        Every fan whose speed is below fsc_config['min_rpm'] is added to
        dead_fans (and fsc_fan_action is called with action='dead'); every
        other fan is removed from it. When at least one fan died since the
        previous call, all currently dead fans are logged and an empty record
        file is created for each one under FAN_FAIL_RECORD_DIR. Fans that
        recovered are logged, fsc_fan_action is called with action='recover',
        and their record file is removed. Messages are logged as warnings
        when self.fanpower is set and as critical otherwise.

        Arguments:
            dead_fans: set of fans known to be dead; updated in place

        Returns:
            the updated dead_fans set
        """
        previously_dead = dead_fans.copy()
        speeds = self.machine.read_fans(self.fans)
        # ANSI escape sequence: clear the screen and move the cursor home.
        print("\x1b[2J\x1b[H")
        sys.stdout.flush()

        for fan, rpms in list(speeds.items()):
            Logger.info("%s speed: %d RPM" % (fan.label, rpms))
            too_slow = rpms < self.fsc_config['min_rpm']
            if not too_slow:
                dead_fans.discard(fan)
                continue
            dead_fans.add(fan)
            self.fsc_fan_action(fan, action='dead')

        recovered_fans = previously_dead - dead_fans
        newly_dead_fans = dead_fans - previously_dead

        if newly_dead_fans:
            report = Logger.warn if self.fanpower else Logger.crit
            report("%d fans failed" % (len(dead_fans), ))
            for dead_fan in dead_fans:
                report = Logger.warn if self.fanpower else Logger.crit
                report("%s dead, %d RPM" %
                       (dead_fan.label, speeds[dead_fan]))
                Logger.usbdbg("%s fail" % (dead_fan.label))
                record_path = FAN_FAIL_RECORD_DIR + '%s' % (dead_fan.label)
                if not os.path.isfile(record_path):
                    # An empty file marks the fan as failed.
                    open(record_path, 'w').close()

        for fan in recovered_fans:
            report = Logger.warn if self.fanpower else Logger.crit
            report("%s has recovered" % (fan.label, ))
            Logger.usbdbg("%s recovered" % (fan.label))
            self.fsc_fan_action(fan, action='recover')
            record_path = FAN_FAIL_RECORD_DIR + '%s' % (fan.label)
            if os.path.isfile(record_path):
                os.remove(record_path)
        return dead_fans
Exemplo n.º 21
0
    def update_dead_fans(self, dead_fans):
        """
        Check for dead and recovered fans.

        Every fan whose speed is below fsc_config['min_rpm'] is added to
        dead_fans (and fsc_fan_action is called with action='dead'); every
        other fan is removed from it. When at least one fan died since the
        previous call, all currently dead fans are logged. Fans that
        recovered are logged and fsc_fan_action is called with
        action='recover'. Messages are logged as warnings when self.fanpower
        is set and as critical otherwise.

        Arguments:
            dead_fans: set of fans known to be dead; updated in place

        Returns:
            the updated dead_fans set
        """
        previously_dead = dead_fans.copy()
        speeds = self.machine.read_fans(self.fans)
        # ANSI escape sequence: clear the screen and move the cursor home.
        print("\x1b[2J\x1b[H")
        sys.stdout.flush()

        for fan, rpms in speeds.items():
            Logger.info("%s speed: %d RPM" % (fan.label, rpms))
            too_slow = rpms < self.fsc_config['min_rpm']
            if not too_slow:
                dead_fans.discard(fan)
                continue
            dead_fans.add(fan)
            self.fsc_fan_action(fan, action='dead')

        recovered_fans = previously_dead - dead_fans
        newly_dead_fans = dead_fans - previously_dead

        if newly_dead_fans:
            report = Logger.warn if self.fanpower else Logger.crit
            report("%d fans failed" % (len(dead_fans), ))
            for dead_fan in dead_fans:
                report = Logger.warn if self.fanpower else Logger.crit
                report("%s dead, %d RPM" %
                       (dead_fan.label, speeds[dead_fan]))
                Logger.usbdbg("%s fail" % (dead_fan.label))

        for fan in recovered_fans:
            report = Logger.warn if self.fanpower else Logger.crit
            report("%s has recovered" % (fan.label, ))
            Logger.usbdbg("%s recovered" % (fan.label))
            self.fsc_fan_action(fan, action='recover')
        return dead_fans
Exemplo n.º 22
0
    def run(self, sensors, dt):
        """
        Evaluate this zone's expression against the given sensor readings
        and return the resulting output.

        Besides computing the output, the call updates the per-sensor
        validity / missing-sensor bookkeeping on self, creates or removes
        sensor-fail and boost record files, and writes the selected fan mode
        through get_set_fan_mode.

        Arguments:
            sensors: nested mapping indexed as sensors[board][sname]; each
                     entry is read for its .value, .status and .name
            dt: value stored under the "dt" key of the evaluation context

        Return:
            the expression output, raised to at least the floor (outmin)
            collected while scanning the sensors, passed through
            clamp(exprout, 0, 100)
        """
        ctx = {"dt": dt}
        # Lowest output allowed; raised to self.transitional / self.boost
        # when sensors report problems.
        outmin = 0
        fail_ssd_count = 0
        sensor_index = 0
        cause_boost_count = 0
        no_sane_flag = 0
        mode = 0

        # Each external variable is named "<board>:<sensor name>".
        for v in self.expr_meta["ext_vars"]:
            sensor_valid_flag = 1
            sdata = v.split(":")
            board = sdata[0]
            sname = sdata[1]
            if self.sensor_valid_check != None:
                # The first configured check whose name matches the sensor
                # name (as a case-insensitive regex) is the only one applied.
                for check_name in self.sensor_valid_check:
                    if re.match(check_name, sname, re.IGNORECASE) != None:
                        self.sensor_valid_cur[
                            sensor_index] = fsc_board.sensor_valid_check(
                                board,
                                sname,
                                check_name,
                                self.sensor_valid_check[check_name]
                                ["attribute"],
                            )
                        # If current or previous sensor valid status is 0, ignore this sensor reading.
                        # Only when both are 1, goes to sensor check process
                        if (self.sensor_valid_cur[sensor_index]
                                == 0) or (self.sensor_valid_pre[sensor_index]
                                          == 0):
                            sensor_valid_flag = 0
                            self.missing_sensor_assert_retry[sensor_index] = 0
                        break

            if sensor_valid_flag == 1:
                if sname in sensors[board]:
                    # Sensor is present: clear any pending "missing" state.
                    self.missing_sensor_assert_retry[sensor_index] = 0
                    if self.missing_sensor_assert_flag[sensor_index]:
                        Logger.crit("DEASSERT: Zone%d Missing sensors: %s" %
                                    (self.counter, v))
                        self.missing_sensor_assert_flag[sensor_index] = False

                    sensor = sensors[board][sname]
                    ctx[v] = sensor.value
                    if sensor.status in ["ucr"]:
                        # A "ucr" status raises the floor to the transitional
                        # value.
                        Logger.warn("Sensor %s reporting status %s" %
                                    (sensor.name, sensor.status))
                        outmin = max(outmin, self.transitional)
                        if outmin == self.transitional:
                            mode = fan_mode["trans_mode"]
                    else:
                        if self.sensor_fail == True:
                            sensor_fail_record_path = SENSOR_FAIL_RECORD_DIR + v
                            if not os.path.isdir(SENSOR_FAIL_RECORD_DIR):
                                os.mkdir(SENSOR_FAIL_RECORD_DIR)
                            # An "na" status counts as a failure unless the
                            # validity check returned -1 for this sensor.
                            if (sensor.status in [
                                    "na"
                            ]) and (self.sensor_valid_cur[sensor_index] != -1):
                                # Sensors whose name starts with "SSD" are
                                # only counted here; the count is handled
                                # after the expression is evaluated. Any
                                # other failed sensor raises the floor to
                                # boost right away.
                                if re.match(r"SSD", sensor.name) != None:
                                    fail_ssd_count = fail_ssd_count + 1
                                else:
                                    Logger.warn("%s Fail" % v)
                                    outmin = max(outmin, self.boost)
                                    cause_boost_count += 1
                                # An empty record file marks the sensor as
                                # failed.
                                if not os.path.isfile(sensor_fail_record_path):
                                    sensor_fail_record = open(
                                        sensor_fail_record_path, "w")
                                    sensor_fail_record.close()
                                if outmin == self.boost:
                                    mode = fan_mode["boost_mode"]
                            else:
                                if os.path.isfile(sensor_fail_record_path):
                                    os.remove(sensor_fail_record_path)
                else:
                    # Sensor is missing from the readings: the ASSERT is
                    # logged once the retry counter has reached 2, i.e. on
                    # the third consecutive miss.
                    if (not self.missing_sensor_assert_flag[sensor_index]
                        ) and (self.missing_sensor_assert_retry[sensor_index]
                               >= 2):
                        Logger.crit("ASSERT: Zone%d Missing sensors: %s" %
                                    (self.counter, v))
                        self.missing_sensor_assert_flag[sensor_index] = True
                    if self.missing_sensor_assert_retry[sensor_index] < 2:
                        self.missing_sensor_assert_retry[sensor_index] += 1
                    # evaluation tries to ignore the effects of None values
                    # (e.g. acts as 0 in max/+)
                    ctx[v] = None
            # Remember this cycle's validity for the next call.
            self.sensor_valid_pre[sensor_index] = self.sensor_valid_cur[
                sensor_index]
            sensor_index += 1

        # `verbose` is not defined in this method; it is looked up in an
        # enclosing scope.
        if verbose:
            (exprout, dxstr) = self.expr.dbgeval(ctx)
            Logger.info(dxstr + " = " + str(exprout))
        else:
            exprout = self.expr.eval(ctx)
            Logger.info(self.expr_str + " = " + str(exprout))
        # If *all* sensors in the top level max() report None, the
        # expression will report None
        if (not exprout) and (outmin == 0):
            # No usable output and no floor: fall back to the transitional
            # value. The ASSERT is logged only on the first such cycle.
            if not self.transitional_assert_flag:
                Logger.crit("ASSERT: Zone%d No sane fan speed could be \
                    calculated! Using transitional speed." % (self.counter))
            exprout = self.transitional
            mode = fan_mode["trans_mode"]
            no_sane_flag = 1
            self.transitional_assert_flag = True
        else:
            if self.transitional_assert_flag:
                Logger.crit("DEASSERT: Zone%d No sane fan speed could be \
                    calculated! Using transitional speed." % (self.counter))
            self.transitional_assert_flag = False

        # Handling of failed SSD sensors: each "offset_algorithm" entry is
        # indexed as i[0] (compared against the failure count) and i[1]
        # (added to the output). The first entry with
        # fail_ssd_count <= i[0] is applied; when the last entry is reached
        # without a match the floor is raised to boost instead.
        if self.fail_sensor_type != None:
            if "SSD_sensor_fail" in list(self.fail_sensor_type.keys()):
                if self.fail_sensor_type["SSD_sensor_fail"] == True:
                    if fail_ssd_count != 0:
                        if self.ssd_progressive_algorithm != None:
                            if "offset_algorithm" in list(
                                    self.ssd_progressive_algorithm.keys()):
                                list_index = 0
                                for i in self.ssd_progressive_algorithm[
                                        "offset_algorithm"]:
                                    list_index = list_index + 1
                                    if fail_ssd_count <= i[0]:
                                        exprout = exprout + i[1]
                                        no_sane_flag = 0
                                        break
                                    else:
                                        if list_index == len(
                                                self.ssd_progressive_algorithm[
                                                    "offset_algorithm"]):
                                            outmin = max(outmin, self.boost)
                                            cause_boost_count += 1
                                            if outmin == self.boost:
                                                mode = fan_mode["boost_mode"]

        # The boost record file exists exactly while at least one sensor
        # failure caused a boost in this cycle.
        boost_record_path = RECORD_DIR + "sensor_fail_boost"
        if cause_boost_count != 0:
            if not os.path.isfile(boost_record_path):
                sensor_fail_boost_record = open(boost_record_path, "w")
                sensor_fail_boost_record.close()
        else:
            if os.path.isfile(boost_record_path):
                os.remove(boost_record_path)

        # A None/zero output is treated as 0, then the floor is applied. The
        # mode becomes normal only when the output is not below the floor
        # and the "no sane speed" fallback was not taken.
        if not exprout:
            exprout = 0
        if exprout < outmin:
            exprout = outmin
        else:
            if no_sane_flag != 1:
                mode = fan_mode["normal_mode"]
        self.get_set_fan_mode(mode, action="write")
        exprout = clamp(exprout, 0, 100)
        return exprout
Exemplo n.º 23
0
    global wdfile
    board_callout(callout='init_fans', boost=DEFAULT_INIT_TRANSITIONAL)
    Logger.warn("killed by signal %d" % (signum,))
    if signum == signal.SIGQUIT and wdfile:
        Logger.info("Killed with SIGQUIT - stopping watchdog.")
        wdfile.write(b"X")
        wdfile.flush()
        wdfile.close()
        wdfile = None
    sys.exit('killed')


if __name__ == "__main__":
    # Script entry point: install the termination handlers, start the
    # controller with the log level given as the first command line argument
    # ('warning' when absent), and on any unexpected exception put the fans
    # into the initial transitional state before logging the traceback.
    try:
        for signo in (signal.SIGTERM, signal.SIGINT, signal.SIGQUIT):
            signal.signal(signo, handle_term)
        llevel = sys.argv[1] if len(sys.argv) > 1 else 'warning'
        fscd = Fscd(log_level=llevel)
        fscd.run()
    except Exception:
        # Make the fans safe first; reporting comes second.
        board_callout(callout='init_fans', boost=DEFAULT_INIT_TRANSITIONAL)
        (etype, e) = sys.exc_info()[:2]
        Logger.crit("failed, exception: " + str(etype))
        traceback.print_exc()
        # Log the traceback one line per record.
        for line in traceback.format_exc().split('\n'):
            Logger.crit(line)
Exemplo n.º 24
0
    def run(self, sensors, dt):
        """
        Evaluate this zone's expression against the latest sensor readings
        and return the resulting fan speed.

        Arguments:
            sensors: mapping of board name to a mapping of sensor name to
                reading; each reading provides 'value', 'status' and 'name'
            dt: stored in the evaluation context under the 'dt' key

        Returns:
            The expression result, raised to the zone minimum when a sensor
            reports status 'ucr' (transitional) or 'na' (boost), then passed
            through clamp(..., 0, 100)
        """
        ctx = {'dt': dt}
        outmin = 0
        fail_ssd_count = 0

        for sensor_index, v in enumerate(self.expr_meta['ext_vars']):
            sensor_valid_flag = 1
            board, sname = v.split(":")
            if self.sensor_valid_check is not None:
                for check_name in self.sensor_valid_check:
                    if re.match(check_name, sname, re.IGNORECASE) is None:
                        continue
                    self.sensor_valid_cur[sensor_index] = (
                        fsc_board.sensor_valid_check(
                            board, sname, check_name,
                            self.sensor_valid_check[check_name]["attribute"]))
                    # If the current or the previous validity status is 0,
                    # ignore this sensor reading. Only when neither is 0
                    # does the reading go through the checks below.
                    if (self.sensor_valid_cur[sensor_index] == 0 or
                            self.sensor_valid_pre[sensor_index] == 0):
                        sensor_valid_flag = 0
                        self.missing_sensor_assert_retry[sensor_index] = 0
                    # Only the first matching check pattern is applied.
                    break

            if sensor_valid_flag == 1:
                if sname in sensors[board]:
                    self.missing_sensor_assert_retry[sensor_index] = 0
                    if self.missing_sensor_assert_flag[sensor_index]:
                        Logger.crit('DEASSERT: Zone%d Missing sensors: %s' %
                                    (self.counter, v))
                        self.missing_sensor_assert_flag[sensor_index] = False

                    sensor = sensors[board][sname]
                    ctx[v] = sensor.value
                    if sensor.status in ['ucr']:
                        Logger.warn('Sensor %s reporting status %s' %
                                    (sensor.name, sensor.status))
                        outmin = max(outmin, self.transitional)
                    # '== True' is kept on purpose so that only True/1 in the
                    # configuration enables sensor-fail handling.
                    elif (self.sensor_fail == True and
                          sensor.status in ['na'] and
                          self.sensor_valid_cur[sensor_index] != -1):
                        if re.match(r'.+_C[2-4]_[0-3]_NVME_.+',
                                    sensor.name) is not None:
                            Logger.warn("%s Fail" % v)
                            outmin = max(outmin, self.boost)
                        elif (re.match(r'SSD', sensor.name) is not None or
                              re.match(r'(.*)nvme(.*)', sname) is not None):
                            # SSD failures are only counted here; the
                            # progressive offset table below decides what
                            # they cost.
                            fail_ssd_count += 1
                        else:
                            Logger.warn("%s Fail" % v)
                            outmin = max(outmin, self.boost)
                else:
                    # The sensor must be missing on more than two
                    # consecutive runs before the ASSERT is logged.
                    if (not self.missing_sensor_assert_flag[sensor_index] and
                            self.missing_sensor_assert_retry[sensor_index]
                            >= 2):
                        Logger.crit('ASSERT: Zone%d Missing sensors: %s' %
                                    (self.counter, v))
                        self.missing_sensor_assert_flag[sensor_index] = True
                    if self.missing_sensor_assert_retry[sensor_index] < 2:
                        self.missing_sensor_assert_retry[sensor_index] += 1
                    # evaluation tries to ignore the effects of None values
                    # (e.g. acts as 0 in max/+)
                    ctx[v] = None
            self.sensor_valid_pre[sensor_index] = \
                self.sensor_valid_cur[sensor_index]

        if verbose:
            (exprout, dxstr) = self.expr.dbgeval(ctx)
            Logger.info(dxstr + " = " + str(exprout))
        else:
            exprout = self.expr.eval(ctx)
            Logger.info(self.expr_str + " = " + str(exprout))

        # If *all* sensors in the top level max() report None, the
        # expression will report None
        if (not exprout) and (outmin == 0):
            if not self.transitional_assert_flag:
                # Adjacent literals instead of a backslash continuation
                # inside the string, which embedded the source indentation
                # in the logged message.
                Logger.crit('ASSERT: Zone%d No sane fan speed could be '
                            'calculated! Using transitional speed.'
                            % (self.counter))
            exprout = self.transitional
            self.transitional_assert_flag = True
        else:
            if self.transitional_assert_flag:
                Logger.crit('DEASSERT: Zone%d No sane fan speed could be '
                            'calculated! Using transitional speed.'
                            % (self.counter))
            self.transitional_assert_flag = False

        if (self.fail_sensor_type is not None and
                'SSD_sensor_fail' in self.fail_sensor_type and
                self.fail_sensor_type['SSD_sensor_fail'] == True and
                fail_ssd_count != 0 and
                self.ssd_progressive_algorithm is not None and
                'offset_algorithm' in self.ssd_progressive_algorithm):
            offset_table = self.ssd_progressive_algorithm['offset_algorithm']
            for entry in offset_table:
                # entry[0] is the failure-count threshold, entry[1] the
                # offset added to the expression result.
                if fail_ssd_count <= entry[0]:
                    # exprout can still be None here (expression yielded
                    # None while outmin was already raised); treat it as 0
                    # instead of raising TypeError.
                    exprout = (exprout or 0) + entry[1]
                    break
            else:
                # More failures than the last threshold covers: boost.
                # An empty table leaves outmin untouched.
                if len(offset_table) > 0:
                    outmin = max(outmin, self.boost)

        if not exprout:
            exprout = 0
        if exprout < outmin:
            exprout = outmin
        return clamp(exprout, 0, 100)
Exemplo n.º 25
0
    def run(self, sensors, dt):
        """
        Evaluate this zone's expression against the latest sensor readings
        and return the resulting fan speed.

        Arguments:
            sensors: mapping of board name to a mapping of sensor name to
                reading; each reading provides 'value', 'status' and 'name'
            dt: stored in the evaluation context under the 'dt' key

        Returns:
            The expression result (or the transitional speed when the
            expression yields nothing usable), raised to the zone minimum
            where one was set, then passed through clamp(..., 0, 100)
        """
        ctx = {'dt': dt}
        outmin = 0
        fail_ssd_count = 0
        missing = set()

        def fail_type_enabled(key):
            # True only when the zone's fail_sensor_type config lists `key`
            # with a value equal to True ('== True' kept on purpose so the
            # accepted config values do not change).
            return (self.fail_sensor_type is not None and
                    key in self.fail_sensor_type and
                    self.fail_sensor_type[key] == True)

        for v in self.expr_meta['ext_vars']:
            board, sname = v.split(":")
            if sname not in sensors[board]:
                missing.add(v)
                # evaluation tries to ignore the effects of None values
                # (e.g. acts as 0 in max/+)
                ctx[v] = None
                continue

            sensor = sensors[board][sname]
            ctx[v] = sensor.value
            if sensor.status in ['ucr']:
                Logger.warn('Sensor %s reporting status %s' %
                            (sensor.name, sensor.status))
                outmin = self.transitional

            # Failure handling applies only to 'na' readings and only when
            # standby sensor failure handling is enabled.
            if not (fail_type_enabled('standby_sensor_fail') and
                    sensor.status in ['na']):
                continue

            if re.match(r'SOC', sensor.name) is not None:
                # A failed SOC sensor boosts only when the board reports a
                # truthy power status.
                if (fail_type_enabled('server_sensor_fail') and
                        fsc_board.get_power_status(board)):
                    Logger.debug("Server Sensor Fail")
                    outmin = self.boost
                    break
            elif re.match(r'SSD', sensor.name) is not None:
                # SSD failures are only counted here; the progressive
                # offset table below decides what they cost.
                if fail_type_enabled('SSD_sensor_fail'):
                    fail_ssd_count += 1
            else:
                Logger.debug("Standby Sensor Fail")
                outmin = self.boost
                # Boost is already requested; remaining sensors are skipped.
                break

        if missing:
            # Sorted so the message is stable from run to run.
            Logger.warn('Missing sensors: %s' % (', '.join(sorted(missing)), ))

        if verbose:
            (exprout, dxstr) = self.expr.dbgeval(ctx)
            Logger.info(dxstr + " = " + str(exprout))
        else:
            exprout = self.expr.eval(ctx)
            Logger.info(self.expr_str + " = " + str(exprout))

        # If *all* sensors in the top level max() report None, the
        # expression will report None
        if not exprout:
            if not self.transitional_assert_flag:
                # Adjacent literals instead of a backslash continuation
                # inside the string, which embedded the source indentation
                # in the logged message.
                Logger.crit('ASSERT: Zone%d No sane fan speed could be '
                            'calculated! Using transitional speed.'
                            % (self.counter))
            exprout = self.transitional
            self.transitional_assert_flag = True
        else:
            if self.transitional_assert_flag:
                Logger.crit('DEASSERT: Zone%d No sane fan speed could be '
                            'calculated! Using transitional speed.'
                            % (self.counter))
            self.transitional_assert_flag = False

        if (fail_type_enabled('SSD_sensor_fail') and
                fail_ssd_count != 0 and
                self.ssd_progressive_algorithm is not None and
                'offset_algorithm' in self.ssd_progressive_algorithm):
            offset_table = self.ssd_progressive_algorithm['offset_algorithm']
            for entry in offset_table:
                # entry[0] is the failure-count threshold, entry[1] the
                # offset added to the expression result.
                if fail_ssd_count <= entry[0]:
                    exprout = exprout + entry[1]
                    break
            else:
                # More failures than the last threshold covers: boost.
                # An empty table leaves outmin untouched.
                if len(offset_table) > 0:
                    outmin = self.boost

        if exprout < outmin:
            exprout = outmin
        return clamp(exprout, 0, 100)
Exemplo n.º 26
0
    def run(self, sensors, ctx, ignore_mode):
        """
        Evaluate this zone's expression against the latest sensor readings,
        record the resulting fan mode and return the fan speed.

        Besides computing the speed, this maintains marker files: one per
        failed sensor under SENSOR_FAIL_RECORD_DIR, and a single
        "sensor_fail_boost" file under RECORD_DIR that exists while at
        least one boost cause was counted in this run.

        Arguments:
            sensors: mapping of board name to a mapping of sensor name to
                reading; each reading provides 'value', 'status' and 'name'
            ctx: evaluation context; updated in place with each sensor's
                value (None for a missing sensor)
            ignore_mode: when true, the computed fan mode is not written
                through get_set_fan_mode

        Returns:
            The expression result, adjusted by the SSD progressive offset
            and raised to the zone minimum where applicable, then passed
            through clamp(..., 0, 100)
        """
        outmin = 0
        fail_ssd_count = 0
        valid_m2_count = 0
        cause_boost_count = 0
        no_sane_flag = 0
        display_progressive_flag = 0
        mode = 0

        for sensor_index, v in enumerate(self.expr_meta["ext_vars"]):
            sensor_valid_flag = 1
            sdata = v.split(":")
            board = sdata[0]
            sname = sdata[1]
            if self.sensor_valid_check is not None:
                for check_name in self.sensor_valid_check:
                    if re.match(check_name, sname, re.IGNORECASE) is None:
                        continue
                    self.sensor_valid_cur[sensor_index] = (
                        fsc_board.sensor_valid_check(
                            board,
                            sname,
                            check_name,
                            self.sensor_valid_check[check_name]["attribute"],
                        ))
                    # If the current or the previous validity status is 0,
                    # ignore this sensor reading. Only when neither is 0
                    # does the reading go through the checks below.
                    if (self.sensor_valid_cur[sensor_index] == 0 or
                            self.sensor_valid_pre[sensor_index] == 0):
                        sensor_valid_flag = 0
                        self.missing_sensor_assert_retry[sensor_index] = 0
                    # Only the first matching check pattern is applied.
                    break

            if sensor_valid_flag == 1:
                if sname in sensors[board]:
                    self.missing_sensor_assert_retry[sensor_index] = 0
                    if self.missing_sensor_assert_flag[sensor_index]:
                        Logger.crit("DEASSERT: Zone%d Missing sensors: %s" %
                                    (self.counter, v))
                        self.missing_sensor_assert_flag[sensor_index] = False

                    sensor = sensors[board][sname]
                    ctx[v] = sensor.value
                    is_m2_temp = re.match(r".*temp_dev", sname) is not None
                    if is_m2_temp:
                        valid_m2_count += 1
                    if sensor.status in ["ucr"]:
                        Logger.warn("Sensor %s reporting status %s" %
                                    (sensor.name, sensor.status))
                        outmin = max(outmin, self.transitional)
                        if outmin == self.transitional:
                            mode = fan_mode["trans_mode"]
                    # '== True' is kept on purpose so that only True/1 in the
                    # configuration enables sensor-fail handling.
                    elif self.sensor_fail == True:
                        sensor_fail_record_path = SENSOR_FAIL_RECORD_DIR + v
                        if not os.path.isdir(SENSOR_FAIL_RECORD_DIR):
                            os.mkdir(SENSOR_FAIL_RECORD_DIR)
                        if (sensor.status in ["na"] and
                                self.sensor_valid_cur[sensor_index] != -1):
                            if (re.match(r"SSD", sensor.name) is not None or
                                    is_m2_temp):
                                # M.2/SSD failures are only counted here;
                                # the progressive logic below decides what
                                # they cost.
                                fail_ssd_count += 1
                                Logger.warn("M.2 Device %s Fail" % v)
                            else:
                                Logger.warn("%s Fail" % v)
                                outmin = max(outmin, self.boost)
                                cause_boost_count += 1
                            # Leave an empty marker file for the failure.
                            if not os.path.isfile(sensor_fail_record_path):
                                with open(sensor_fail_record_path, "w"):
                                    pass
                            if outmin == self.boost:
                                mode = fan_mode["boost_mode"]
                        elif os.path.isfile(sensor_fail_record_path):
                            # Sensor reads fine again: clear its marker.
                            os.remove(sensor_fail_record_path)
                else:
                    # The sensor must be missing on more than two
                    # consecutive runs before the ASSERT is logged.
                    if (not self.missing_sensor_assert_flag[sensor_index] and
                            self.missing_sensor_assert_retry[sensor_index]
                            >= 2):
                        Logger.crit("ASSERT: Zone%d Missing sensors: %s" %
                                    (self.counter, v))
                        self.missing_sensor_assert_flag[sensor_index] = True
                    if self.missing_sensor_assert_retry[sensor_index] < 2:
                        self.missing_sensor_assert_retry[sensor_index] += 1
                    # evaluation tries to ignore the effects of None values
                    # (e.g. acts as 0 in max/+)
                    ctx[v] = None
            elif sname in sensors[board] and self.sensor_fail == True:
                # Reading ignored by the validity check: drop any stale
                # failure marker for it.
                # NOTE(review): ctx[v] is left untouched on this path, so a
                # value already present in ctx is reused - confirm intended.
                sensor_fail_record_path = SENSOR_FAIL_RECORD_DIR + v
                if os.path.isfile(sensor_fail_record_path):
                    os.remove(sensor_fail_record_path)

            self.sensor_valid_pre[sensor_index] = \
                self.sensor_valid_cur[sensor_index]

        if verbose:
            (exprout, dxstr) = self.expr.dbgeval(ctx)
            Logger.info(dxstr + " = " + str(exprout))
        else:
            exprout = self.expr.eval_driver(ctx)
            Logger.info(self.expr_str + " = " + str(exprout))

        # If *all* sensors in the top level max() report None, the
        # expression will report None
        if (not exprout) and (outmin == 0):
            if not self.transitional_assert_flag:
                Logger.crit(
                    "ASSERT: Zone%d No sane fan speed could be calculated! Using transitional speed."
                    % (self.counter))
            exprout = self.transitional
            mode = fan_mode["trans_mode"]
            no_sane_flag = 1
            self.transitional_assert_flag = True
        else:
            if self.transitional_assert_flag:
                Logger.crit(
                    "DEASSERT: Zone%d No sane fan speed could be calculated! Using transitional speed."
                    % (self.counter))
            self.transitional_assert_flag = False

        fail_types = self.fail_sensor_type
        if fail_types is not None:
            progressive_mode = True
            if ("M2_sensor_fail" in fail_types and
                    "M2_sensor_count" in fail_types and
                    fail_types["M2_sensor_fail"] == True and
                    fail_types["M2_sensor_count"] > 0):
                if valid_m2_count == 0:
                    progressive_mode = False
                    # '== False' kept: a None return must not boost.
                    if fsc_board.all_slots_power_off() == False:
                        # Missing all module (no M.2 device)
                        outmin = max(outmin, self.boost)
                        cause_boost_count += 1
                        mode = fan_mode["boost_mode"]
                    # else: all slots power off, do not boost up
                elif valid_m2_count != fail_types["M2_sensor_count"]:
                    # Missing some module (M.2 devices partially populated)
                    progressive_mode = False
                    cause_boost_count += 1
                elif cause_boost_count != 0:
                    # M.2 devices fully populated, but something else (not
                    # an M.2 sensor) already caused a boost
                    progressive_mode = False
                elif fail_ssd_count != 0:
                    # M.2 devices fully populated and some fail to read:
                    # handled progressively below
                    cause_boost_count += 1  # show out sensor fail record
                    display_progressive_flag = 1  # not overridden by normal
                    mode = fan_mode["progressive_mode"]
                else:
                    # M.2 devices normal mode
                    progressive_mode = False

            if (progressive_mode and
                    "SSD_sensor_fail" in fail_types and
                    fail_types["SSD_sensor_fail"] == True and
                    fail_ssd_count != 0 and
                    self.ssd_progressive_algorithm is not None and
                    "offset_algorithm" in self.ssd_progressive_algorithm):
                offset_table = self.ssd_progressive_algorithm[
                    "offset_algorithm"]
                for entry in offset_table:
                    # entry[0] is the failure-count threshold, entry[1] the
                    # offset added to the expression result.
                    if fail_ssd_count <= entry[0]:
                        # exprout can still be None here (expression
                        # yielded None while outmin was already raised);
                        # treat it as 0 instead of raising TypeError.
                        exprout = (exprout or 0) + entry[1]
                        no_sane_flag = 0
                        break
                else:
                    # More failures than the last threshold covers: boost.
                    # An empty table leaves everything untouched.
                    if len(offset_table) > 0:
                        outmin = max(outmin, self.boost)
                        cause_boost_count += 1
                        if outmin == self.boost:
                            mode = fan_mode["boost_mode"]

        # Keep the boost marker file in sync with this run's boost causes.
        boost_record_path = RECORD_DIR + "sensor_fail_boost"
        if cause_boost_count != 0:
            if not os.path.isfile(boost_record_path):
                with open(boost_record_path, "w"):
                    pass
        elif os.path.isfile(boost_record_path):
            os.remove(boost_record_path)

        if not exprout:
            exprout = 0
        if exprout < outmin:
            exprout = outmin
        elif (no_sane_flag != 1) and (display_progressive_flag != 1):
            # The expression result wins over every minimum: normal mode.
            mode = fan_mode["normal_mode"]
        if not ignore_mode:
            self.get_set_fan_mode(mode, action="write")
        return clamp(exprout, 0, 100)