def __init__(self, fan_name, pTable):
    """
    Build a fan object from its config table.

    Arguments:
        fan_name: fan identifier; converted with int() to get fan_num
        pTable: config table for the fan. "read_source" is always looked
            up; "label" and "write_source" are optional.

    Any exception raised while parsing is logged as an error and
    swallowed, so attributes assigned after the failure stay unset.
    """
    try:
        self.fan_num = int(fan_name)
        self.label = (
            pTable["label"] if "label" in pTable else "Fan %d" % (self.fan_num)
        )

        readers = pTable["read_source"]

        if "sysfs" in readers:
            sysfs_args = {"name": fan_name, "read_source": readers["sysfs"]}
            if "write_source" in pTable:
                sysfs_args["write_source"] = pTable["write_source"]["sysfs"]
            self.source = FscSensorSourceSysfs(**sysfs_args)

        # Not an elif: when both kinds are configured the util source wins.
        if "util" in readers:
            util_args = {"name": fan_name, "read_source": readers["util"]}
            if "write_source" in pTable:
                util_args["write_source"] = pTable["write_source"]["util"]
            self.source = FscSensorSourceUtil(**util_args)
    except Exception:
        Logger.error("Unknown Fan source type")
def board_host_actions(action="None", cause="None"):
    """
    Board hook for host-level actions.

    Arguments:
        action: action string; one containing "host_shutdown" shuts the
            host down
        cause: reason for the action, used for logging only

    Returns:
        The result of yamp_host_shutdown() for a shutdown action,
        otherwise None.
    """
    if "host_shutdown" not in action:
        # Nothing implemented for other actions; just record the request.
        Logger.warn(
            "Host needs action '%s' and cause '%s'" % (str(action), str(cause))
        )
        return None

    Logger.crit("Host is shutdown due to cause %s" % (str(cause),))
    return yamp_host_shutdown()
def __init__(self, fan_name, pTable):
    """
    Initialise a fan from its configuration entry.

    Arguments:
        fan_name: fan identifier, parsed with int() into fan_num
        pTable: fan configuration; needs "read_source", may carry
            "label" and "write_source"

    Failures of any kind are caught, logged as an error and not
    re-raised.
    """
    try:
        self.fan_num = int(fan_name)
        if "label" not in pTable:
            self.label = "Fan %d" % (self.fan_num)
        else:
            self.label = pTable["label"]

        if "sysfs" in pTable["read_source"]:
            sysfs_read = pTable["read_source"]["sysfs"]
            if "write_source" not in pTable:
                self.source = FscSensorSourceSysfs(
                    name=fan_name, read_source=sysfs_read
                )
            else:
                self.source = FscSensorSourceSysfs(
                    name=fan_name,
                    read_source=sysfs_read,
                    write_source=pTable["write_source"]["sysfs"],
                )

        # Checked independently of sysfs; a util source overrides it.
        if "util" in pTable["read_source"]:
            util_read = pTable["read_source"]["util"]
            if "write_source" not in pTable:
                self.source = FscSensorSourceUtil(
                    name=fan_name, read_source=util_read
                )
            else:
                self.source = FscSensorSourceUtil(
                    name=fan_name,
                    read_source=util_read,
                    write_source=pTable["write_source"]["util"],
                )
    except Exception:
        Logger.error("Unknown Fan source type")
def board_callout(callout="None", **kwargs):
    """
    Override this method for defining board specific callouts:
    - Example: chassis intrusion

    This default only logs a warning naming the requested callout;
    kwargs are accepted and ignored.
    """
    message = "Need to perform callout action %s" % callout
    Logger.warn(message)
def __init__(self, config=RAMFS_CONFIG, zone_config=CONFIG_DIR, log_level="warning"):
    """
    Start logging, load the fsc config and seed every tunable with its
    default.

    Arguments:
        config: passed to get_fsc_config() to load the configuration
        zone_config: stored as self.zone_config
        log_level: level handed to Logger.start()
    """
    Logger.start("fscd", log_level)
    Logger.info("Starting fscd")
    self.zone_config = zone_config
    self.fsc_config = self.get_fsc_config(config)  # json dump from config

    # Tunables seeded from the class-level defaults.
    self.boost = self.DEFAULT_BOOST
    self.boost_type = self.DEFAULT_BOOST_TYPE
    self.transitional = self.DEFAULT_TRANSITIONAL
    self.ramp_rate = self.DEFAULT_RAMP_RATE

    # Optional settings start out unset.
    for unset_attr in (
        "non_fanfail_limited_boost",
        "sensor_fail",
        "ssd_progressive_algorithm",
        "sensor_valid_check",
        "fail_sensor_type",
        "fan_dead_boost",
        "fan_fail",
        "fan_recovery_time",
        "fan_limit_upper_pwm",
        "fan_limit_lower_pwm",
        "pwm_sensor_boost_value",
    ):
        setattr(self, unset_attr, None)

    # Boolean switches default to off.
    for flag_attr in (
        "fan_recovery_pending",
        "sensor_filter_all",
        "output_max_boost_pwm",
    ):
        setattr(self, flag_attr, False)
def get_config_params(self):
    """
    Copy settings from self.fsc_config onto attributes of self.

    Mandatory keys (KeyError when absent): "pwm_transition_value",
    "pwm_boost_value", "watchdog", "sample_interval_ms".
    Optional keys either fall back to False or leave the attribute
    untouched when missing. When the watchdog is enabled it is kicked
    once here.
    """
    cfg = self.fsc_config

    def copy_if_present(pairs):
        # Assign cfg[key] to self.<attr> only for keys that exist.
        for key, attr in pairs:
            if key in cfg:
                setattr(self, attr, cfg[key])

    self.transitional = cfg["pwm_transition_value"]
    self.boost = cfg["pwm_boost_value"]
    copy_if_present(
        (
            ("fan_limit_upper_pwm", "fan_limit_upper_pwm"),
            ("fan_limit_lower_pwm", "fan_limit_lower_pwm"),
            ("non_fanfail_limited_boost_value", "non_fanfail_limited_boost"),
        )
    )
    self.sensor_filter_all = cfg.get("sensor_filter_all", False)

    boost_cfg = cfg["boost"] if "boost" in cfg else {}
    if "fan_fail" in boost_cfg:
        self.fan_fail = boost_cfg["fan_fail"]
    if "progressive" in boost_cfg and boost_cfg["progressive"]:
        self.boost_type = "progressive"

    if "fan_dead_boost" in cfg:
        self.fan_dead_boost = cfg["fan_dead_boost"]
        self.all_fan_fail_counter = 0
    copy_if_present((("output_max_boost_pwm", "output_max_boost_pwm"),))

    if "sensor_fail" in boost_cfg:
        self.sensor_fail = boost_cfg["sensor_fail"]
        if self.sensor_fail:
            # These settings are only read when sensor-fail boost is on.
            copy_if_present(
                (
                    ("pwm_sensor_boost_value", "pwm_sensor_boost_value"),
                    ("fail_sensor_type", "fail_sensor_type"),
                    ("ssd_progressive_algorithm", "ssd_progressive_algorithm"),
                )
            )
    copy_if_present((("sensor_valid_check", "sensor_valid_check"),))

    self.watchdog = cfg["watchdog"]
    self.fanpower = cfg["fanpower"] if "fanpower" in cfg else False
    self.chassis_intrusion = (
        cfg["chassis_intrusion"] if "chassis_intrusion" in cfg else False
    )
    self.enable_fsc_sensor_check = (
        cfg["enable_fsc_sensor_check"]
        if "enable_fsc_sensor_check" in cfg
        else False
    )
    copy_if_present((("ramp_rate", "ramp_rate"),))

    if self.watchdog:
        Logger.info("watchdog pinging enabled")
        kick_watchdog()

    # Config stores milliseconds; self.interval is in seconds.
    self.interval = cfg["sample_interval_ms"] / 1000.0
    copy_if_present((("fan_recovery_time", "fan_recovery_time"),))
def get_sensor_tuples(fru_name, sensor_num, sensor_sources):
    """
    Walk the sensor sources and collect their readings.

    Arguments:
        fru_name: fru where the sensors should be read from
        sensor_num: passed as num= to a util source's read()
        sensor_sources: mapping of sensor key to an object exposing
            .source, taken from the fsc config

    Returns:
        For a util source, whatever parse_all_sensors_util() returns for
        that single read (any sysfs results gathered so far are
        discarded). Otherwise a dict built from the
        (symbolized_key, value) pairs returned by
        get_sensor_tuple_sysfs().
    """
    readings = {}
    for key, sensor in list(sensor_sources.items()):
        source = sensor.source
        if isinstance(source, FscSensorSourceUtil):
            # Hack: util reads all sensors, so one read answers everything.
            return parse_all_sensors_util(
                source.read(fru=fru_name, num=sensor_num)
            )
        if isinstance(source, FscSensorSourceSysfs):
            # read() is evaluated before the counters are sampled.
            symbolized_key, sensor_value = get_sensor_tuple_sysfs(
                key,
                source.read(),
                source.read_source_fail_counter,
                source.read_source_wrong_counter,
            )
            readings[symbolized_key] = sensor_value
        else:
            Logger.crit("Unknown source type")
    return readings
def sensor_valid_check(board, sname, check_name, attribute):
    """
    Decide whether a sensor reading should currently be treated as valid.

    Arguments:
        board: key into fru_map
        sname: sensor name; matched by prefix (soc_cpu, soc_therm,
            soc_dimm)
        check_name: unused here
        attribute: dict whose "type" selects the check; only
            "power_status" is handled

    Returns:
        1 when the sensor is considered valid, 0 otherwise (including
        when any exception other than SystemExit occurs).
    """
    status = c_uint8(0)
    try:
        if attribute["type"] != "power_status":
            return 0

        lpal_hndl.pal_get_server_power(
            int(fru_map[board]["slot_num"]), byref(status)
        )
        if status.value != 1:
            return 0

        if match(r"soc_cpu", sname) is not None:
            return 1
        if match(r"soc_therm", sname) is not None:
            return 1
        if match(r"soc_dimm", sname) is None:
            return 0

        # check DIMM present: the kv entry must hold an alphanumeric char
        prsnt_path = (
            "/mnt/data/kv_store/sys_config/"
            + fru_map[board]["name"]
            + part_name_map[sname[8]]
        )
        with open(prsnt_path, "r") as f:
            dimm_sts = f.readline()
        return 1 if re.search(r"([a-zA-Z0-9])", dimm_sts) else 0
    except SystemExit:
        Logger.debug("SystemExit from sensor read")
        raise
    except Exception:
        Logger.warn("Exception with board=%s, sensor_name=%s" % (board, sname))
    return 0
def build_zones(self):
    """
    Create one Zone per entry in self.fsc_config["zones"].

    For each zone the expression file is read from self.zone_config,
    compiled with fsc_expr.make_eval_tree(), and the external variables
    it references ("board:sensor[:num]") are registered on self.machine.
    The resulting zones are stored in self.zones.
    """
    self.zones = []
    zone_items = list(self.fsc_config["zones"].items())
    for counter, (_zone_name, data) in enumerate(zone_items):
        expr_path = os.path.join(self.zone_config, data["expr_file"])
        with open(expr_path, "r") as exf:
            source = exf.read()
        Logger.info("Compiling FSC expression for zone:")
        Logger.info(source)
        (expr, inf) = fsc_expr.make_eval_tree(source, self.profiles)

        for ext_var in inf["ext_vars"]:
            parts = ext_var.split(":")
            board = parts[0]
            # parts[1] (the sensor name) is not needed here.
            if board not in self.machine.frus:
                self.machine.nums[board] = []
                self.machine.frus.add(board)
            if len(parts) == 3:
                self.machine.nums[board].append(parts[2])

        self.zones.append(
            Zone(
                data["pwm_output"],
                expr,
                inf,
                self.transitional,
                counter,
                self.boost,
                self.sensor_fail,
                self.sensor_valid_check,
                self.fail_sensor_type,
                self.ssd_progressive_algorithm,
            )
        )
def board_fan_actions(fan, action="None"):
    """
    Override the method to define fan specific actions like:
    - handling dead fan
    - handling fan led

    This default only logs a warning with the fan label and the action.
    """
    details = (fan.label, str(action))
    Logger.warn("%s needs action %s" % details)
def board_host_actions(action="None", cause="None"):
    """
    Override the method to define host specific actions like:
    - handling host power off
    - alarming/syslogging criticals

    This default only logs a warning with the action and its cause.
    """
    details = (str(action), str(cause))
    Logger.warn("Host needs action %s and cause %s" % details)
def board_fan_actions(fan, action="None"):
    """
    Board hook for per-fan actions, e.g.:
    - handling dead fan
    - handling fan led

    Boards override this; the default just warns that the action was
    requested for fan.label.
    """
    requested = str(action)
    Logger.warn("%s needs action %s" % (fan.label, requested))
def board_host_actions(action="None", cause="None"):
    """
    Board hook for host-level actions, e.g.:
    - handling host power off
    - alarming/syslogging criticals

    Boards override this; the default just warns that the action was
    requested and why.
    """
    requested, reason = str(action), str(cause)
    Logger.warn("Host needs action %s and cause %s" % (requested, reason))
def handle_term(signum, frame):
    """
    Signal handler: run the init_fans callout, log, and exit.

    Arguments:
        signum: number of the signal being handled
        frame: current stack frame (unused)

    On SIGQUIT with a truthy wdfile the watchdog is stopped first.
    Always terminates through sys.exit("killed").
    """
    global wdfile
    board_callout(callout="init_fans", boost=DEFAULT_INIT_TRANSITIONAL)
    Logger.warn("killed by signal %d" % (signum,))

    stop_requested = signum == signal.SIGQUIT and wdfile
    if stop_requested:
        Logger.info("Killed with SIGQUIT - stopping watchdog.")
        stop_watchdog()

    sys.exit("killed")
def board_fan_actions(fan, action="None"):
    """
    Board hook for per-fan actions.

    An action containing "led" is forwarded to yamp_set_fan_led() as
    the color; any other action is only logged as a warning.
    """
    if "led" not in action:
        Logger.warn("fscd: %s has no action %s" % (fan.label, str(action)))
        return
    yamp_set_fan_led(fan.label, color=action)
def sensor_valid_check(board, sname, check_name, attribute):
    """
    Decide whether a sensor reading should currently be treated as valid.

    Arguments:
        board: key into fru_map and host_ready_map
        sname: sensor name; matched by prefix (soc_cpu, soc_therm,
            spe_ssd, 1ou_m2, soc_dimm)
        check_name: unused here
        attribute: dict whose "type" selects the check; only
            "power_status" is handled

    Returns:
        1 when the sensor is considered valid, 0 otherwise (including
        when any exception other than SystemExit occurs).
    """
    status = c_uint8(0)
    is_valid_check = False
    try:
        if attribute["type"] != "power_status":
            return 0

        lpal_hndl.pal_get_server_power(
            int(fru_map[board]["slot_num"]), byref(status)
        )
        if status.value == 1:  # power on
            if match(r"soc_cpu|soc_therm", sname) is not None:
                is_valid_check = True
            elif match(r"spe_ssd", sname) is not None:
                # get SSD present status
                cmd = "/usr/bin/bic-util slot1 0xe0 0x2 0x9c 0x9c 0x0 0x15 0xe0 0x34 0x9c 0x9c 0x0 0x0 0x3"
                raw = Popen(cmd, shell=True, stdout=PIPE).stdout.read()
                fields = raw.decode().split(" ")
                # check the completion code
                if fields[6] != "00":
                    return 0
                prsnt_mask = int("0x" + fields[-3], 16)
                ssd_bit = 1 << int(sname[7])
                return 1 if prsnt_mask & ssd_bit else 0
            else:
                suffix = ""
                if match(r"1ou_m2", sname) is not None:
                    # 1ou_m2_a_temp. key is at 7
                    suffix = m2_1ou_name_map[sname[7]]
                elif match(r"soc_dimm", sname) is not None:
                    # soc_dimma_temp. key is at 8
                    suffix = dimm_location_name_map[sname[8]]
                prsnt_path = (
                    "/mnt/data/kv_store/sys_config/"
                    + fru_map[board]["name"]
                    + suffix
                )
                # Equality kept on purpose: is_dev_prsnt's return type
                # is not visible from here.
                if is_dev_prsnt(prsnt_path) == True:  # noqa: E712
                    is_valid_check = True

        if not is_valid_check:
            return 0

        # Check power status again, via the per-board cache_store flag.
        ready_path = "/tmp/cache_store/" + host_ready_map[board]
        if not os.path.exists(ready_path):
            return 0
        with open(ready_path, "r") as f:
            flag_status = f.read()
        return 1 if flag_status == "1" else 0
    except SystemExit:
        Logger.debug("SystemExit from sensor read")
        raise
    except Exception:
        Logger.warn("Exception with board=%s, sensor_name=%s" % (board, sname))
    return 0
def board_callout(callout="None", **kwargs):
    """
    Board hook for callouts.

    A callout containing "init_fans" sets every fan through
    yamp_set_all_pwm(), using kwargs["boost"] when given and 100
    otherwise, and returns that call's result. Other callouts are only
    logged as a warning and return None.
    """
    if "init_fans" not in callout:
        Logger.warn("Need to perform callout action %s" % callout)
        return None

    boost = kwargs["boost"] if "boost" in kwargs else 100
    Logger.info("FSC init fans to boost=%s " % str(boost))
    return yamp_set_all_pwm(boost)
def stop_watchdog():
    """
    Stop the watchdog device by running `WDTCLI_CMD stop`.

    Logs an error when the command writes anything to stderr. The
    command's exit status is not inspected.
    """
    f = subprocess.Popen(
        WDTCLI_CMD + " stop",
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    _info, err = f.communicate()
    if len(err) != 0:
        # This is the stop path, so report a stop failure, not a kick.
        Logger.error("failed to stop watchdog device")
def board_host_actions(action="None", cause="None"):
    """
    Override the method to define host specific actions like:
    - handling host power off
    - alarming/syslogging criticals

    An action containing "host_shutdown" logs a critical message and
    returns the result of host_shutdown(); anything else is logged as a
    warning and returns None.
    """
    wants_shutdown = "host_shutdown" in action
    if wants_shutdown:
        Logger.crit("Host is shutdown due to cause %s" % (str(cause),))
        return host_shutdown()

    Logger.warn(
        "Host needs action '%s' and cause '%s'" % (str(action), str(cause))
    )
    return None
def host_shutdown():
    """
    Write 0 to the userver power-enable node, wait five seconds, then
    write 0 to the main power node.

    Returns:
        The stdout bytes captured from the second (main power) command.
    """
    MAIN_POWER = "/sys/bus/i2c/drivers/smbcpld/12-003e/cpld_in_p1220"
    USERVER_POWER = "/sys/bus/i2c/drivers/scmcpld/2-0035/com_exp_pwr_enable"

    def _run(cmd):
        # Log and run one shell command, returning its stdout.
        Logger.info("host_shutdown() executing {}".format(cmd))
        # communicate() drains stdout AND reaps the child; a bare
        # .stdout.read() leaves the pipe open and the process unwaited.
        output, _ = Popen(cmd, shell=True, stdout=PIPE).communicate()
        return output

    _run("echo 0 > " + USERVER_POWER)
    # NOTE(review): presumably lets the userver power down before main
    # power is cut - confirm the required delay.
    time.sleep(5)
    return _run("echo 0 > " + MAIN_POWER)
def stop_watchdog():
    """
    Stop the watchdog device by running `WDTCLI_CMD stop`.

    Logs an error when the command writes to stderr, otherwise logs
    that the watchdog stopped.
    """
    proc = subprocess.Popen(
        WDTCLI_CMD + " stop",
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    _out, err = proc.communicate()
    if not err:
        Logger.info("watchdog stopped")
        return
    Logger.error("failed to stop watchdog device")
def host_shutdown():
    """
    Write 0 to the userver power-enable node, wait five seconds, then
    write 0 to the main power node.

    Returns:
        The stdout bytes captured from the second (main power) command.
    """
    MAIN_POWER = "/sys/bus/i2c/drivers/syscpld/12-0031/pwr_main_n"
    USERVER_POWER = "/sys/bus/i2c/drivers/syscpld/12-0031/pwr_usrv_en"

    cmd = "echo 0 > " + USERVER_POWER
    Logger.info("host_shutdown() executing {}".format(cmd))
    # communicate() waits for the child and closes the pipe; the former
    # .stdout.read() did neither.
    Popen(cmd, shell=True, stdout=PIPE).communicate()

    # NOTE(review): presumably lets the userver power down before main
    # power is cut - confirm the required delay.
    time.sleep(5)

    cmd = "echo 0 > " + MAIN_POWER
    Logger.info("host_shutdown() executing {}".format(cmd))
    response, _ = Popen(cmd, shell=True, stdout=PIPE).communicate()
    return response
def board_callout(callout="None", **kwargs):
    """
    Override this method for defining board specific callouts:
    - Example: chassis intrusion

    A callout containing "init_fans" sets every fan through
    set_all_pwm() and returns that call's result; any other callout is
    only logged and returns None.

    Arguments:
        callout: name of the requested callout
        kwargs: optional "boost" overriding the default pwm of 100
    """
    if "init_fans" in callout:
        boost = 100  # define a boost for the platform or respect fscd override
        if "boost" in kwargs:
            boost = kwargs["boost"]
        return set_all_pwm(boost)
    else:
        # Use Logger.warn like the other board hooks; the project Logger
        # is not the stdlib logger, so .warning() may not exist on it.
        Logger.warn("Callout %s not handled" % callout)
def set_pwm(self, fan, pct):
    """
    Method to set fan pwm

    Arguments:
        fan: fan sensor object; its source provides name and write()
        pct: new pct to set to the specific fan
    Returns:
        N/A
    """
    fan_source = fan.source
    Logger.debug("Set pwm %d to %d" % (int(fan_source.name), pct))
    fan_source.write(pct)
def board_callout(callout="None", **kwargs):
    """
    Override this method for defining board specific callouts:
    - Example: chassis intrusion

    A callout containing "init_fans" sets every fan through
    set_all_pwm() and returns that call's result; any other callout is
    only logged and returns None.

    Arguments:
        callout: name of the requested callout
        kwargs: optional "boost" overriding the default pwm of 100
    """
    if "init_fans" not in callout:
        # Logger.warn matches every other hook in this code; the project
        # Logger is not the stdlib logger, so .warning() may not exist.
        Logger.warn("Callout %s not handled" % callout)
        return None

    # define a boost for the platform or respect fscd override
    boost = kwargs["boost"] if "boost" in kwargs else 100
    return set_all_pwm(boost)
def __init__(self, sensor_name, pTable):
    """
    Build a sensor object from its config table.

    Arguments:
        sensor_name: name handed to the source object
        pTable: config table; without "read_source" no source is set

    Any exception is logged as an error and swallowed.
    """
    try:
        if "read_source" not in pTable:
            return
        readers = pTable["read_source"]
        if "sysfs" in readers:
            self.source = FscSensorSourceSysfs(
                name=sensor_name, read_source=readers["sysfs"]
            )
        # Not an elif: a util source replaces a sysfs one if both exist.
        if "util" in readers:
            self.source = FscSensorSourceUtil(
                name=sensor_name, read_source=readers["util"]
            )
    except Exception:
        Logger.error("Unknown Sensor source type")
def make_controller(pTable):
    """
    Build the controller described by a profile table.

    Arguments:
        pTable: profile dict; "type" selects "linear" (TTable) or "pid"
            (PID). Hysteresis values default to 0 for linear profiles
            and are mandatory for pid profiles.

    Returns:
        The controller, or None (after logging an error) for an
        unrecognised type.
    """
    kind = pTable["type"]

    if kind == "linear":
        return TTable(
            pTable["data"],
            pTable.get("negative_hysteresis", 0),
            pTable.get("positive_hysteresis", 0),
        )

    if kind == "pid":
        return PID(
            pTable["setpoint"],
            pTable["kp"],
            pTable["ki"],
            pTable["kd"],
            pTable["negative_hysteresis"],
            pTable["positive_hysteresis"],
        )

    Logger.error("Don't understand profile type '%s'" % (kind))
    return None
def __init__(self, sensor_name, pTable):
    """
    Initialise a sensor from its configuration entry.

    Arguments:
        sensor_name: name given to the created source
        pTable: sensor configuration; "read_source" is optional

    Failures of any kind are caught, logged as an error and not
    re-raised.
    """
    try:
        has_reader = "read_source" in pTable
        if has_reader and "sysfs" in pTable["read_source"]:
            sysfs_read = pTable["read_source"]["sysfs"]
            self.source = FscSensorSourceSysfs(
                name=sensor_name, read_source=sysfs_read
            )
        # Evaluated second so a util source overrides a sysfs one.
        if has_reader and "util" in pTable["read_source"]:
            util_read = pTable["read_source"]["util"]
            self.source = FscSensorSourceUtil(
                name=sensor_name, read_source=util_read
            )
    except Exception:
        Logger.error("Unknown Sensor source type")
def set_all_pwm(self, fans, pct):
    """
    Method to set all fans pwm

    Arguments:
        fans: mapping of fan sensor objects
        pct: new pct to set to every fan
    Returns:
        N/A
    """
    Logger.debug("Set all pwm to %d" % (pct))
    # Snapshot the keys first, then look each fan up as it is written.
    for fan_key in list(fans.keys()):
        self.set_pwm(fans[fan_key], pct)
def board_callout(callout="None", **kwargs):
    """
    Override this method for defining board specific callouts:
    - Example: chassis intrusion

    A callout containing "init_fans" sets every fan through
    set_all_pwm(), using kwargs["boost"] when given and 100 otherwise,
    and returns that call's result. Other callouts are only logged as a
    warning and return None.
    """
    if "init_fans" not in callout:
        Logger.warn("Need to perform callout action %s" % callout)
        return None

    boost = kwargs.get("boost", 100)
    Logger.info("FSC init fans to boost=%s " % str(boost))
    return set_all_pwm(boost)