class OpTestHMIHandling():
    ##  Initialize this object
    #  @param i_bmcIP The IP address of the BMC
    #  @param i_bmcUser The userid to log into the BMC with
    #  @param i_bmcPasswd The password of the userid to log into the BMC with
    #  @param i_bmcUserIpmi The userid to issue the BMC IPMI commands with
    #  @param i_bmcPasswdIpmi The password of BMC IPMI userid
    #  @param i_ffdcDir Optional param to indicate where to write FFDC
    #
    # "Only required for inband tests" else Default = None
    # @param i_hostip The IP address of the Host
    # @param i_hostuser The userid to log into the Host
    # @param i_hostPasswd The password of the userid to log into the Host with
    #
    def __init__(self, i_bmcIP, i_bmcUser, i_bmcPasswd,
                 i_bmcUserIpmi, i_bmcPasswdIpmi, i_ffdcDir=None, i_hostip=None,
                 i_hostuser=None, i_hostPasswd=None):
        self.cv_BMC = OpTestBMC(i_bmcIP, i_bmcUser, i_bmcPasswd, i_ffdcDir)
        self.cv_IPMI = OpTestIPMI(i_bmcIP, i_bmcUserIpmi, i_bmcPasswdIpmi,
                                  i_ffdcDir, i_hostip, i_hostuser, i_hostPasswd)
        self.cv_HOST = OpTestHost(i_hostip, i_hostuser, i_hostPasswd, i_bmcIP, i_ffdcDir)
        self.cv_SYSTEM = OpTestSystem(i_bmcIP, i_bmcUser, i_bmcPasswd,
                                      i_bmcUserIpmi, i_bmcPasswdIpmi, i_ffdcDir,
                                      i_hostip, i_hostuser, i_hostPasswd)
        self.util = OpTestUtil()

    ##
    # @brief Extract the processor chip id's from "getscom -l" output.
    #
    # @param i_output @type string: raw multi-line output of "getscom -l"
    #
    # @return sorted list of 8-digit chip id strings found on lines that
    #         mention "processor" (empty list when none are found)
    #         Ex: ['00000000', '00000001', '00000010']
    #
    @staticmethod
    def _parse_chip_ids(i_output):
        l_chips = []
        for l_line in i_output.splitlines():
            l_match = re.search(r"(\d{8}).*processor", l_line)
            if l_match:
                l_chips.append(l_match.group(1))
        l_chips.sort()
        return l_chips

    ##
    # @brief Extract the per-chip core id's from OPAL msglog "Chip N Core X" lines.
    #
    # @param i_output @type string: raw multi-line text filtered from the OPAL msg log
    #
    # @return dictionary mapping the chip number (int) to the list of core id
    #         characters, in the order they appear in the log
    #         Ex: {0: ['4', '5', '6'], 1: ['4', '5', '6'], 10: ['c', 'd', 'e']}
    #
    @staticmethod
    def _parse_core_map(i_output):
        l_cores = {}
        for l_line in i_output.splitlines():
            l_match = re.search(r"Chip (\d{1,2}) Core ([a-z0-9])", l_line)
            if l_match:
                l_cores.setdefault(int(l_match.group(1)), []).append(l_match.group(2))
        return l_cores

    ##
    # @brief This is a common function for all the hmi test cases. This will be executed before
    #        any test case starts. Basically this provides below requirements.
    #        1. Validates all required host commands
    #        2. It will clone skiboot source repository
    #        3. Compile the necessary tools xscom-utils and gard utility to test HMI.
    #        4. Get the list Of Chips and cores in the form of a list of [chip id, core id's] pairs.
    #           Ex: [['00000000', ['4', '5', '6', 'c', 'd', 'e']], ['00000001', ['4', '5', '6', 'c', 'd', 'e']], ['00000010', ['4', '5', '6', 'c', 'd', 'e']]]
    #           The result is stored in self.l_dic, the skiboot directory in self.l_dir.
    #        5. In-order to inject HMI errors on cpu's, cpu should be running,
    #           so disabling the sleep states 1 and 2 of all CPU's.
    #        6. Stops the kdump service (kdump-tools on Ubuntu).
    #
    # @return None; raises OpTestError when chip or core information can not be collected
    #
    def test_init(self):
        self.cv_SYSTEM.sys_bmc_power_on_validate_host()
        # Get OS level
        self.l_oslevel = self.cv_HOST.host_get_OS_Level()

        # Check whether git and gcc commands are available on the host
        self.cv_HOST.host_check_command("git")
        self.cv_HOST.host_check_command("gcc")

        # It will clone skiboot source repository
        l_dir = "/tmp/skiboot"
        self.cv_HOST.host_clone_skiboot_source(l_dir)
        # Compile the necessary tools xscom-utils and gard utility
        self.cv_HOST.host_compile_xscom_utilities(l_dir)
        self.cv_HOST.host_compile_gard_utility(l_dir)

        # Getting list of processor chip Id's(executing getscom -l to get chip id's)
        l_res = self.cv_HOST.host_run_command("cd %s/external/xscom-utils/; ./getscom -l" % l_dir)
        l_chips = self._parse_chip_ids(l_res)
        if not l_chips:
            l_msg = "Getscom failed to list processor chip id's"
            raise OpTestError(l_msg)
        print(l_chips)  # ['00000000', '00000001', '00000010']

        # Currently getting the list of active core id's with respect to each chip is by using opal msg log
        # TODO: Need to identify best way to get list of cores(If Opal msg log is empty)
        l_cmd = "cat /sys/firmware/opal/msglog | grep -i CHIP"
        l_res = self.cv_HOST.host_run_command(l_cmd)
        self.l_dic = []
        l_cores = self._parse_core_map(l_res)
        if not l_cores:
            l_msg = "Failed in getting core id's information from OPAL msg log"
            raise OpTestError(l_msg)
        print(l_cores)  # {0: ['4', '5', '6', 'c', 'd', 'e'], 1: ['4', '5', '6', 'c', 'd', 'e'], 10: ['4', '5', '6', 'c', 'd', 'e']}
        l_cores = sorted(l_cores.items())
        print(l_cores)

        # Chips and core groups are matched purely by their sorted position, so
        # there must be a chip id available for every core group found.
        if len(l_cores) > len(l_chips):
            l_msg = "OPAL msg log lists more chips (%d) than getscom reported (%d)" % (len(l_cores), len(l_chips))
            raise OpTestError(l_msg)
        for l_chip, l_tup in zip(l_chips, l_cores):
            self.l_dic.append([l_chip, l_tup[1]])
        print(self.l_dic)
        # self.l_dic is a list of chip id's, core id's . and is of below format
        # [['00000000', ['4', '5', '6', 'c', 'd', 'e']], ['00000001', ['4', '5', '6', 'c', 'd', 'e']], ['00000010', ['4', '5', '6', 'c', 'd', 'e']]]
        self.l_dir = l_dir

        # In-order to inject HMI errors on cpu's, cpu should be running, so disabling the sleep states 1 and 2 of all CPU's
        self.cv_HOST.host_run_command(BMC_CONST.GET_CPU_SLEEP_STATE2)
        self.cv_HOST.host_run_command(BMC_CONST.GET_CPU_SLEEP_STATE1)
        self.cv_HOST.host_run_command(BMC_CONST.GET_CPU_SLEEP_STATE0)
        self.cv_HOST.host_run_command(BMC_CONST.DISABLE_CPU_SLEEP_STATE1)
        self.cv_HOST.host_run_command(BMC_CONST.DISABLE_CPU_SLEEP_STATE2)
        self.cv_HOST.host_run_command(BMC_CONST.GET_CPU_SLEEP_STATE2)
        self.cv_HOST.host_run_command(BMC_CONST.GET_CPU_SLEEP_STATE1)
        self.cv_HOST.host_run_command(BMC_CONST.GET_CPU_SLEEP_STATE0)

        # Stop kdump so that the system reboots after a non-recoverable error is injected
        if "Ubuntu" in self.l_oslevel:
            self.cv_HOST.host_run_command("service kdump-tools stop")
            self.cv_HOST.host_run_command("service kdump-tools status")
        else:
            self.cv_HOST.host_run_command("service kdump stop")
            self.cv_HOST.host_run_command("service kdump status")

    ##
    # @brief This function is mainly used to clear hardware gard entries.
    #        It will perform below steps
    #        1. Reboot the system(Power off/on)
    #        2. Clear any Hardware gard entries
    #        3. Again reboot the system, to make use of garded Hardware.
    #
    # @return None; raises OpTestError when the host fails to boot or the gard tool fails
    #
    def clearGardEntries(self):
        self.cv_SYSTEM.sys_bmc_power_on_validate_host()
        # Power off and on the system.
        self.cv_IPMI.ipmi_power_off()
        self.cv_IPMI.ipmi_power_on()
        if int(self.cv_SYSTEM.sys_ipl_wait_for_working_state()):
            l_msg = "System failed to boot host OS"
            raise OpTestError(l_msg)
        time.sleep(BMC_CONST.HOST_BRINGUP_TIME)

        # Clearing gard entries after host comes up
        self.cv_HOST.host_get_OS_Level()
        # It will clone skiboot source repository
        l_dir = "/tmp/skiboot"
        self.cv_HOST.host_clone_skiboot_source(l_dir)
        # Compile the necessary tools xscom-utils and gard utility
        self.cv_HOST.host_compile_xscom_utilities(l_dir)
        self.cv_HOST.host_compile_gard_utility(l_dir)

        l_con = self.cv_SYSTEM.sys_get_ipmi_console()
        self.cv_IPMI.ipmi_host_login(l_con)
        self.cv_IPMI.ipmi_host_set_unique_prompt(l_con)
        self.cv_IPMI.run_host_cmd_on_ipmi_console("uname -a")
        self.cv_IPMI.run_host_cmd_on_ipmi_console("cd %s/external/gard/;" % l_dir)
        l_cmd = "./gard list; echo $?"
        self.cv_IPMI.run_host_cmd_on_ipmi_console(l_cmd)
        l_cmd = "./gard clear all; echo $?"
        l_res = self.cv_IPMI.run_host_cmd_on_ipmi_console(l_cmd)
        # Last line of the console output is the exit status echoed by "echo $?"
        if int(l_res[-1]):
            l_msg = "Clearing gard entries through gard tool is failed"
            raise OpTestError(l_msg)
        l_cmd = "./gard list; echo $?"
        self.cv_IPMI.run_host_cmd_on_ipmi_console(l_cmd)

        # Rebooting the system again to make use of garded hardware
        self.cv_IPMI.ipmi_power_off()
        self.cv_IPMI.ipmi_power_on()
        if int(self.cv_SYSTEM.sys_ipl_wait_for_working_state()):
            l_msg = "System failed to boot host OS"
            raise OpTestError(l_msg)
        time.sleep(BMC_CONST.HOST_BRINGUP_TIME)
        self.cv_HOST.host_get_OS_Level()
        self.cv_SYSTEM.sys_ipmi_close_console(l_con)

    ##
    # @brief This function executes HMI test case based on the i_test value. Before the test
    #        starts, test_init() prepares the host (tools, chip/core list, sleep states, kdump
    #        stopped to make sure system reboots after injecting non-recoverable errors).
    #
    # @param i_test @type int: this is the type of test case want to execute
    #                          BMC_CONST.HMI_PROC_RECV_DONE: Processor recovery done
    #                          BMC_CONST.HMI_PROC_RECV_ERROR_MASKED: proc_recv_error_masked
    #                          BMC_CONST.HMI_MALFUNCTION_ALERT: malfunction_alert
    #                          BMC_CONST.HMI_HYPERVISOR_RESOURCE_ERROR: hypervisor resource error
    #                          BMC_CONST.TOD_ERRORS: chip TOD error injections
    #                          BMC_CONST.TFMR_ERRORS: timer facility error injections
    #
    # @return BMC_CONST.FW_SUCCESS or raise OpTestError
    #
    def testHMIHandling(self, i_test):
        l_test = i_test
        self.test_init()
        l_con = self.cv_SYSTEM.sys_get_ipmi_console()
        self.cv_IPMI.ipmi_host_login(l_con)
        self.cv_IPMI.ipmi_host_set_unique_prompt(l_con)
        self.cv_IPMI.run_host_cmd_on_ipmi_console("uname -a")
        self.cv_IPMI.run_host_cmd_on_ipmi_console("cat /etc/os-release")
        self.cv_IPMI.run_host_cmd_on_ipmi_console("cd %s/external/xscom-utils/;" % self.l_dir)
        self.cv_IPMI.run_host_cmd_on_ipmi_console("lscpu")
        self.cv_IPMI.run_host_cmd_on_ipmi_console("dmesg -D")
        if l_test == BMC_CONST.HMI_PROC_RECV_DONE:
            self.test_proc_recv_done()
        elif l_test == BMC_CONST.HMI_PROC_RECV_ERROR_MASKED:
            self.test_proc_recv_error_masked()
        elif l_test == BMC_CONST.HMI_MALFUNCTION_ALERT:
            self.test_malfunction_allert()
        elif l_test == BMC_CONST.HMI_HYPERVISOR_RESOURCE_ERROR:
            self.test_hyp_resource_err()
        elif l_test == BMC_CONST.TOD_ERRORS:
            # TOD Error recovery works on systems having more than one chip TOD
            # Skip this test on single chip systems(as recovery fails on 1S systems)
            l_chip_count = len(self.l_dic)
            if l_chip_count == 1:
                l_msg = "This is a single chip system, TOD Error recovery won't work"
                print(l_msg)
                # Release the console before the early exit so it is not leaked
                self.cv_SYSTEM.sys_ipmi_close_console(l_con)
                return BMC_CONST.FW_SUCCESS
            elif l_chip_count > 1:
                l_tod_errors = (
                    BMC_CONST.PSS_HAMMING_DISTANCE,
                    BMC_CONST.INTERNAL_PATH_OR_PARITY_ERROR,
                    BMC_CONST.TOD_DATA_PARITY_ERROR,
                    BMC_CONST.TOD_SYNC_CHECK_ERROR,
                    BMC_CONST.FSM_STATE_PARITY_ERROR,
                    BMC_CONST.MASTER_PATH_CONTROL_REGISTER,
                    BMC_CONST.PORT_0_PRIMARY_CONFIGURATION_REGISTER,
                    BMC_CONST.PORT_1_PRIMARY_CONFIGURATION_REGISTER,
                    BMC_CONST.PORT_0_SECONDARY_CONFIGURATION_REGISTER,
                    BMC_CONST.PORT_1_SECONDARY_CONFIGURATION_REGISTER,
                    BMC_CONST.SLAVE_PATH_CONTROL_REGISTER,
                    BMC_CONST.INTERNAL_PATH_CONTROL_REGISTER,
                    BMC_CONST.PR_SC_MS_SL_CONTROL_REGISTER,
                )
                for l_error in l_tod_errors:
                    self.test_tod_errors(l_error)
            else:
                l_msg = "Getting Chip information failed"
                raise OpTestError(l_msg)
        elif l_test == BMC_CONST.TFMR_ERRORS:
            l_tfmr_errors = (
                BMC_CONST.TB_PARITY_ERROR,
                BMC_CONST.TFMR_PARITY_ERROR,
                BMC_CONST.TFMR_HDEC_PARITY_ERROR,
                BMC_CONST.TFMR_DEC_PARITY_ERROR,
                BMC_CONST.TFMR_PURR_PARITY_ERROR,
                BMC_CONST.TFMR_SPURR_PARITY_ERROR,
            )
            for l_error in l_tfmr_errors:
                self.testTFMR_Errors(l_error)
        else:
            l_msg = "Please provide valid test case"
            raise OpTestError(l_msg)
        self.cv_SYSTEM.sys_ipmi_close_console(l_con)
        print("Gathering the OPAL msg logs")
        self.cv_HOST.host_gather_opal_msg_log()
        return BMC_CONST.FW_SUCCESS

    ##
    # @brief Common worker for the recoverable-error tests: writes i_value to the
    #        "1<core>013100" scom register of every core of every chip in self.l_dic
    #        (one by one) and verifies from dmesg that the processor recovered.
    #
    # @param i_value @type string: 16 hex digit value written with putscom
    # @param i_log_fail_msg @type string: OpTestError message used when the
    #                       recovery messages are not found in dmesg
    #
    # @return BMC_CONST.FW_SUCCESS or raise OpTestError
    #
    def _inject_recoverable_error_on_all_cores(self, i_value, i_log_fail_msg):
        for l_chip, l_core_list in self.l_dic:
            for l_core in l_core_list:
                l_reg = "1%s013100" % l_core
                l_cmd = "./putscom -c %s %s %s; echo $?" % (l_chip, l_reg, i_value)
                # Start from an empty kernel log so only this injection is inspected
                self.cv_IPMI.run_host_cmd_on_ipmi_console("dmesg -C")
                time.sleep(10)
                l_res = self.cv_IPMI.run_host_cmd_on_ipmi_console(l_cmd)
                time.sleep(10)
                # Last line of the console output is the exit status echoed by "echo $?"
                if l_res[-1] == "0":
                    print("Injected thread hang recoverable error")
                elif l_res[-1] == "1":
                    # putscom fails when it is trying to read back from a write only access register,
                    # In these cases we should not exit and we will continue with other error injections
                    continue
                else:
                    if any("Kernel panic - not syncing" in line for line in l_res):
                        l_msg = "Processor recovery failed: Kernel got panic"
                    elif any("Petitboot" in line for line in l_res):
                        l_msg = "System reached petitboot:Processor recovery failed"
                    elif any("ISTEP" in line for line in l_res):
                        l_msg = "System started booting: Processor recovery failed"
                    else:
                        l_msg = "Failed to inject thread hang recoverable error"
                    print(l_msg)
                    raise OpTestError(l_msg)
                l_res = self.cv_IPMI.run_host_cmd_on_ipmi_console("dmesg")
                if any("Processor Recovery done" in line for line in l_res) and \
                        any("Harmless Hypervisor Maintenance interrupt [Recovered]" in line for line in l_res):
                    print("Processor recovery done")
                else:
                    raise OpTestError(i_log_fail_msg)
                time.sleep(BMC_CONST.HMI_TEST_CASE_SLEEP_TIME)
        return BMC_CONST.FW_SUCCESS

    ##
    # @brief This function is used to test HMI: processor recovery done
    #        and also this function injecting error on all the cpus one by one and
    #        verify whether cpu is recovered or not.
    #
    # @return BMC_CONST.FW_SUCCESS or raise OpTestError
    #
    def test_proc_recv_done(self):
        return self._inject_recoverable_error_on_all_cores(
            "0000000000100000",
            "HMI handling failed to log message: for proc_recv_done")

    ##
    # @brief This function is used to test HMI: proc_recv_error_masked
    #        Processor went through recovery for an error which is actually masked for reporting
    #        this function also injecting the error on all the cpu's one-by-one.
    #
    # @return BMC_CONST.FW_SUCCESS or raise OpTestError
    #
    def test_proc_recv_error_masked(self):
        return self._inject_recoverable_error_on_all_cores(
            "0000000000080000",
            "HMI handling failed to log message")

    ##
    # @brief This function is used to test hmi malfunction alert:Core checkstop
    #        A processor core in the system has to be checkstopped (failed recovery).
    #        Injecting core checkstop on random core of random chip
    #
    # @return BMC_CONST.FW_SUCCESS or raise OpTestError
    #
    def test_malfunction_allert(self):
        # Get random pair of chip vs cores
        l_pair = random.choice(self.l_dic)
        # Get random chip id
        l_chip = l_pair[0]
        # Get random core number
        l_core = random.choice(l_pair[1])

        l_reg = "1%s013100" % l_core
        l_cmd = "./putscom -c %s %s 1000000000000000" % (l_chip, l_reg)
        l_res = self.cv_IPMI.run_host_cmd_on_ipmi_console(l_cmd)
        if any("Kernel panic - not syncing" in line for line in l_res):
            print("Malfunction alert: kernel got panic")
        elif any("login:" in line for line in l_res):
            print("System booted to host OS without any kernel panic message")
        elif any("Petitboot" in line for line in l_res):
            print("System reached petitboot without any kernel panic message")
        elif any("ISTEP" in line for line in l_res):
            print("System started booting without any kernel panic message")
        else:
            l_msg = "HMI: Malfunction alert failed"
            raise OpTestError(l_msg)
        return BMC_CONST.FW_SUCCESS

    ##
    # @brief This function is used to test HMI: Hypervisor resource error
    #        Injecting Hypervisor resource error on random core of random chip
    #
    # @return BMC_CONST.FW_SUCCESS or raise OpTestError
    #
    def test_hyp_resource_err(self):
        # Get random pair of chip vs cores
        l_pair = random.choice(self.l_dic)
        # Get random chip id
        l_chip = l_pair[0]
        # Get random core number
        l_core = random.choice(l_pair[1])

        l_reg = "1%s013100" % l_core
        l_cmd = "./putscom -c %s %s 0000000000008000" % (l_chip, l_reg)
        l_res = self.cv_IPMI.run_host_cmd_on_ipmi_console(l_cmd)
        if any("Kernel panic - not syncing" in line for line in l_res) and \
                any("Hypervisor Resource error - core check stop" in line for line in l_res):
            print("Hypervisor resource error: kernel got panic")
        elif any("login:" in line for line in l_res):
            print("System booted to host OS without any kernel panic message")
        elif any("Petitboot" in line for line in l_res):
            print("System reached petitboot without any kernel panic message")
        elif any("ISTEP" in line for line in l_res):
            print("System started booting without any kernel panic message")
        else:
            l_msg = "HMI: Hypervisor resource error failed"
            raise OpTestError(l_msg)
        return BMC_CONST.FW_SUCCESS

    ##
    # @brief This function tests timer facility related error injections and check
    #        the corresponding error got recovered. And this process is repeated
    #        for all the active cores in all the chips.
    #
    # @param i_error @type string: this is the type of error want to inject
    #                          BMC_CONST.TB_PARITY_ERROR
    #                          BMC_CONST.TFMR_PARITY_ERROR
    #                          BMC_CONST.TFMR_HDEC_PARITY_ERROR
    #                          BMC_CONST.TFMR_DEC_PARITY_ERROR
    #                          BMC_CONST.TFMR_PURR_PARITY_ERROR
    #                          BMC_CONST.TFMR_SPURR_PARITY_ERROR
    #
    # @return BMC_CONST.FW_SUCCESS or raise OpTestError
    #
    def testTFMR_Errors(self, i_error):
        l_error = i_error
        for l_chip, l_core_list in self.l_dic:
            for l_core in l_core_list:
                l_reg = "1%s013281" % l_core
                l_cmd = "./putscom -c %s %s %s;echo $?" % (l_chip, l_reg, l_error)
                self.cv_IPMI.run_host_cmd_on_ipmi_console("dmesg -C")
                l_res = self.cv_IPMI.run_host_cmd_on_ipmi_console(l_cmd)
                time.sleep(10)
                # Last line of the console output is the exit status echoed by "echo $?"
                if l_res[-1] == "0":
                    print("Injected TFMR error %s" % l_error)
                elif l_res[-1] == "1":
                    # putscom reported a failure for this core; move on to the next one
                    continue
                else:
                    if any("Kernel panic - not syncing" in line for line in l_res):
                        l_msg = "TFMR error injection: Kernel got panic"
                    elif any("Petitboot" in line for line in l_res):
                        l_msg = "System reached petitboot:TFMR error injection recovery failed"
                    elif any("ISTEP" in line for line in l_res):
                        l_msg = "System started booting: TFMR error injection recovery failed"
                    else:
                        l_msg = "Failed to inject TFMR error %s " % l_error
                    print(l_msg)
                    raise OpTestError(l_msg)
                l_res = self.cv_IPMI.run_host_cmd_on_ipmi_console("dmesg")
                if any("Timer facility experienced an error" in line for line in l_res) and \
                        any("Severe Hypervisor Maintenance interrupt [Recovered]" in line for line in l_res):
                    print("Timer facility experienced an error and got recovered")
                else:
                    l_msg = "HMI handling failed to log message"
                    raise OpTestError(l_msg)
                time.sleep(BMC_CONST.HMI_TEST_CASE_SLEEP_TIME)
        return BMC_CONST.FW_SUCCESS

    ##
    # @brief This function tests chip TOD related error injections and check
    #        the corresponding error got recovered. And this error injection
    #        happening on a random chip. This tod errors should test on systems
    #        having more than one processor socket(chip). On single chip system
    #        TOD error recovery won't work.
    #
    # @param i_error @type string: this is the type of error want to inject
    #                       These errors represented in common/OpTestConstants.py file.
    #
    # @return BMC_CONST.FW_SUCCESS or raise OpTestError
    #
    def test_tod_errors(self, i_error):
        l_error = i_error
        l_pair = random.choice(self.l_dic)
        # Get random chip id
        l_chip = l_pair[0]
        l_cmd = "./putscom -c %s %s %s;echo $?" % (l_chip, BMC_CONST.TOD_ERROR_REG, l_error)
        self.cv_IPMI.run_host_cmd_on_ipmi_console("dmesg -C")
        l_res = self.cv_IPMI.run_host_cmd_on_ipmi_console(l_cmd)
        time.sleep(10)
        # As of now putscom command to TOD register will fail with return code -1.
        # putscom indirectly call getscom to read the value again.
        # But getscom to TOD error reg there is no access
        # TOD Error reg has only WO access and there is no read access
        if l_res[-1] == "1":
            print("Injected TOD error %s" % l_error)
        else:
            if any("Kernel panic - not syncing" in line for line in l_res):
                print("TOD ERROR Injection-kernel got panic")
            elif any("login:" in line for line in l_res):
                print("System booted to host OS without any kernel panic message")
            elif any("Petitboot" in line for line in l_res):
                print("System reached petitboot without any kernel panic message")
            elif any("ISTEP" in line for line in l_res):
                print("System started booting without any kernel panic message")
            else:
                # Name the value that was actually injected rather than one fixed error type
                l_msg = "TOD: error injection failed for error %s" % l_error
                raise OpTestError(l_msg)
        l_res = self.cv_IPMI.run_host_cmd_on_ipmi_console("dmesg")
        if any("Timer facility experienced an error" in line for line in l_res) and \
                any("Severe Hypervisor Maintenance interrupt [Recovered]" in line for line in l_res):
            print("Timer facility experienced an error and got recovered")
        else:
            l_msg = "HMI handling failed to log message"
            raise OpTestError(l_msg)
        time.sleep(BMC_CONST.HMI_TEST_CASE_SLEEP_TIME)
        return BMC_CONST.FW_SUCCESS

    ##
    # @brief This function enables a single core by delegating to
    #        OpTestHost.host_enable_single_core()
    #
    # @return None (the result of the host call is not propagated)
    #
    def host_enable_single_core(self):
        self.cv_HOST.host_enable_single_core()
class OpTestMCColdResetEffects():
    ##  Initialize this object
    #  @param i_bmcIP The IP address of the BMC
    #  @param i_bmcUser The userid to log into the BMC with
    #  @param i_bmcPasswd The password of the userid to log into the BMC with
    #  @param i_bmcUserIpmi The userid to issue the BMC IPMI commands with
    #  @param i_bmcPasswdIpmi The password of BMC IPMI userid
    #  @param i_ffdcDir Optional param to indicate where to write FFDC
    #
    # "Only required for inband tests" else Default = None
    # @param i_hostip The IP address of the HOST
    # @param i_hostuser The userid to log into the HOST
    # @param i_hostPasswd The password of the userid to log into the HOST with
    #
    def __init__(self, i_bmcIP, i_bmcUser, i_bmcPasswd,
                 i_bmcUserIpmi, i_bmcPasswdIpmi, i_ffdcDir=None, i_hostip=None,
                 i_hostuser=None, i_hostPasswd=None):
        self.cv_BMC = OpTestBMC(i_bmcIP, i_bmcUser, i_bmcPasswd, i_ffdcDir)
        self.cv_IPMI = OpTestIPMI(i_bmcIP, i_bmcUserIpmi, i_bmcPasswdIpmi,
                                  i_ffdcDir, i_hostip, i_hostuser, i_hostPasswd)
        self.cv_HOST = OpTestHost(i_hostip, i_hostuser, i_hostPasswd, i_bmcIP, i_ffdcDir)
        self.cv_SYSTEM = OpTestSystem(i_bmcIP, i_bmcUser, i_bmcPasswd,
                                      i_bmcUserIpmi, i_bmcPasswdIpmi, i_ffdcDir,
                                      i_hostip, i_hostuser, i_hostPasswd)
        self.util = OpTestUtil()
        self.opTestSensors = OpTestSensors(i_bmcIP, i_bmcUser, i_bmcPasswd,
                                           i_bmcUserIpmi, i_bmcPasswdIpmi, i_ffdcDir,
                                           i_hostip, i_hostuser, i_hostPasswd)

    ##
    # @brief This function will test BMC Cold reset vs Host FW status
    #        1. When system is in runtime issue BMC Cold reset.
    #        2. Check Host FW services and drivers.
    #        3. Run sensors command
    #        4. Get list of chips
    #        5. This is expected to fail.
    #           https://github.com/open-power/op-build/issues/482
    #        6. Reboot the system at the end of test.
    #
    # @return None; raises OpTestError when any of the checks after the cold reset failed
    #
    def test_bmc_cold_reset_effects(self):
        print("Test BMC Cold reset effects versus Host Firmware Status")
        self.cv_SYSTEM.sys_bmc_power_on_validate_host()
        print("Issue BMC Cold reset")
        result = True
        try:
            self.cv_SYSTEM.sys_cold_reset_bmc()
            l_dir = BMC_CONST.SKIBOOT_WORKING_DIR
            self.cv_HOST.host_clone_skiboot_source(l_dir)
            self.cv_HOST.host_compile_xscom_utilities(l_dir)
            l_con = self.cv_SYSTEM.sys_get_ipmi_console()
            self.cv_IPMI.ipmi_host_login(l_con)
            self.cv_IPMI.ipmi_host_set_unique_prompt(l_con)
            self.cv_IPMI.run_host_cmd_on_ipmi_console("uname -a")
            self.cv_IPMI.run_host_cmd_on_ipmi_console(
                "cd %s/external/xscom-utils/; ./getscom -l" % l_dir)
            self.opTestSensors.test_hwmon_driver()
            self.cv_SYSTEM.sys_ipmi_close_console(l_con)
        except Exception as l_err:
            # Any failure is remembered (not re-raised here) so that the log
            # gathering and the reboot below always run before the test fails.
            # KeyboardInterrupt/SystemExit are deliberately not swallowed.
            result = False
            print("BMC Cold reset effects check failed: %s" % l_err)

        print("Gathering the OPAL msg logs")
        self.cv_HOST.host_gather_opal_msg_log()
        # Reboot the system so it is left in a known state for the next test
        self.cv_IPMI.ipmi_power_off()
        self.cv_SYSTEM.sys_bmc_power_on_validate_host()
        if result is False:
            raise OpTestError("MC Cold reset vs Host FW Test failed")
class OpTestHMIHandling: ## Initialize this object # @param i_bmcIP The IP address of the BMC # @param i_bmcUser The userid to log into the BMC with # @param i_bmcPasswd The password of the userid to log into the BMC with # @param i_bmcUserIpmi The userid to issue the BMC IPMI commands with # @param i_bmcPasswdIpmi The password of BMC IPMI userid # @param i_ffdcDir Optional param to indicate where to write FFDC # # "Only required for inband tests" else Default = None # @param i_lparIP The IP address of the LPAR # @param i_lparuser The userid to log into the LPAR # @param i_lparPasswd The password of the userid to log into the LPAR with # def __init__( self, i_bmcIP, i_bmcUser, i_bmcPasswd, i_bmcUserIpmi, i_bmcPasswdIpmi, i_ffdcDir=None, i_lparip=None, i_lparuser=None, i_lparPasswd=None, ): self.cv_BMC = OpTestBMC(i_bmcIP, i_bmcUser, i_bmcPasswd, i_ffdcDir) self.cv_IPMI = OpTestIPMI( i_bmcIP, i_bmcUserIpmi, i_bmcPasswdIpmi, i_ffdcDir, i_lparip, i_lparuser, i_lparPasswd ) self.cv_LPAR = OpTestLpar(i_lparip, i_lparuser, i_lparPasswd, i_bmcIP) self.cv_SYSTEM = OpTestSystem( i_bmcIP, i_bmcUser, i_bmcPasswd, i_bmcUserIpmi, i_bmcPasswdIpmi, i_ffdcDir, i_lparip, i_lparuser, i_lparPasswd, ) self.util = OpTestUtil() ## # @brief This is a common function for all the hmi test cases. This will be executed before # any test case starts. Basically this provides below requirements. # 1. Validates all required lpar commands # 2. It will clone skiboot source repository # 3. Compile the necessary tools xscom-utils and gard utility to test HMI. # 4. Get the list Of Chips and cores in the form of dictionary. # Ex: [['00000000', ['4', '5', '6', 'c', 'd', 'e']], ['00000001', ['4', '5', '6', 'c', 'd', 'e']], ['00000010', ['4', '5', '6', 'c', 'd', 'e']]] # 5. In-order to inject HMI errors on cpu's, cpu should be running, # so disabling the sleep states 1 and 2 of all CPU's. 
# # @return BMC_CONST.FW_SUCCESS or raise OpTestError # def test_init(self): # Get OS level self.cv_LPAR.lpar_get_OS_Level() # Check whether git and gcc commands are available on lpar self.cv_LPAR.lpar_check_command("git") self.cv_LPAR.lpar_check_command("gcc") # It will clone skiboot source repository l_dir = "/tmp/skiboot" self.cv_LPAR.lpar_clone_skiboot_source(l_dir) # Compile the necessary tools xscom-utils and gard utility self.cv_LPAR.lpar_compile_xscom_utilities(l_dir) self.cv_LPAR.lpar_compile_gard_utility(l_dir) # Getting list of processor chip Id's(executing getscom -l to get chip id's) l_res = self.cv_LPAR.lpar_run_command("cd %s/external/xscom-utils/; ./getscom -l" % l_dir) l_res = l_res.splitlines() l_chips = [] for line in l_res: matchObj = re.search("(\d{8}).*processor", line) if matchObj: l_chips.append(matchObj.group(1)) if not l_chips: l_msg = "Getscom failed to list processor chip id's" raise OpTestError(l_msg) l_chips.sort() print l_chips # ['00000000', '00000001', '00000010'] # Currently getting the list of active core id's with respect to each chip is by using opal msg log # TODO: Need to identify best way to get list of cores(If Opal msg log is empty) l_cmd = "cat /sys/firmware/opal/msglog | grep -i CHIP" l_res = self.cv_LPAR.lpar_run_command(l_cmd) l_cores = {} self.l_dic = [] l_res = l_res.splitlines() for line in l_res: matchObj = re.search("Chip (\d{1,2}) Core ([a-z0-9])", line) if matchObj: if l_cores.has_key(int(matchObj.group(1))): (l_cores[int(matchObj.group(1))]).append(matchObj.group(2)) else: l_cores[int(matchObj.group(1))] = list(matchObj.group(2)) if not l_cores: l_msg = "Failed in getting core id's information from OPAL msg log" raise OpTestError(l_msg) print l_cores # {0: ['4', '5', '6', 'c', 'd', 'e'], 1: ['4', '5', '6', 'c', 'd', 'e'], 10: ['4', '5', '6', 'c', 'd', 'e']} l_cores = sorted(l_cores.iteritems()) print l_cores i = 0 for tup in l_cores: new_list = [l_chips[i], tup[1]] self.l_dic.append(new_list) i += 1 print 
self.l_dic # self.l_dic is a list of chip id's, core id's . and is of below format # [['00000000', ['4', '5', '6', 'c', 'd', 'e']], ['00000001', ['4', '5', '6', 'c', 'd', 'e']], ['00000010', ['4', '5', '6', 'c', 'd', 'e']]] self.l_dir = l_dir # In-order to inject HMI errors on cpu's, cpu should be running, so disabling the sleep states 1 and 2 of all CPU's self.cv_LPAR.lpar_run_command(BMC_CONST.GET_CPU_SLEEP_STATE2) self.cv_LPAR.lpar_run_command(BMC_CONST.GET_CPU_SLEEP_STATE1) self.cv_LPAR.lpar_run_command(BMC_CONST.GET_CPU_SLEEP_STATE0) self.cv_LPAR.lpar_run_command(BMC_CONST.DISABLE_CPU_SLEEP_STATE1) self.cv_LPAR.lpar_run_command(BMC_CONST.DISABLE_CPU_SLEEP_STATE2) self.cv_LPAR.lpar_run_command(BMC_CONST.GET_CPU_SLEEP_STATE2) self.cv_LPAR.lpar_run_command(BMC_CONST.GET_CPU_SLEEP_STATE1) self.cv_LPAR.lpar_run_command(BMC_CONST.GET_CPU_SLEEP_STATE0) ## # @brief This function is mainly used to clear hardware gard entries. # It will perform below steps # 1. Reboot the system(Power off/on) # 2. Clear any Hardware gard entries # 3. Again reboot the system, to make use of garded Hardware. # # @return BMC_CONST.FW_SUCCESS or raise OpTestError # def clearGardEntries(self): # Power off and on the system. self.cv_IPMI.ipmi_power_off() self.cv_IPMI.ipmi_power_on() if int(self.cv_SYSTEM.sys_ipl_wait_for_working_state()): l_msg = "System failed to boot host OS" raise OpTestError(l_msg) time.sleep(BMC_CONST.LPAR_BRINGUP_TIME) # Clearing gard entries after lpar comes up self.cv_LPAR.lpar_get_OS_Level() l_con = self.cv_SYSTEM.sys_get_ipmi_console() self.cv_IPMI.ipmi_lpar_login(l_con) self.cv_IPMI.ipmi_lpar_set_unique_prompt(l_con) self.cv_IPMI.run_lpar_cmd_on_ipmi_console("uname -a") l_dir = "/tmp/skiboot" self.cv_IPMI.run_lpar_cmd_on_ipmi_console("cd %s/external/gard/;" % l_dir) l_cmd = "./gard list; echo $?" self.cv_IPMI.run_lpar_cmd_on_ipmi_console(l_cmd) l_cmd = "./gard clear all; echo $?" 
l_res = self.cv_IPMI.run_lpar_cmd_on_ipmi_console(l_cmd) if int(l_res[-1]): l_msg = "Clearing gard entries through gard tool is failed" raise OpTestError(l_msg) l_cmd = "./gard list; echo $?" self.cv_IPMI.run_lpar_cmd_on_ipmi_console(l_cmd) # Rebooting the system again to make use of garded hardware self.cv_IPMI.ipmi_power_off() self.cv_IPMI.ipmi_power_on() if int(self.cv_SYSTEM.sys_ipl_wait_for_working_state()): l_msg = "System failed to boot host OS" raise OpTestError(l_msg) time.sleep(BMC_CONST.LPAR_BRINGUP_TIME) self.cv_LPAR.lpar_get_OS_Level() self.cv_SYSTEM.sys_ipmi_close_console(l_con) ## # @brief This function executes HMI test case based on the i_test value, Before test starts # disabling kdump service to make sure system reboots, after injecting non-recoverable errors. # # @param i_test @type int: this is the type of test case want to execute # BMC_CONST.HMI_PROC_RECV_DONE: Processor recovery done # BMC_CONST.HMI_PROC_RECV_ERROR_MASKED: proc_recv_error_masked # BMC_CONST.HMI_MALFUNCTION_ALERT: malfunction_alert # BMC_CONST.HMI_HYPERVISOR_RESOURCE_ERROR: hypervisor resource error # # @return BMC_CONST.FW_SUCCESS or raise OpTestError # def testHMIHandling(self, i_test): l_test = i_test self.test_init() l_con = self.cv_SYSTEM.sys_get_ipmi_console() self.cv_IPMI.ipmi_lpar_login(l_con) self.cv_IPMI.ipmi_lpar_set_unique_prompt(l_con) self.cv_IPMI.run_lpar_cmd_on_ipmi_console("uname -a") self.cv_IPMI.run_lpar_cmd_on_ipmi_console("cat /etc/os-release") self.cv_IPMI.run_lpar_cmd_on_ipmi_console("service kdump status") self.cv_IPMI.run_lpar_cmd_on_ipmi_console("service kdump stop") self.cv_IPMI.run_lpar_cmd_on_ipmi_console("service kdump status") self.cv_IPMI.run_lpar_cmd_on_ipmi_console("cd %s/external/xscom-utils/;" % self.l_dir) self.cv_IPMI.run_lpar_cmd_on_ipmi_console("lscpu") self.cv_IPMI.run_lpar_cmd_on_ipmi_console("dmesg -D") if l_test == BMC_CONST.HMI_PROC_RECV_DONE: self.test_proc_recv_done() elif l_test == BMC_CONST.HMI_PROC_RECV_ERROR_MASKED: 
self.test_proc_recv_error_masked() elif l_test == BMC_CONST.HMI_MALFUNCTION_ALERT: self.test_malfunction_allert() elif l_test == BMC_CONST.HMI_HYPERVISOR_RESOURCE_ERROR: self.test_hyp_resource_err() else: l_msg = "Please provide valid test case" raise OpTestError(l_msg) self.cv_SYSTEM.sys_ipmi_close_console(l_con) return BMC_CONST.FW_SUCCESS ## # @brief This function is used to test HMI: processor recovery done # and also this function injecting error on all the cpus one by one and # verify whether cpu is recovered or not. # # @return BMC_CONST.FW_SUCCESS or raise OpTestError # def test_proc_recv_done(self): for l_pair in self.l_dic: l_chip = l_pair[0] for l_core in l_pair[1]: l_reg = "1%s013100" % l_core l_cmd = "./putscom -c %s %s 0000000000100000; echo $?" % (l_chip, l_reg) self.cv_IPMI.run_lpar_cmd_on_ipmi_console("dmesg -C") time.sleep(10) l_res = self.cv_IPMI.run_lpar_cmd_on_ipmi_console(l_cmd) time.sleep(10) if l_res[-1] == "0": print "Injected thread hang recoverable error" else: if any("Kernel panic - not syncing" in line for line in l_res): l_msg = "Processor recovery failed: Kernel got panic" elif any("Petitboot" in line for line in l_res): l_msg = "System reached petitboot:Processor recovery failed" elif any("ISTEP" in line for line in l_res): l_msg = "System started booting: Processor recovery failed" else: l_msg = "Failed to inject thread hang recoverable error" print l_msg raise OpTestError(l_msg) l_res = self.cv_IPMI.run_lpar_cmd_on_ipmi_console("dmesg") if any("Processor Recovery done" in line for line in l_res) and any( "Harmless Hypervisor Maintenance interrupt [Recovered]" in line for line in l_res ): print "Processor recovery done" else: l_msg = "HMI handling failed to log message: for proc_recv_done" raise OpTestError(l_msg) time.sleep(BMC_CONST.HMI_TEST_CASE_SLEEP_TIME) return BMC_CONST.FW_SUCCESS ## # @brief This function is used to test HMI: proc_recv_error_masked # Processor went through recovery for an error which is actually masked for 
reporting # this function also injecting the error on all the cpu's one-by-one. # # @return BMC_CONST.FW_SUCCESS or raise OpTestError # def test_proc_recv_error_masked(self): for l_pair in self.l_dic: l_chip = l_pair[0] for l_core in l_pair[1]: l_reg = "1%s013100" % l_core l_cmd = "./putscom -c %s %s 0000000000080000; echo $?" % (l_chip, l_reg) self.cv_IPMI.run_lpar_cmd_on_ipmi_console("dmesg -C") time.sleep(10) l_res = self.cv_IPMI.run_lpar_cmd_on_ipmi_console(l_cmd) time.sleep(10) if l_res[-1] == "0": print "Injected thread hang recoverable error" else: if any("Kernel panic - not syncing" in line for line in l_res): l_msg = "Processor recovery failed: Kernel got panic" elif any("Petitboot" in line for line in l_res): l_msg = "System reached petitboot:Processor recovery failed" elif any("ISTEP" in line for line in l_res): l_msg = "System started booting: Processor recovery failed" else: l_msg = "Failed to inject thread hang recoverable error" print l_msg raise OpTestError(l_msg) l_res = self.cv_IPMI.run_lpar_cmd_on_ipmi_console("dmesg") if any("Processor Recovery done" in line for line in l_res) and any( "Harmless Hypervisor Maintenance interrupt [Recovered]" in line for line in l_res ): print "Processor recovery done" else: l_msg = "HMI handling failed to log message" raise OpTestError(l_msg) time.sleep(BMC_CONST.HMI_TEST_CASE_SLEEP_TIME) return BMC_CONST.FW_SUCCESS ## # @brief This function is used to test hmi malfunction alert:Core checkstop # A processor core in the system has to be checkstopped (failed recovery). 
# Injecting core checkstop on random core of random chip # # @return BMC_CONST.FW_SUCCESS or raise OpTestError # def test_malfunction_allert(self): # Get random pair of chip vs cores l_pair = random.choice(self.l_dic) # Get random chip id l_chip = l_pair[0] # Get random core number l_core = random.choice(l_pair[1]) l_reg = "1%s013100" % l_core l_cmd = "./putscom -c %s %s 1000000000000000" % (l_chip, l_reg) l_res = self.cv_IPMI.run_lpar_cmd_on_ipmi_console(l_cmd) if any("Kernel panic - not syncing" in line for line in l_res): print "Malfunction alert: kernel got panic" elif any("login:"******"System booted to host OS without any kernel panic message" elif any("Petitboot" in line for line in l_res): print "System reached petitboot without any kernel panic message" elif any("ISTEP" in line for line in l_res): print "System started booting without any kernel panic message" else: l_msg = "HMI: Malfunction alert failed" raise OpTestError(l_msg) return BMC_CONST.FW_SUCCESS ## # @brief This function is used to test HMI: Hypervisor resource error # Injecting Hypervisor resource error on random core of random chip # # @return BMC_CONST.FW_SUCCESS or raise OpTestError # def test_hyp_resource_err(self): # Get random pair of chip vs cores l_pair = random.choice(self.l_dic) # Get random chip id l_chip = l_pair[0] # Get random core number l_core = random.choice(l_pair[1]) l_reg = "1%s013100" % l_core l_cmd = "./putscom -c %s %s 0000000000008000" % (l_chip, l_reg) l_res = self.cv_IPMI.run_lpar_cmd_on_ipmi_console(l_cmd) if any("Kernel panic - not syncing" in line for line in l_res) and any( "Hypervisor Resource error - core check stop" in line for line in l_res ): print "Hypervisor resource error: kernel got panic" elif any("login:"******"System booted to host OS without any kernel panic message" elif any("Petitboot" in line for line in l_res): print "System reached petitboot without any kernel panic message" elif any("ISTEP" in line for line in l_res): print "System started 
booting without any kernel panic message" else: l_msg = "HMI: Hypervisor resource error failed" raise OpTestError(l_msg) return BMC_CONST.FW_SUCCESS
class OpTestPCI:
    ## Initialize this object
    #  @param i_bmcIP The IP address of the BMC
    #  @param i_bmcUser The userid to log into the BMC with
    #  @param i_bmcPasswd The password of the userid to log into the BMC with
    #  @param i_bmcUserIpmi The userid to issue the BMC IPMI commands with
    #  @param i_bmcPasswdIpmi The password of BMC IPMI userid
    #  @param i_ffdcDir Optional param to indicate where to write FFDC
    #
    # "Only required for inband tests" else Default = None
    # @param i_hostip The IP address of the HOST
    # @param i_hostuser The userid to log into the HOST
    # @param i_hostPasswd The password of the userid to log into the HOST with
    # @param i_hostLspci Name of the file holding the known good
    #        "lspci -mm -n" output; None or "empty.txt" skips the comparison
    #
    def __init__(self, i_bmcIP, i_bmcUser, i_bmcPasswd,
                 i_bmcUserIpmi, i_bmcPasswdIpmi, i_ffdcDir=None, i_hostip=None,
                 i_hostuser=None, i_hostPasswd=None, i_hostLspci=None):
        self.cv_BMC = OpTestBMC(i_bmcIP, i_bmcUser, i_bmcPasswd, i_ffdcDir)
        self.cv_IPMI = OpTestIPMI(i_bmcIP, i_bmcUserIpmi, i_bmcPasswdIpmi,
                                  i_ffdcDir, i_hostip, i_hostuser, i_hostPasswd)
        self.cv_HOST = OpTestHost(i_hostip, i_hostuser, i_hostPasswd, i_bmcIP, i_ffdcDir)
        self.cv_SYSTEM = OpTestSystem(i_bmcIP, i_bmcUser, i_bmcPasswd,
                                      i_bmcUserIpmi, i_bmcPasswdIpmi, i_ffdcDir,
                                      i_hostip, i_hostuser, i_hostPasswd)
        self.util = OpTestUtil()
        self.lspci_file = i_hostLspci

    ##
    # @brief Run "diff -u <known good file> -" with i_data fed on stdin.
    #        stdout is piped so that the unified diff is actually captured
    #        and can be reported by the caller; stderr is left untouched.
    #
    # @param i_data @type str: lspci output to compare with the contents of
    #        self.pci_good_data_file
    #
    # @return tuple (exit status of diff, captured unified diff text);
    #         diff exits with 0 when both inputs are identical
    #
    def _diff_against_good_data(self, i_data):
        diff_process = subprocess.Popen(
            ['diff', "-u", self.pci_good_data_file, "-"],
            stdin=subprocess.PIPE, stdout=subprocess.PIPE,
            universal_newlines=True)
        # communicate() waits for the process, so returncode is set afterwards
        diff_stdout, _ = diff_process.communicate(i_data)
        return diff_process.returncode, diff_stdout

    ##
    # @brief This function will get the PCI and USB subsystem info.
    #        It compares the known good data of "lspci -mm -n" with the
    #        current lspci data, first at petitboot and then, after a power
    #        cycle with boot device set to disk, in the host OS.
    #        User needs to specify the corresponding file name in the
    #        machines xml, like lspci.txt, which contains the "lspci -mm -n"
    #        command output of the system in a known good state.
    #        The comparison is skipped when no file name is configured
    #        (None, empty or "empty.txt").
    #        tools used are lspci and lsusb
    #
    # @return BMC_CONST.FW_SUCCESS or raise OpTestError
    #
    def test_host_pci_devices_info(self):
        # i_hostLspci defaults to None; treat that like the "empty.txt"
        # placeholder instead of failing inside os.path.join()
        if not self.lspci_file or self.lspci_file == "empty.txt":
            print("Skipping the pci devices comparision as missing the lspci data file name in machines xml")
            return BMC_CONST.FW_SUCCESS
        # The data file is looked up relative to the part of this module's
        # directory path that precedes "testcases"
        filename = os.path.join(os.path.dirname(__file__).split('testcases')[0], self.lspci_file)
        if not os.path.isfile(filename):
            raise OpTestError("lspci file %s not found in top level directory" % filename)
        self.pci_good_data_file = filename
        self.test_skiroot_pci_devices()
        # Power cycle with boot device set to disk before the host OS check
        self.cv_IPMI.ipmi_set_boot_to_disk()
        self.cv_IPMI.ipmi_power_off()
        self.cv_IPMI.ipmi_power_on()
        self.cv_IPMI.ipl_wait_for_working_state()
        self.test_host_pci_devices()
        return BMC_CONST.FW_SUCCESS

    ##
    # @brief This function will get the "lspci -mm -n" output from the petitboot
    #        and compares it with known good lspci data.
    #        self.pci_good_data_file must be set before calling this.
    #
    # @return BMC_CONST.FW_SUCCESS or raise OpTestError
    #
    def test_skiroot_pci_devices(self):
        cmd = "lspci -mm -n"
        self.console = self.cv_SYSTEM.sys_get_ipmi_console()
        try:
            self.cv_SYSTEM.sys_ipmi_boot_system_to_petitboot(self.console)
            self.cv_IPMI.ipmi_host_set_unique_prompt(self.console)
            self.cv_IPMI.run_host_cmd_on_ipmi_console("uname -a")
            self.cv_IPMI.run_host_cmd_on_ipmi_console("cat /etc/os-release")
            res = self.cv_IPMI.run_host_cmd_on_ipmi_console(cmd)
        finally:
            # Release the console even when one of the commands above fails
            self.cv_SYSTEM.sys_ipmi_close_console(self.console)
        # The first element is dropped before comparing
        # NOTE(review): presumably it is the echoed command line - confirm
        self.pci_data_petitboot = '\n'.join(res[1:])
        rc, diff_output = self._diff_against_good_data(self.pci_data_petitboot + '\n')
        if rc != 0:
            print(diff_output)
            raise OpTestError("There is a mismatch b/w known good output and tested petitboot lspci output")
        print("All the pci devices are detected at petitboot")
        return BMC_CONST.FW_SUCCESS

    ##
    # @brief This function will get the "lspci -mm -n" output from the host OS
    #        and compares it with known good lspci data.
    #        self.pci_good_data_file must be set before calling this.
    #
    # @return BMC_CONST.FW_SUCCESS or raise OpTestError
    #
    def test_host_pci_devices(self):
        self.cv_HOST.host_check_command("lspci", "lsusb")
        self.cv_HOST.host_list_pci_devices()
        self.cv_HOST.host_get_pci_verbose_info()
        self.cv_HOST.host_list_usb_devices()
        l_res = self.cv_HOST.host_run_command("lspci -mm -n")
        # Normalise CRLF line endings so they do not show up as differences
        self.pci_data_hostos = l_res.replace("\r\n", "\n")
        rc, diff_output = self._diff_against_good_data(self.pci_data_hostos)
        if rc != 0:
            print(diff_output)
            raise OpTestError("There is a mismatch b/w known good output and tested host OS lspci output")
        print("All the pci devices are detected in host OS")
        return BMC_CONST.FW_SUCCESS
class OpTestMCColdResetEffects():
    ## Initialize this object
    #  @param i_bmcIP The IP address of the BMC
    #  @param i_bmcUser The userid to log into the BMC with
    #  @param i_bmcPasswd The password of the userid to log into the BMC with
    #  @param i_bmcUserIpmi The userid to issue the BMC IPMI commands with
    #  @param i_bmcPasswdIpmi The password of BMC IPMI userid
    #  @param i_ffdcDir Optional param to indicate where to write FFDC
    #
    # "Only required for inband tests" else Default = None
    # @param i_hostip The IP address of the HOST
    # @param i_hostuser The userid to log into the HOST
    # @param i_hostPasswd The password of the userid to log into the HOST with
    #
    def __init__(self, i_bmcIP, i_bmcUser, i_bmcPasswd,
                 i_bmcUserIpmi, i_bmcPasswdIpmi, i_ffdcDir=None, i_hostip=None,
                 i_hostuser=None, i_hostPasswd=None):
        self.cv_BMC = OpTestBMC(i_bmcIP, i_bmcUser, i_bmcPasswd, i_ffdcDir)
        self.cv_IPMI = OpTestIPMI(i_bmcIP, i_bmcUserIpmi, i_bmcPasswdIpmi,
                                  i_ffdcDir, i_hostip, i_hostuser, i_hostPasswd)
        self.cv_HOST = OpTestHost(i_hostip, i_hostuser, i_hostPasswd, i_bmcIP, i_ffdcDir)
        self.cv_SYSTEM = OpTestSystem(i_bmcIP, i_bmcUser, i_bmcPasswd,
                                      i_bmcUserIpmi, i_bmcPasswdIpmi, i_ffdcDir,
                                      i_hostip, i_hostuser, i_hostPasswd)
        self.util = OpTestUtil()
        self.opTestSensors = OpTestSensors(i_bmcIP, i_bmcUser, i_bmcPasswd,
                                           i_bmcUserIpmi, i_bmcPasswdIpmi, i_ffdcDir,
                                           i_hostip, i_hostuser, i_hostPasswd)

    ##
    # @brief This function will test BMC Cold reset vs Host FW status
    #        1. When system is in runtime issue BMC Cold reset.
    #        2. Check Host FW services and drivers.
    #        3. Run sensors command
    #        4. Get list of chips
    #        5. This is expected to fail.
    #           https://github.com/open-power/op-build/issues/482
    #        6. Reboot the system at the end of test.
    #        Any failure in steps 1-4 is reported and remembered; the OPAL
    #        msg log is still gathered and the system is still rebooted
    #        before the failure is raised.
    #
    # @return BMC_CONST.FW_SUCCESS or raise OpTestError
    #
    def test_bmc_cold_reset_effects(self):
        print("Test BMC Cold reset effects versus Host Firmware Status")
        self.cv_SYSTEM.sys_bmc_power_on_validate_host()
        print("Issue BMC Cold reset")
        result = True
        try:
            self.cv_SYSTEM.sys_cold_reset_bmc()
            l_dir = BMC_CONST.SKIBOOT_WORKING_DIR
            self.cv_HOST.host_clone_skiboot_source(l_dir)
            self.cv_HOST.host_compile_xscom_utilities(l_dir)
            l_con = self.cv_SYSTEM.sys_get_ipmi_console()
            try:
                self.cv_IPMI.ipmi_host_login(l_con)
                self.cv_IPMI.ipmi_host_set_unique_prompt(l_con)
                self.cv_IPMI.run_host_cmd_on_ipmi_console("uname -a")
                self.cv_IPMI.run_host_cmd_on_ipmi_console("cd %s/external/xscom-utils/; ./getscom -l" % l_dir)
                self.opTestSensors.test_hwmon_driver()
            finally:
                # Release the console even when one of the checks fails
                self.cv_SYSTEM.sys_ipmi_close_console(l_con)
        except Exception as l_err:
            # Deliberate best effort: remember the failure and report its
            # cause, but keep going so the logs are gathered and the system
            # is rebooted. KeyboardInterrupt/SystemExit are not swallowed.
            result = False
            print("BMC Cold reset effects check failed: %s" % l_err)

        print("Gathering the OPAL msg logs")
        self.cv_HOST.host_gather_opal_msg_log()
        # Reboot the system at the end of the test, whatever the outcome
        self.cv_IPMI.ipmi_power_off()
        self.cv_SYSTEM.sys_bmc_power_on_validate_host()
        if not result:
            raise OpTestError("MC Cold reset vs Host FW Test failed")
        return BMC_CONST.FW_SUCCESS
class OpTestPCI():
    ## Initialize this object
    #  @param i_bmcIP The IP address of the BMC
    #  @param i_bmcUser The userid to log into the BMC with
    #  @param i_bmcPasswd The password of the userid to log into the BMC with
    #  @param i_bmcUserIpmi The userid to issue the BMC IPMI commands with
    #  @param i_bmcPasswdIpmi The password of BMC IPMI userid
    #  @param i_ffdcDir Optional param to indicate where to write FFDC
    #
    # "Only required for inband tests" else Default = None
    # @param i_hostip The IP address of the HOST
    # @param i_hostuser The userid to log into the HOST
    # @param i_hostPasswd The password of the userid to log into the HOST with
    # @param i_hostLspci Name of the file holding the known good
    #        "lspci -mm -n" output; None or "empty.txt" skips the comparison
    #
    def __init__(self, i_bmcIP, i_bmcUser, i_bmcPasswd,
                 i_bmcUserIpmi, i_bmcPasswdIpmi, i_ffdcDir=None, i_hostip=None,
                 i_hostuser=None, i_hostPasswd=None, i_hostLspci=None):
        self.cv_BMC = OpTestBMC(i_bmcIP, i_bmcUser, i_bmcPasswd, i_ffdcDir)
        self.cv_IPMI = OpTestIPMI(i_bmcIP, i_bmcUserIpmi, i_bmcPasswdIpmi,
                                  i_ffdcDir, i_hostip, i_hostuser, i_hostPasswd)
        self.cv_HOST = OpTestHost(i_hostip, i_hostuser, i_hostPasswd, i_bmcIP, i_ffdcDir)
        self.cv_SYSTEM = OpTestSystem(i_bmcIP, i_bmcUser, i_bmcPasswd,
                                      i_bmcUserIpmi, i_bmcPasswdIpmi, i_ffdcDir,
                                      i_hostip, i_hostuser, i_hostPasswd)
        self.util = OpTestUtil()
        self.lspci_file = i_hostLspci

    ##
    # @brief Run "diff -u <known good file> -" with i_data fed on stdin.
    #        stdout is piped so that the unified diff is actually captured
    #        and can be reported by the caller; stderr is left untouched.
    #
    # @param i_data @type str: lspci output to compare with the contents of
    #        self.pci_good_data_file
    #
    # @return tuple (exit status of diff, captured unified diff text);
    #         diff exits with 0 when both inputs are identical
    #
    def _diff_against_good_data(self, i_data):
        diff_process = subprocess.Popen(
            ['diff', "-u", self.pci_good_data_file, "-"],
            stdin=subprocess.PIPE, stdout=subprocess.PIPE,
            universal_newlines=True)
        # communicate() waits for the process, so returncode is set afterwards
        diff_stdout, _ = diff_process.communicate(i_data)
        return diff_process.returncode, diff_stdout

    ##
    # @brief This function will get the PCI and USB subsystem info.
    #        It compares the known good data of "lspci -mm -n" with the
    #        current lspci data, first at petitboot and then, after a power
    #        cycle with boot device set to disk, in the host OS.
    #        User needs to specify the corresponding file name in the
    #        machines xml, like lspci.txt, which contains the "lspci -mm -n"
    #        command output of the system in a known good state.
    #        The comparison is skipped when no file name is configured
    #        (None, empty or "empty.txt").
    #        tools used are lspci and lsusb
    #
    # @return BMC_CONST.FW_SUCCESS or raise OpTestError
    #
    def test_host_pci_devices_info(self):
        # i_hostLspci defaults to None; treat that like the "empty.txt"
        # placeholder instead of failing inside os.path.join()
        if not self.lspci_file or self.lspci_file == "empty.txt":
            print("Skipping the pci devices comparision as missing the lspci data file name in machines xml")
            return BMC_CONST.FW_SUCCESS
        # The data file is looked up relative to the part of this module's
        # directory path that precedes "testcases"
        filename = os.path.join(os.path.dirname(__file__).split('testcases')[0], self.lspci_file)
        if not os.path.isfile(filename):
            raise OpTestError("lspci file %s not found in top level directory" % filename)
        self.pci_good_data_file = filename
        self.test_skiroot_pci_devices()
        # Power cycle with boot device set to disk before the host OS check
        self.cv_IPMI.ipmi_set_boot_to_disk()
        self.cv_IPMI.ipmi_power_off()
        self.cv_IPMI.ipmi_power_on()
        self.cv_IPMI.ipl_wait_for_working_state()
        self.test_host_pci_devices()
        return BMC_CONST.FW_SUCCESS

    ##
    # @brief This function will get the "lspci -mm -n" output from the petitboot
    #        and compares it with known good lspci data.
    #        self.pci_good_data_file must be set before calling this.
    #
    # @return BMC_CONST.FW_SUCCESS or raise OpTestError
    #
    def test_skiroot_pci_devices(self):
        cmd = "lspci -mm -n"
        self.console = self.cv_SYSTEM.sys_get_ipmi_console()
        try:
            self.cv_SYSTEM.sys_ipmi_boot_system_to_petitboot(self.console)
            self.cv_IPMI.ipmi_host_set_unique_prompt(self.console)
            self.cv_IPMI.run_host_cmd_on_ipmi_console("uname -a")
            self.cv_IPMI.run_host_cmd_on_ipmi_console("cat /etc/os-release")
            res = self.cv_IPMI.run_host_cmd_on_ipmi_console(cmd)
        finally:
            # Release the console even when one of the commands above fails
            self.cv_SYSTEM.sys_ipmi_close_console(self.console)
        # The first element is dropped before comparing
        # NOTE(review): presumably it is the echoed command line - confirm
        self.pci_data_petitboot = '\n'.join(res[1:])
        rc, diff_output = self._diff_against_good_data(self.pci_data_petitboot + '\n')
        if rc != 0:
            print(diff_output)
            raise OpTestError("There is a mismatch b/w known good output and tested petitboot lspci output")
        print("All the pci devices are detected at petitboot")
        return BMC_CONST.FW_SUCCESS

    ##
    # @brief This function will get the "lspci -mm -n" output from the host OS
    #        and compares it with known good lspci data.
    #        self.pci_good_data_file must be set before calling this.
    #
    # @return BMC_CONST.FW_SUCCESS or raise OpTestError
    #
    def test_host_pci_devices(self):
        self.cv_HOST.host_check_command("lspci", "lsusb")
        self.cv_HOST.host_list_pci_devices()
        self.cv_HOST.host_get_pci_verbose_info()
        self.cv_HOST.host_list_usb_devices()
        l_res = self.cv_HOST.host_run_command("lspci -mm -n")
        # Normalise CRLF line endings so they do not show up as differences
        self.pci_data_hostos = l_res.replace("\r\n", "\n")
        rc, diff_output = self._diff_against_good_data(self.pci_data_hostos)
        if rc != 0:
            print(diff_output)
            raise OpTestError("There is a mismatch b/w known good output and tested host OS lspci output")
        print("All the pci devices are detected in host OS")
        return BMC_CONST.FW_SUCCESS