def setup_ntp(ptfhost, duthost):
    """setup ntp client and server"""
    # enable ntp server
    ptfhost.service(name="ntp", state="started")

    # setup ntp on dut to sync with ntp server
    config_facts = duthost.config_facts(host=duthost.hostname, source="running")['ansible_facts']
    ntp_servers = config_facts.get('NTP_SERVER', {})
    for ntp_server in ntp_servers:
        duthost.command("config ntp del %s" % ntp_server)

    ptfip = ptfhost.host.options['inventory_manager'].get_host(ptfhost.hostname).vars['ansible_host']
    duthost.command("config ntp add %s" % ptfip)

    wait_until(120, 5, check_ntp_status, ptfhost)

    yield

    # stop ntp server
    ptfhost.service(name="ntp", state="stopped")
    # reset ntp client configuration
    duthost.command("config ntp del %s" % ptfip)
    for ntp_server in ntp_servers:
        duthost.command("config ntp add %s" % ntp_server)
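# NOTE: check_ntp_status() is polled by the fixture above but is not defined in
# this section. A minimal sketch of what it might look like, assuming the host
# provides an `ntpstat` binary whose exit code indicates clock synchronisation:
def check_ntp_status(host):
    res = host.command("ntpstat", module_ignore_errors=True)
    # ntpstat returns 0 only when the clock is synchronised to an NTP server
    return res['rc'] == 0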
def setup_reboot_standby(self, duthost2, localhost, delay=10, timeout=180):
    dut2_ports = natsorted(g_vars['dut2_port_alias']['port_name_map'].keys())[:len(g_vars['dut2_all_interfaces'])]
    for port in dut2_ports:
        duthost2.shell("config interface shutdown {}".format(port))
    duthost2.shell("config save -y")
    duthost2.shell("nohup reboot &", module_ignore_errors=True)
    time.sleep(20)

    yield

    # waiting for ssh to startup
    dut_ip = duthost2.host.options['inventory_manager'].get_host(duthost2.hostname).address
    localhost.wait_for(host=dut_ip,
                       port=SONIC_SSH_PORT,
                       state='started',
                       search_regex=SONIC_SSH_REGEX,
                       delay=delay,
                       timeout=timeout)
    wait_until(120, 10, duthost2.critical_services_fully_started)

    for port in dut2_ports:
        duthost2.shell("config interface startup {}".format(port))
    duthost2.shell("config save -y")
    time.sleep(5)
def turn_off_psu_and_check_thermal_control(dut, psu_ctrl, psu, mocker):
    """
    @summary: Turn off a PSU, check all FAN speeds are set to 100% according to the thermal control policy file.
    """
    logging.info("Turn off PSU %s" % str(psu["psu_id"]))
    psu_ctrl.turn_off_psu(psu["psu_id"])
    time.sleep(5)

    psu_under_test = None
    psu_line_pattern = re.compile(r"PSU\s+\d+\s+(OK|NOT OK|NOT PRESENT)")
    cli_psu_status = dut.command(CMD_PLATFORM_PSUSTATUS)
    for line in cli_psu_status["stdout_lines"][2:]:
        assert psu_line_pattern.match(line), "Unexpected PSU status output"
        fields = line.split()
        if fields[2] != "OK":
            psu_under_test = fields[1]
    assert psu_under_test is not None, "No PSU is turned off"

    logging.info('Wait and check all FAN speed turn to 100%...')
    wait_until(THERMAL_CONTROL_TEST_WAIT_TIME, THERMAL_CONTROL_TEST_CHECK_INTERVAL, mocker.check_all_fan_speed, 100)

    psu_ctrl.turn_on_psu(psu["psu_id"])
def test_thermal_control_fan_status(testbed_devices, mocker_factory):
    """
    @summary: Make FAN absence, over speed and under speed, check logs and LED color.
    """
    dut = testbed_devices["dut"]
    loganalyzer = LogAnalyzer(ansible_host=dut, marker_prefix='thermal_control')
    loganalyzer.load_common_config()

    with ThermalPolicyFileContext(dut, THERMAL_POLICY_VALID_FILE):
        fan_mocker = mocker_factory(dut, 'FanStatusMocker')
        if fan_mocker is None:
            pytest.skip("No FanStatusMocker for %s, skip rest of the testing in this case" % dut.facts['asic_type'])

        logging.info('Mock FAN status data...')
        fan_mocker.mock_data()  # make data random
        restart_thermal_control_daemon(dut)
        wait_until(THERMAL_CONTROL_TEST_WAIT_TIME, THERMAL_CONTROL_TEST_CHECK_INTERVAL, fan_mocker.check_all_fan_speed, 60)
        check_thermal_algorithm_status(dut, mocker_factory, False)

        single_fan_mocker = mocker_factory(dut, 'SingleFanMocker')
        time.sleep(THERMAL_CONTROL_TEST_WAIT_TIME)

        if single_fan_mocker.is_fan_removable():
            loganalyzer.expect_regex = [LOG_EXPECT_FAN_REMOVE_RE]
            with loganalyzer:
                logging.info('Mocking an absence FAN...')
                single_fan_mocker.mock_absence()
                check_cli_output_with_mocker(dut, single_fan_mocker, CMD_PLATFORM_FANSTATUS, THERMAL_CONTROL_TEST_WAIT_TIME)

            loganalyzer.expect_regex = [LOG_EXPECT_FAN_REMOVE_CLEAR_RE]
            with loganalyzer:
                logging.info('Make the absence FAN back to presence...')
                single_fan_mocker.mock_presence()
                check_cli_output_with_mocker(dut, single_fan_mocker, CMD_PLATFORM_FANSTATUS, THERMAL_CONTROL_TEST_WAIT_TIME)

        loganalyzer.expect_regex = [LOG_EXPECT_FAN_OVER_SPEED_RE]
        with loganalyzer:
            logging.info('Mocking an over speed FAN...')
            single_fan_mocker.mock_over_speed()
            check_cli_output_with_mocker(dut, single_fan_mocker, CMD_PLATFORM_FANSTATUS, THERMAL_CONTROL_TEST_WAIT_TIME)

        loganalyzer.expect_regex = [LOG_EXPECT_FAN_OVER_SPEED_CLEAR_RE]
        with loganalyzer:
            logging.info('Make the over speed FAN back to normal...')
            single_fan_mocker.mock_normal_speed()
            check_cli_output_with_mocker(dut, single_fan_mocker, CMD_PLATFORM_FANSTATUS, THERMAL_CONTROL_TEST_WAIT_TIME)

        loganalyzer.expect_regex = [LOG_EXPECT_FAN_UNDER_SPEED_RE]
        with loganalyzer:
            logging.info('Mocking an under speed FAN...')
            single_fan_mocker.mock_under_speed()
            check_cli_output_with_mocker(dut, single_fan_mocker, CMD_PLATFORM_FANSTATUS, THERMAL_CONTROL_TEST_WAIT_TIME)

        loganalyzer.expect_regex = [LOG_EXPECT_FAN_UNDER_SPEED_CLEAR_RE]
        with loganalyzer:
            logging.info('Make the under speed FAN back to normal...')
            single_fan_mocker.mock_normal_speed()
            check_cli_output_with_mocker(dut, single_fan_mocker, CMD_PLATFORM_FANSTATUS, THERMAL_CONTROL_TEST_WAIT_TIME)
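# NOTE: CMD_PLATFORM_PSUSTATUS and CMD_PLATFORM_FANSTATUS are used in the tests
# above but not defined in this section. They are assumed to map to the SONiC
# platform CLI commands that print PSU and fan status:
CMD_PLATFORM_PSUSTATUS = "show platform psustatus"
CMD_PLATFORM_FANSTATUS = "show platform fan"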
def test_set_psu_fan_speed(duthost, mocker_factory):
    hwsku = duthost.facts["hwsku"]
    psu_num = SWITCH_MODELS[hwsku]['psus']['number']
    hot_swappable = SWITCH_MODELS[hwsku]['psus']['hot_swappable']
    if not hot_swappable:
        pytest.skip('The SKU {} does not support this test case.'.format(hwsku))

    logging.info('Create mocker, it may take a few seconds...')
    single_fan_mocker = mocker_factory(duthost, 'SingleFanMocker')

    logging.info('Mock FAN absence...')
    single_fan_mocker.mock_absence()
    assert wait_until(THERMAL_CONTROL_TEST_WAIT_TIME, THERMAL_CONTROL_TEST_CHECK_INTERVAL, check_cooling_cur_state, duthost, 10, operator.eq), \
        'Current cooling state is {}'.format(get_cooling_cur_state(duthost))

    logging.info('Wait {} seconds for the policy to take effect...'.format(THERMAL_CONTROL_TEST_CHECK_INTERVAL))
    time.sleep(THERMAL_CONTROL_TEST_CHECK_INTERVAL)
    full_speeds = []
    for index in range(psu_num):
        speed = get_psu_speed(duthost, index)
        full_speeds.append(speed)
    logging.info('Full speed={}'.format(full_speeds))

    logging.info('Mock FAN presence...')
    single_fan_mocker.mock_presence()
    assert wait_until(THERMAL_CONTROL_TEST_WAIT_TIME, THERMAL_CONTROL_TEST_CHECK_INTERVAL, check_cooling_cur_state, duthost, 10, operator.ne), \
        'Current cooling state is {}'.format(get_cooling_cur_state(duthost))

    logging.info('Wait {} seconds for the policy to take effect...'.format(THERMAL_CONTROL_TEST_CHECK_INTERVAL))
    time.sleep(THERMAL_CONTROL_TEST_CHECK_INTERVAL)
    cooling_cur_state = get_cooling_cur_state(duthost)
    logging.info('Cooling level changed to {}'.format(cooling_cur_state))
    current_speeds = []
    for index in range(psu_num):
        speed = get_psu_speed(duthost, index)
        current_speeds.append(speed)
    logging.info('Current speed={}'.format(current_speeds))

    index = 0
    if cooling_cur_state < 6:
        cooling_cur_state = 6
    expect_multiple = float(10) / cooling_cur_state
    while index < psu_num:
        full_speed = full_speeds[index]
        current_speed = current_speeds[index]
        index += 1
        if not full_speed or not current_speed:
            continue

        actual_multiple = float(full_speed) / current_speed
        if expect_multiple > actual_multiple:
            assert actual_multiple > expect_multiple * (1 - PSU_SPEED_TOLERANCE)
        elif expect_multiple < actual_multiple:
            assert actual_multiple < expect_multiple * (1 + PSU_SPEED_TOLERANCE)
def __verify_lag_minlink(self, host, lag_name, intf, neighbor_intf, po_interfaces, po_flap, deselect_time, wait_timeout=30):
    delay = 5

    try:
        host.shutdown(neighbor_intf)

        # Let the PortChannel react to the neighbor interface shutdown
        time.sleep(deselect_time)

        # Verify PortChannel member selection state is reported correctly
        for po_intf in po_interfaces.keys():
            if po_intf != intf:
                command = 'bash -c "teamdctl %s state dump" | python -c "import sys, json; print json.load(sys.stdin)[\'ports\'][\'%s\'][\'runner\'][\'selected\']"' % (lag_name, po_intf)
                wait_until(wait_timeout, delay, self.__check_shell_output, self.duthost, command)

        # Refresh lag facts
        lag_facts = self.__get_lag_facts()

        # Verify the lag member is marked deselected for the shutdown port and all other lag member interfaces are marked selected
        for po_intf in po_interfaces.keys():
            if po_intf != intf:
                assert lag_facts['lags'][lag_name]['po_stats']['ports'][po_intf]['runner']['selected']
            else:
                assert not lag_facts['lags'][lag_name]['po_stats']['ports'][po_intf]['runner']['selected']

        # Verify the PortChannel interface is marked down/up correctly depending on whether it should flap
        if po_flap == True:
            assert lag_facts['lags'][lag_name]['po_intf_stat'] == 'Down'
        else:
            assert lag_facts['lags'][lag_name]['po_intf_stat'] == 'Up'
    finally:
        # Bring the port back up in case the test errored out and left the testbed in an unknown state
        # Bring up neighbor interface
        host.no_shutdown(neighbor_intf)

        # Verify PortChannel member links come back up
        for po_intf in po_interfaces.keys():
            if po_intf != intf:
                command = 'bash -c "teamdctl %s state dump" | python -c "import sys, json; print json.load(sys.stdin)[\'ports\'][\'%s\'][\'link\'][\'up\']"' % (lag_name, po_intf)
                wait_until(wait_timeout, delay, self.__check_shell_output, self.duthost, command)
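# NOTE: self.__check_shell_output() is polled via wait_until() above but is not
# defined in this section. A minimal sketch, assuming the teamdctl pipeline
# above prints the literal string "True" when the member is selected/up:
def __check_shell_output(self, host, command):
    out = host.shell(command)
    return out['stdout'] == 'True'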
def test_thermal_control_psu_absence(testbed_devices, psu_controller, mocker_factory):
    """
    @summary: Turn off/on PSUs, check thermal control is working as expected.
    """
    dut = testbed_devices["dut"]
    psu_num = get_psu_num(dut)
    if psu_num < 2:
        pytest.skip("At least 2 PSUs required for rest of the testing in this case")

    logging.info("Create PSU controller for testing")
    psu_ctrl = psu_controller
    if psu_ctrl is None:
        pytest.skip("No PSU controller for %s, skip rest of the testing in this case" % dut.hostname)

    logging.info("To avoid DUT being shutdown, need to turn on PSUs that are not powered")
    turn_all_psu_on(psu_ctrl)

    logging.info("Initialize test results")
    psu_test_results = {}
    if not check_all_psu_on(dut, psu_test_results):
        pytest.skip("Some PSU are still down, skip rest of the testing in this case")

    with ThermalPolicyFileContext(dut, THERMAL_POLICY_VALID_FILE):
        fan_mocker = mocker_factory(dut, 'FanStatusMocker')
        if fan_mocker is None:
            pytest.skip("No FanStatusMocker for %s, skip rest of the testing in this case" % dut.facts['asic_type'])

        logging.info('Mock FAN status data...')
        fan_mocker.mock_data()  # make data random
        restart_thermal_control_daemon(dut)
        logging.info('Wait and check all FAN speed turn to 60%...')
        wait_until(THERMAL_CONTROL_TEST_WAIT_TIME, THERMAL_CONTROL_TEST_CHECK_INTERVAL, fan_mocker.check_all_fan_speed, 60)
        check_thermal_algorithm_status(dut, mocker_factory, False)

        logging.info('Shutdown first PSU and check thermal control result...')
        all_psu_status = psu_ctrl.get_psu_status()
        psu = all_psu_status[0]
        turn_off_psu_and_check_thermal_control(dut, psu_ctrl, psu, fan_mocker)
        psu_test_results.clear()
        if not check_all_psu_on(dut, psu_test_results):
            pytest.skip("Some PSU are still down, skip rest of the testing in this case")

        logging.info('Shutdown second PSU and check thermal control result...')
        psu = all_psu_status[1]
        turn_off_psu_and_check_thermal_control(dut, psu_ctrl, psu, fan_mocker)
        psu_test_results.clear()
        if not check_all_psu_on(dut, psu_test_results):
            pytest.skip("Some PSU are still down, skip rest of the testing in this case")

        logging.info('Wait and check all FAN speed turn to 65%...')
        wait_until(THERMAL_CONTROL_TEST_WAIT_TIME, THERMAL_CONTROL_TEST_CHECK_INTERVAL, fan_mocker.check_all_fan_speed, 65)
def run_lag_fallback_test(self, lag_name):
    logging.info("Start checking lag fall back for: %s" % lag_name)

    lag_facts = self.__get_lag_facts()
    intf, po_interfaces = self.__get_lag_intf_info(lag_facts, lag_name)
    po_fallback = lag_facts['lags'][lag_name]['po_config']['runner']['fallback']

    # Figure out remote VM and interface info for the lag member and run lag fallback test
    peer_device = self.vm_neighbors[intf]['name']
    neighbor_intf = self.vm_neighbors[intf]['port']
    vm_host = self.nbrhosts[peer_device]['host']

    wait_timeout = 120
    delay = 5
    try:
        # Shut down neighbor interface
        vm_host.shutdown(neighbor_intf)
        wait_until(wait_timeout, delay, self.__check_intf_state, vm_host, neighbor_intf, False)

        # Refresh lag facts
        lag_facts = self.__get_lag_facts()

        # Get teamshow result
        teamshow_result = self.duthost.shell('teamshow')
        logging.debug("Teamshow result: %s" % teamshow_result)

        # Verify lag members:
        # 1. All other lag members should keep the selected state
        # 2. The shutdown port should keep the selected state if fallback is enabled
        # 3. The shutdown port should be marked deselected if fallback is disabled
        for po_intf in po_interfaces.keys():
            if po_intf != intf or po_fallback:
                assert lag_facts['lags'][lag_name]['po_stats']['ports'][po_intf]['runner']['selected']
            else:
                assert not lag_facts['lags'][lag_name]['po_stats']['ports'][po_intf]['runner']['selected']

        # The portchannel should be marked Up/Down correctly according to the po fallback setting
        if po_fallback:
            assert lag_facts['lags'][lag_name]['po_intf_stat'] == 'Up'
        else:
            assert lag_facts['lags'][lag_name]['po_intf_stat'] == 'Down'
    finally:
        # Bring up neighbor interface
        vm_host.no_shutdown(neighbor_intf)
        wait_until(wait_timeout, delay, self.__check_intf_state, vm_host, neighbor_intf, True)
def restart_service_and_check(localhost, dut, service, interfaces):
    """
    Restart specified service and check platform status
    """
    logging.info("Restart the %s service" % service)
    dut.command("sudo systemctl restart %s" % service)

    logging.info("Wait until all critical services are fully started")
    check_critical_services(dut)

    logging.info("Wait some time for all the transceivers to be detected")
    assert wait_until(300, 20, check_interface_information, dut, interfaces), \
        "Not all interface information are detected within 300 seconds"

    logging.info("Check transceiver status")
    check_transceiver_basic(dut, interfaces)

    if dut.facts["asic_type"] in ["mellanox"]:
        current_file_dir = os.path.dirname(os.path.realpath(__file__))
        sub_folder_dir = os.path.join(current_file_dir, "mellanox")
        if sub_folder_dir not in sys.path:
            sys.path.append(sub_folder_dir)
        from check_hw_mgmt_service import check_hw_management_service
        from check_sysfs import check_sysfs

        logging.info("Check the hw-management service")
        check_hw_management_service(dut)

        logging.info("Check sysfs")
        check_sysfs(dut)
def test_fastboot(self, duthost, localhost, testbed):
    duthost.command('sudo config save -y')
    reboot(duthost, localhost, reboot_type='fast')
    assert wait_until(300, 20, duthost.critical_services_fully_started), \
        "Not all critical services are fully started"
    self.basic_check_after_reboot(duthost, localhost, testbed)
def test_standby_up(self, duthost, duthost2, ptfhost, testbed):
    dut1_status = duthost.shell("mclagdctl -i {} dump state|grep 'keepalive'".format(g_vars['mclag_domain_id']))['stdout'].split(":")[-1].strip()
    dut2_status = duthost2.shell("mclagdctl -i {} dump state|grep 'keepalive'".format(g_vars['mclag_domain_id']))['stdout'].split(":")[-1].strip()
    assert dut1_status == dut2_status == "OK", "Mclag keepalive status should be OK on both peers after active reboot up"

    # before sending pkts, wait until the standby mclag re-aggregates successfully due to the router_mac change
    assert wait_until(150, 10, check_teamd_status, duthost2, g_vars['dut1_router_mac']), \
        "Standby teamd status should be up and sysid should be same as active's mac"

    ptf_runner(
        ptfhost,
        "ptftests",
        "mclag_test.MclagTest",
        platform_dir="ptftests",
        params={
            "router_mac": g_vars['dut1_router_mac'],
            "router_mac_dut2": g_vars['dut2_router_mac'],
            "testbed_type": testbed['topo'],
            "switch_info": "/tmp/mclag/mclag_switch_info_{}.txt".format(test_scenario),
            "test_scenario": test_scenario,
            "ignore_ports": []
        },
        log_file="/tmp/mclag/log/mclag_{}_[{}]_[{}].log".format(test_scenario, self.__class__.__name__, sys._getframe().f_code.co_name))
def test_ntp(self, duthost):
    force_ntp = " ntpd -gq"
    duthost.service(name='ntp', state='stopped')
    logging.info("Ntp restart in mgmt vrf")
    execute_dut_command(duthost, force_ntp)
    duthost.service(name='ntp', state='restarted')
    assert wait_until(100, 10, self.check_ntp_status, duthost), "Ntp not started"
def test_ntp(duthost, setup_ntp):
    """ verify NTP synchronization on DUT """
    duthost.service(name='ntp', state='stopped')
    duthost.command("ntpd -gq")
    duthost.service(name='ntp', state='restarted')
    assert wait_until(120, 5, check_ntp_status, duthost), "Ntp not in sync"
def test_reload_configuration(testbed_devices, conn_graph_facts):
    """
    @summary: This test case is to reload the configuration and check platform status
    """
    ans_host = testbed_devices["dut"]
    interfaces = conn_graph_facts["device_conn"]
    asic_type = ans_host.facts["asic_type"]

    logging.info("Reload configuration")
    ans_host.command("sudo config reload -y")

    logging.info("Wait until all critical services are fully started")
    check_critical_services(ans_host)

    logging.info("Wait some time for all the transceivers to be detected")
    assert wait_until(300, 20, check_interface_information, ans_host, interfaces), \
        "Not all transceivers are detected in 300 seconds"

    logging.info("Check transceiver status")
    check_transceiver_basic(ans_host, interfaces)

    if asic_type in ["mellanox"]:
        current_file_dir = os.path.dirname(os.path.realpath(__file__))
        sub_folder_dir = os.path.join(current_file_dir, "mellanox")
        if sub_folder_dir not in sys.path:
            sys.path.append(sub_folder_dir)
        from check_hw_mgmt_service import check_hw_management_service
        from check_sysfs import check_sysfs

        logging.info("Check the hw-management service")
        check_hw_management_service(ans_host)

        logging.info("Check sysfs")
        check_sysfs(ans_host)
def setup_ntp(ptfhost, duthost, creds):
    """setup ntp client and server"""
    if creds.get('proxy_env'):
        # If the testbed is behind a proxy, force ntpd inside ptf to use local time
        ptfhost.lineinfile(path="/etc/ntp.conf", line="server 127.127.1.0 prefer")

    # enable ntp server
    ntp_en_res = ptfhost.service(name="ntp", state="started")

    # setup ntp on dut to sync with ntp server
    config_facts = duthost.config_facts(host=duthost.hostname, source="running")['ansible_facts']
    ntp_servers = config_facts.get('NTP_SERVER', {})
    for ntp_server in ntp_servers:
        duthost.command("config ntp del %s" % ntp_server)

    ptfip = ptfhost.host.options['inventory_manager'].get_host(ptfhost.hostname).vars['ansible_host']
    duthost.command("config ntp add %s" % ptfip)

    pytest_assert(wait_until(120, 5, check_ntp_status, ptfhost),
                  "NTP server was not started in PTF container {}; NTP service start result {}".format(ptfhost.hostname, ntp_en_res))

    yield

    # stop ntp server
    ptfhost.service(name="ntp", state="stopped")
    # reset ntp client configuration
    duthost.command("config ntp del %s" % ptfip)
    for ntp_server in ntp_servers:
        duthost.command("config ntp add %s" % ntp_server)
def check_interfaces_and_services(dut, interfaces, reboot_type=None):
    """
    Perform further checks after reboot, including reboot cause, transceiver status and interface status
    @param dut: The AnsibleHost object of DUT.
    @param interfaces: DUT's interfaces defined by minigraph
    @param reboot_type: The type of reboot that was performed, or None to skip the reboot-cause check
    """
    logging.info("Wait until all critical services are fully started")
    check_critical_services(dut)

    if reboot_type is not None:
        logging.info("Check reboot cause")
        reboot_cause = reboot_ctrl_dict[reboot_type]["cause"]
        assert wait_until(MAX_WAIT_TIME_FOR_REBOOT_CAUSE, 20, check_reboot_cause, dut, reboot_cause), \
            "checking reboot-cause failed after reboot by %s" % reboot_cause

        if reboot_ctrl_dict[reboot_type]["test_reboot_cause_only"]:
            logging.info("Further checking skipped for %s test which intends to verify reboot-cause only" % reboot_type)
            return

    logging.info("Wait %d seconds for all the transceivers to be detected" % MAX_WAIT_TIME_FOR_INTERFACES)
    assert wait_until(MAX_WAIT_TIME_FOR_INTERFACES, 20, check_interface_information, dut, interfaces), \
        "Not all transceivers are detected or interfaces are up in %d seconds" % MAX_WAIT_TIME_FOR_INTERFACES

    logging.info("Check transceiver status")
    check_transceiver_basic(dut, interfaces)

    logging.info("Check pmon daemon status")
    assert check_pmon_daemon_status(dut), "Not all pmon daemons running."

    if dut.facts["asic_type"] in ["mellanox"]:
        current_file_dir = os.path.dirname(os.path.realpath(__file__))
        sub_folder_dir = os.path.join(current_file_dir, "mellanox")
        if sub_folder_dir not in sys.path:
            sys.path.append(sub_folder_dir)
        from check_hw_mgmt_service import check_hw_management_service
        from check_sysfs import check_sysfs

        logging.info("Check the hw-management service")
        check_hw_management_service(dut)

        logging.info("Check sysfs")
        check_sysfs(dut)
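# NOTE: check_reboot_cause() is polled via wait_until() above but is not defined
# in this section. A minimal sketch, assuming the expected cause string appears
# in the output of the SONiC "show reboot-cause" command:
def check_reboot_cause(dut, reboot_cause_expected):
    reboot_cause = dut.command("show reboot-cause")["stdout"]
    logging.info("Got reboot-cause '%s', expecting '%s'" % (reboot_cause, reboot_cause_expected))
    return reboot_cause_expected in reboot_cause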
def test_reboot(self, localhost, testbed_devices, testbed):
    duthost = testbed_devices["dut"]
    duthost.command('sudo config save -y')
    reboot(duthost, localhost)
    assert wait_until(300, 20, duthost.critical_services_fully_started), \
        "Not all critical services are fully started"
    self.basic_check_after_reboot(duthost, localhost, testbed_devices, testbed)
def reboot_and_check(localhost, dut, interfaces, reboot_type="cold"): """ Perform the specified type of reboot and check platform status. """ logging.info("Run %s reboot on DUT" % reboot_type) if reboot_type == "cold": reboot_cmd = "reboot" reboot_timeout = 300 elif reboot_type == "fast": reboot_cmd = "fast-reboot" reboot_timeout = 180 elif reboot_type == "warm": reboot_cmd = "warm-reboot" reboot_timeout = 180 else: assert False, "Reboot type %s is not supported" % reboot_type process, queue = dut.command(reboot_cmd, module_async=True) logging.info("Wait for DUT to go down") res = localhost.wait_for(host=dut.hostname, port=22, state="stopped", delay=10, timeout=120, module_ignore_errors=True) if "failed" in res: if process.is_alive(): logging.error("Command '%s' is not completed" % reboot_cmd) process.terminate() logging.error("reboot result %s" % str(queue.get())) assert False, "DUT did not go down" logging.info("Wait for DUT to come back") localhost.wait_for(host=dut.hostname, port=22, state="started", delay=10, timeout=reboot_timeout) logging.info("Wait until all critical services are fully started") check_critical_services(dut) logging.info("Wait some time for all the transceivers to be detected") assert wait_until(300, 20, all_transceivers_detected, dut, interfaces), \ "Not all transceivers are detected in 300 seconds" logging.info("Check interface status") check_interface_status(dut, interfaces) logging.info("Check transceiver status") check_transceiver_basic(dut, interfaces) if dut.facts["asic_type"] in ["mellanox"]: current_file_dir = os.path.dirname(os.path.realpath(__file__)) sub_folder_dir = os.path.join(current_file_dir, "mellanox") if sub_folder_dir not in sys.path: sys.path.append(sub_folder_dir) from check_hw_mgmt_service import check_hw_management_service from check_sysfs import check_sysfs logging.info("Check the hw-management service") check_hw_management_service(dut) logging.info("Check sysfs") check_sysfs(dut)
def check_critical_services(dut):
    """
    @summary: Use systemctl to check whether all the critical services have expected status. ActiveState of all
        services must be "active". SubState of all services must be "running".
    @param dut: The AnsibleHost object of DUT. For interacting with DUT.
    """
    logging.info("Wait until all critical services are fully started")
    assert wait_until(300, 20, _all_critical_services_fully_started, dut), "Not all critical services are fully started"
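# NOTE: wait_until() is used throughout this section but not defined here. A
# minimal sketch of the polling helper, assuming the calling convention
# wait_until(timeout, interval, condition, *args, **kwargs) seen above: it
# re-evaluates `condition` every `interval` seconds until it returns a truthy
# value or `timeout` seconds have elapsed.
import time

def wait_until(timeout, interval, condition, *args, **kwargs):
    start_time = time.time()
    while time.time() - start_time < timeout:
        try:
            if condition(*args, **kwargs):
                return True
        except Exception:
            # Treat an exception raised by the condition as "not ready yet"
            pass
        time.sleep(interval)
    return False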
def verify_drop_counters(duthost, dut_iface, get_cnt_cli_cmd, column_key):
    """ Verify drop counter incremented on specific interface """
    get_drops = lambda: int(get_pkt_drops(duthost, get_cnt_cli_cmd)[dut_iface][column_key].replace(",", ""))
    check_drops_on_dut = lambda: PKT_NUMBER == get_drops()
    if not wait_until(5, 1, check_drops_on_dut):
        fail_msg = "'{}' drop counter was not incremented on iface {}. DUT {} == {}; Sent == {}".format(
            column_key, dut_iface, column_key, get_drops(), PKT_NUMBER
        )
        pytest.fail(fail_msg)
def reboot(duthost, localhost, delay=10, timeout=180, wait=120, basic_check=True):
    """
    cold reboots DUT
    :param duthost: DUT host object
    :param localhost: local host object
    :param delay: delay between ssh availability checks
    :param timeout: timeout for waiting ssh port state change
    :param wait: time to wait for DUT to initialize
    :param basic_check: check duthost.critical_services_fully_started after DUT initialize
    :return:
    """
    dut_ip = duthost.host.options['inventory_manager'].get_host(duthost.hostname).address
    duthost.shell("nohup reboot &")

    logging.info('waiting for ssh to drop')
    res = localhost.wait_for(host=dut_ip,
                             port=SONIC_SSH_PORT,
                             state='absent',
                             search_regex=SONIC_SSH_REGEX,
                             delay=delay,
                             timeout=timeout)
    if res.is_failed:
        raise Exception('DUT did not shutdown')

    # TODO: add serial output during reboot for better debuggability
    #       This feature requires serial information to be present in
    #       testbed information

    logging.info('waiting for ssh to startup')
    res = localhost.wait_for(host=dut_ip,
                             port=SONIC_SSH_PORT,
                             state='started',
                             search_regex=SONIC_SSH_REGEX,
                             delay=delay,
                             timeout=timeout)
    if res.is_failed:
        raise Exception('DUT did not startup')

    logging.info('ssh has started up')

    logging.info('waiting for switch to initialize')
    time.sleep(wait)

    if basic_check:
        assert wait_until(timeout, 10, duthost.critical_services_fully_started), \
            "All critical services should be fully started! {}".format(duthost.CRITICAL_SERVICES)
def setup_bgp_graceful_restart(duthost, nbrhosts):
    config_facts = duthost.config_facts(host=duthost.hostname, source="running")['ansible_facts']
    bgp_neighbors = config_facts.get('BGP_NEIGHBOR', {})

    for k, nbr in nbrhosts.items():
        logger.info("enable graceful restart on neighbor {}".format(k))
        logger.info("bgp asn {}".format(nbr['conf']['bgp']['asn']))
        res = nbr['host'].eos_config(lines=["graceful-restart restart-time 300"],
                                     parents=["router bgp {}".format(nbr['conf']['bgp']['asn'])])
        logger.info("graceful-restart restart-time config result: {}".format(res))
        res = nbr['host'].eos_config(lines=["graceful-restart"],
                                     parents=["router bgp {}".format(nbr['conf']['bgp']['asn']), "address-family ipv4"])
        logger.info("graceful-restart ipv4 config result: {}".format(res))
        res = nbr['host'].eos_config(lines=["graceful-restart"],
                                     parents=["router bgp {}".format(nbr['conf']['bgp']['asn']), "address-family ipv6"])
        logger.info("graceful-restart ipv6 config result: {}".format(res))

    # Changing the graceful restart option will clear the bgp sessions,
    # so wait for all bgp sessions to come back up.
    logger.info("bgp neighbors: {}".format(bgp_neighbors.keys()))
    if not wait_until(300, 10, duthost.check_bgp_session_state, bgp_neighbors.keys()):
        pytest.fail("not all bgp sessions are up after enable graceful restart")

    yield

    for k, nbr in nbrhosts.items():
        # start bgpd if not started
        nbr['host'].start_bgpd()
        logger.info("disable graceful restart on neighbor {}".format(k))
        nbr['host'].eos_config(lines=["no graceful-restart"],
                               parents=["router bgp {}".format(nbr['conf']['bgp']['asn']), "address-family ipv4"])
        nbr['host'].eos_config(lines=["no graceful-restart"],
                               parents=["router bgp {}".format(nbr['conf']['bgp']['asn']), "address-family ipv6"])

    if not wait_until(300, 10, duthost.check_bgp_session_state, bgp_neighbors.keys()):
        pytest.fail("not all bgp sessions are up after disable graceful restart")
def test_techsupport(request, config, duthost, testbed):
    """
    test the "show techsupport" command in a loop
    :param config: fixture to configure additional setups_list on dut.
    :param duthost: DUT host
    :param testbed: testbed
    """
    loop_range = request.config.getoption("--loop_num") or DEFAULT_LOOP_RANGE
    loop_delay = request.config.getoption("--loop_delay") or DEFAULT_LOOP_DELAY
    since = request.config.getoption("--logs_since") or str(randint(1, 23)) + " minute ago"

    logger.debug("Loop_range is {} and loop_delay is {}".format(loop_range, loop_delay))

    for i in range(loop_range):
        logger.debug("Running show techsupport ... ")
        wait_until(300, 20, execute_command, duthost, str(since))
        tar_file = [j for j in pytest.tar_stdout.split('\n') if j != ''][-1]
        stdout = duthost.command("rm -rf {}".format(tar_file))
        logger.debug("Sleeping for {} seconds".format(loop_delay))
        time.sleep(loop_delay)
def testFastreboot(self, sflowbase_config, duthost, localhost, partial_ptf_runner, ptfhost):
    config_sflow(duthost, sflow_status='enable')
    verify_show_sflow(duthost, status='up', collector=['collector0', 'collector1'])
    duthost.command('sudo config save -y')
    reboot(duthost, localhost, reboot_type='fast')
    assert wait_until(300, 20, duthost.critical_services_fully_started), "Not all critical services are fully started"
    verify_show_sflow(duthost, status='up', collector=['collector0', 'collector1'])
    for intf in var['sflow_ports']:
        var['sflow_ports'][intf]['ifindex'] = get_ifindex(duthost, intf)
        verify_sflow_interfaces(duthost, intf, 'up', 512)
    var['portmap'] = json.dumps(var['sflow_ports'])
    ptfhost.copy(content=var['portmap'], dest="/tmp/sflow_ports.json")
    partial_ptf_runner(
        enabled_sflow_interfaces=var['sflow_ports'].keys(),
        active_collectors="['collector0','collector1']")
def check_critical_services(dut):
    """
    @summary: Use systemctl to check whether all the critical services have expected status. ActiveState of all
        services must be "active". SubState of all services must be "running".
    @param dut: The AnsibleHost object of DUT. For interacting with DUT.
    """
    logging.info("Wait until all critical services are fully started")
    assert wait_until(300, 20, dut.critical_services_fully_started), "Not all critical services are fully started"

    logging.info("Check critical service status")
    for service in dut.CRITICAL_SERVICES:
        status = dut.get_service_props(service)
        assert status["ActiveState"] == "active", \
            "ActiveState of %s is %s, expected: active" % (service, status["ActiveState"])
        assert status["SubState"] == "running", \
            "SubState of %s is %s, expected: running" % (service, status["SubState"])
def testRebootSflowDisable(self, sflowbase_config, duthost, testbed_devices, localhost, partial_ptf_runner, ptfhost):
    config_sflow(duthost, sflow_status='disable')
    verify_show_sflow(duthost, status='down')
    partial_ptf_runner(
        enabled_sflow_interfaces=var['sflow_ports'].keys(),
        active_collectors="[]")
    duthost.command('sudo config save -y')
    reboot(duthost, localhost)
    assert wait_until(300, 20, duthost.critical_services_fully_started), "Not all critical services are fully started"
    verify_show_sflow(duthost, status='down')
    for intf in var['sflow_ports']:
        var['sflow_ports'][intf]['ifindex'] = get_ifindex(duthost, intf)
    var['portmap'] = json.dumps(var['sflow_ports'])
    ptfhost.copy(content=var['portmap'], dest="/tmp/sflow_ports.json")
    partial_ptf_runner(
        enabled_sflow_interfaces=var['sflow_ports'].keys(),
        active_collectors="[]")
def testRebootSflowEnable(self, sflowbase_config, duthost, testbed_devices, localhost, partial_ptf_runner, ptfhost):
    duthost = testbed_devices["dut"]
    duthost.command("config sflow polling-interval 80")
    verify_show_sflow(duthost, status='up', polling_int=80)
    duthost.command('sudo config save -y')
    reboot(duthost, localhost)
    assert wait_until(300, 20, duthost.critical_services_fully_started), "Not all critical services are fully started"
    verify_show_sflow(duthost, status='up', collector=['collector0', 'collector1'], polling_int=80)
    for intf in var['sflow_ports']:
        var['sflow_ports'][intf]['ifindex'] = get_ifindex(duthost, intf)
        verify_sflow_interfaces(duthost, intf, 'up', 512)
    var['portmap'] = json.dumps(var['sflow_ports'])
    ptfhost.copy(content=var['portmap'], dest="/tmp/sflow_ports.json")
    partial_ptf_runner(
        enabled_sflow_interfaces=var['sflow_ports'].keys(),
        active_collectors="['collector0','collector1']")
    # Test Polling
    partial_ptf_runner(
        polling_int=80,
        active_collectors="['collector0','collector1']")
def wait_until_fan_speed_set_to_default(dut, timeout=300, interval=10):
    wait_until(timeout, interval, fan_speed_set_to_default, dut)
def test_bgp_gr_helper_routes_perserved(duthost, nbrhosts, setup_bgp_graceful_restart):
    """
    Verify that DUT routes are preserved when peer performed graceful restart
    """
    config_facts = duthost.config_facts(host=duthost.hostname, source="running")['ansible_facts']
    bgp_neighbors = config_facts.get('BGP_NEIGHBOR', {})
    po = config_facts.get('PORTCHANNEL', {})
    dev_nbr = config_facts.get('DEVICE_NEIGHBOR', {})

    rtinfo_v4 = duthost.get_ip_route_info(ipaddress.ip_address(u'0.0.0.0'))
    if len(rtinfo_v4['nexthops']) == 0:
        pytest.skip("there is no next hop for v4 default route")

    rtinfo_v6 = duthost.get_ip_route_info(ipaddress.ip_address(u'::'))
    if len(rtinfo_v6['nexthops']) == 0:
        pytest.skip("there is no next hop for v6 default route")

    ifnames_v4 = [nh[1] for nh in rtinfo_v4['nexthops']]
    ifnames_v6 = [nh[1] for nh in rtinfo_v6['nexthops']]

    ifnames_common = [ifname for ifname in ifnames_v4 if ifname in ifnames_v6]
    ifname = ifnames_common[0]

    # get neighbor device connected ports
    nbr_ports = []
    if ifname.startswith("PortChannel"):
        for member in po[ifname]['members']:
            nbr_ports.append(dev_nbr[member]['port'])
    else:
        pytest.skip("Do not support peer device not connected via port channel")
    logger.info("neighbor device connected ports {}".format(nbr_ports))

    # get nexthop ip
    for nh in rtinfo_v4['nexthops']:
        if nh[1] == ifname:
            bgp_nbr_ipv4 = nh[0]

    for nh in rtinfo_v6['nexthops']:
        if nh[1] == ifname:
            bgp_nbr_ipv6 = nh[0]

    # get the bgp neighbor
    bgp_nbr = bgp_neighbors[str(bgp_nbr_ipv4)]
    nbr_hostname = bgp_nbr['name']
    nbrhost = nbrhosts[nbr_hostname]['host']
    exabgp_sessions = ['exabgp_v4', 'exabgp_v6']
    pytest_assert(nbrhost.check_bgp_session_state([], exabgp_sessions),
                  "exabgp sessions {} are not up before graceful restart".format(exabgp_sessions))

    # shutdown Rib agent, starting gr process
    logger.info("shutdown rib process on neighbor {}".format(nbr_hostname))
    nbrhost.kill_bgpd()

    # wait till DUT enters NSF state
    pytest_assert(wait_until(60, 5, duthost.check_bgp_session_nsf, bgp_nbr_ipv4),
                  "neighbor {} does not enter NSF state".format(bgp_nbr_ipv4))
    pytest_assert(wait_until(60, 5, duthost.check_bgp_session_nsf, bgp_nbr_ipv6),
                  "neighbor {} does not enter NSF state".format(bgp_nbr_ipv6))

    # confirm ip route still there
    rtinfo_v4 = duthost.get_ip_route_info(ipaddress.ip_address(u'0.0.0.0'))
    pytest_assert(ipaddress.ip_address(bgp_nbr_ipv4) in [nh[0] for nh in rtinfo_v4['nexthops']],
                  "cannot find nexthop {} in the new default route nexthops. {}".format(bgp_nbr_ipv4, rtinfo_v4))

    rtinfo_v6 = duthost.get_ip_route_info(ipaddress.ip_address(u'::'))
    pytest_assert(ipaddress.ip_address(bgp_nbr_ipv6) in [nh[0] for nh in rtinfo_v6['nexthops']],
                  "cannot find nexthop {} in the new default route nexthops. {}".format(bgp_nbr_ipv6, rtinfo_v6))

    # shutdown the connected ports from nbr
    for nbr_port in nbr_ports:
        nbrhost.shutdown(nbr_port)

    try:
        # start Rib agent
        logger.info("startup rib process on neighbor {}".format(nbr_hostname))
        nbrhost.start_bgpd()

        # wait for exabgp sessions to establish
        pytest_assert(wait_until(300, 10, nbrhost.check_bgp_session_state, [], exabgp_sessions),
                      "exabgp sessions {} are not coming back".format(exabgp_sessions))
    except:
        raise
    finally:
        # unshut the connected ports from nbr
        for nbr_port in nbr_ports:
            nbrhost.no_shutdown(nbr_port)

    # confirm bgp session up
    graceful_restarted_bgp_sessions = [str(bgp_nbr_ipv4), str(bgp_nbr_ipv6)]
    pytest_assert(wait_until(300, 10, duthost.check_bgp_session_state, graceful_restarted_bgp_sessions),
                  "graceful restarted bgp sessions {} are not coming back".format(graceful_restarted_bgp_sessions))
def reboot_and_check(localhost, dut, interfaces, reboot_type=REBOOT_TYPE_COLD, reboot_helper=None, reboot_kwargs=None):
    """
    Perform the specified type of reboot and check platform status.
    @param localhost: The Localhost object.
    @param dut: The AnsibleHost object of DUT.
    @param interfaces: DUT's interfaces defined by minigraph
    @param reboot_type: The reboot type, pre-defined const that has name convention of REBOOT_TYPE_XXX.
    @param reboot_helper: The helper function used only by power off reboot
    @param reboot_kwargs: The argument used by reboot_helper
    """
    logging.info("Run %s reboot on DUT" % reboot_type)
    assert reboot_type in reboot_ctrl_dict.keys(), "Unknown reboot type %s" % reboot_type
    reboot_timeout = reboot_ctrl_dict[reboot_type]["timeout"]
    reboot_cause = reboot_ctrl_dict[reboot_type]["cause"]

    dut_datetime = datetime.strptime(dut.command('date -u +"%Y-%m-%d %H:%M:%S"')["stdout"], "%Y-%m-%d %H:%M:%S")

    if reboot_type == REBOOT_TYPE_POWEROFF:
        assert reboot_helper is not None, "A reboot function must be provided for power off reboot"

        reboot_helper(reboot_kwargs)

        localhost.wait_for(host=dut.hostname, port=22, state="stopped", delay=10, timeout=120)
    else:
        reboot_cmd = reboot_ctrl_dict[reboot_type]["command"]
        reboot_task, reboot_res = dut.command(reboot_cmd, module_ignore_errors=True, module_async=True)

        logging.info("Wait for DUT to go down")
        res = localhost.wait_for(host=dut.hostname, port=22, state="stopped", timeout=180,
                                 module_ignore_errors=True)
        if "failed" in res:
            try:
                logging.error("Wait for switch down failed, try to kill any possible stuck reboot task")
                pid = dut.command("pgrep -f '%s'" % reboot_cmd)["stdout"]
                dut.command("kill -9 %s" % pid)
                reboot_task.terminate()
                logging.error("Result of command '%s': %s" % (reboot_cmd, str(reboot_res.get(timeout=0))))
            except Exception as e:
                logging.error("Exception raised while cleanup reboot task and get result: " + repr(e))

    logging.info("Wait for DUT to come back")
    localhost.wait_for(host=dut.hostname, port=22, state="started", delay=10, timeout=reboot_timeout)

    logging.info("Check the uptime to verify whether reboot was performed")
    dut_uptime = datetime.strptime(dut.command("uptime -s")["stdout"], "%Y-%m-%d %H:%M:%S")
    assert float(dut_uptime.strftime("%s")) - float(dut_datetime.strftime("%s")) > 10, "Device did not reboot"

    logging.info("Wait until all critical services are fully started")
    check_critical_services(dut)

    logging.info("Check reboot cause")
    check_reboot_cause(dut, reboot_cause)

    if reboot_ctrl_dict[reboot_type]["test_reboot_cause_only"]:
        logging.info("Further checking skipped for %s test which intends to verify reboot-cause only" % reboot_type)
        return

    logging.info("Wait some time for all the transceivers to be detected")
    assert wait_until(300, 20, check_interface_information, dut, interfaces), \
        "Not all transceivers are detected or interfaces are up in 300 seconds"

    logging.info("Check transceiver status")
    check_transceiver_basic(dut, interfaces)

    logging.info("Check pmon daemon status")
    assert check_pmon_daemon_status(dut), "Not all pmon daemons running."

    if dut.facts["asic_type"] in ["mellanox"]:
        current_file_dir = os.path.dirname(os.path.realpath(__file__))
        sub_folder_dir = os.path.join(current_file_dir, "mellanox")
        if sub_folder_dir not in sys.path:
            sys.path.append(sub_folder_dir)
        from check_hw_mgmt_service import check_hw_management_service
        from check_sysfs import check_sysfs

        logging.info("Check the hw-management service")
        check_hw_management_service(dut)

        logging.info("Check sysfs")
        check_sysfs(dut)