Пример #1
0
class TestPfcwdWb(SetupPfcwdFunc):
    """ Test PFCwd warm-reboot function and supporting methods """
    def storm_detect_path(self, port, queue, first_detect_after_wb=False):
        """
        Trigger (or pick up) a PFC storm and check the logs for the PFCwd detect message

        A new LogAnalyzer is created on every call and stored in self.loganalyzer.

        Args:
            port(string) : DUT port
            queue(int): queue on the port that will be stormed
            first_detect_after_wb(bool): first detect iteration after warm reboot. No storm is started
                in that case; the logs are searched from the orchagent warm start message and one
                detect message per port/queue pair is expected (default: False)
        """
        # After warm boot the search starts at this orchagent message rather than at a freshly
        # written marker.
        wb_start_msg = "NOTICE swss#orchagent: :- setWarmStartState: orchagent warm start state changed to initialized"
        analyzer = LogAnalyzer(ansible_host=self.dut,
                               marker_prefix="pfcwd_wb_storm_detect_port_{}_queue_{}".format(port, queue),
                               start_marker=wb_start_msg if first_detect_after_wb else None)
        self.loganalyzer = analyzer
        marker = analyzer.init()
        time.sleep(5)

        ignore_patterns = analyzer.parse_regexp_file(src=os.path.join(TEMPLATES_DIR, "ignore_pfc_wd_messages"))
        analyzer.ignore_regex.extend(ignore_patterns)
        analyzer.expect_regex = [EXPECT_PFC_WD_DETECT_RE]
        analyzer.match_regex = []

        if first_detect_after_wb:
            # The storms are still ongoing, so none is started here; every port/queue pair should
            # have produced a detect message.
            analyzer.expected_matches_target = len(self.ports) * len(self.pfc_wd['queue_indices'])
            time.sleep(20)
        elif self.pfc_wd['fake_storm']:
            PfcCmd.set_storm_status(self.dut, self.oid_map[(port, queue)], "enabled")
            time.sleep(5)
        else:
            self.storm_handle[port][queue].start_storm()
            time.sleep(15 * len(self.pfc_wd['queue_indices']))

        # storm detect check
        logger.info("Verify if PFC storm is detected on port {} queue {}".format(port, queue))
        analyzer.analyze(marker)

    def storm_restore_path(self, port, queue):
        """
        Storm restoration action and associated verifications

        Reuses self.loganalyzer (in this class it is only created by storm_detect_path), moves it to
        a new marker, stops the real or fake storm and checks the logs for the PFCwd restore message.

        Args:
            port(string) : DUT port
            queue(int): queue on the port where storm would be restored
        """
        marker = self.loganalyzer.update_marker_prefix("pfcwd_wb_storm_restore_port_{}_queue_{}".format(port, queue))
        time.sleep(5)
        ignore_file = os.path.join(TEMPLATES_DIR, "ignore_pfc_wd_messages")
        reg_exp = self.loganalyzer.parse_regexp_file(src=ignore_file)
        # The analyzer is shared between calls, so only add the patterns it does not hold yet
        # instead of growing the ignore list with duplicates on every restore.
        known_ignores = self.loganalyzer.ignore_regex
        known_ignores.extend([regex for regex in reg_exp if regex not in known_ignores])
        self.loganalyzer.expect_regex = []
        self.loganalyzer.expect_regex.extend([EXPECT_PFC_WD_RESTORE_RE])
        self.loganalyzer.match_regex = []
        # Reset the target a previous first-detect-after-warm-boot pass may have raised.
        self.loganalyzer.expected_matches_target = 0

        if not self.pfc_wd['fake_storm']:
            self.storm_handle[port][queue].stop_storm()
            time.sleep(15)
        else:
            PfcCmd.set_storm_status(self.dut, self.oid_map[(port, queue)], "disabled")
            time.sleep(5)

        # storm restore check
        logger.info("Verify if PFC storm is restored on port {} queue {}".format(port, queue))
        self.loganalyzer.analyze(marker)

    def defer_fake_storm(self, port, queue, start_defer, stop_defer):
        """
        Enable and later disable a fake storm on a queue, each after its own delay

        Each phase sleeps for its delay, blocks on DUT_ACTIVE.wait() and then sets the storm status.

        Args:
            port(string) : DUT port
            queue(int): queue on the port whose fake storm is toggled
            start_defer: seconds to sleep before the storm is enabled
            stop_defer: seconds to sleep, after enabling, before the storm is disabled
        """
        for delay, storm_status in ((start_defer, "enabled"), (stop_defer, "disabled")):
            time.sleep(delay)
            # presumably DUT_ACTIVE is an event that is set while the DUT is reachable - TODO confirm
            DUT_ACTIVE.wait()
            PfcCmd.set_storm_status(self.dut, self.oid_map[(port, queue)], storm_status)

    def run_test(self, port, queue, detect=True, storm_start=True, first_detect_after_wb=False,
                 storm_defer=False):
        """
        Run one test step on a port/queue: schedule a deferred storm, or exercise the detect or
        restore path and then the traffic test

        Args:
            port(string) : DUT port
            queue(int): queue on the port which would be stormed/restored
            detect(bool): run the detect logic when truthy, the restore logic otherwise (default: True)
            storm_start(bool): together with first_detect_after_wb decides whether the detect path
                is actually invoked (default: True)
            first_detect_after_wb(bool): passed on to the detect path (default: False)
            storm_defer(bool): only kick off the deferred storm and return (default: False)
        """
        if storm_defer:
            # Deferred storm: just launch it and hand control back to the main loop, whose next
            # action is the warm boot.
            if self.pfc_wd['fake_storm']:
                deferred = InterruptableThread(
                    target=self.defer_fake_storm,
                    args=(port, queue, self.pfc_wd['storm_start_defer'],
                          self.pfc_wd['storm_stop_defer']))
                deferred.daemon = True
                deferred.start()
                self.storm_threads.append(deferred)
            else:
                handle = self.storm_handle[port][queue]
                handle.start_storm()
                handle.stop_storm()
            return

        if not detect:
            logger.info("--- Storm restoration path for port {} queue {} ---".format(port, queue))
            self.storm_restore_path(port, queue)
        elif storm_start or first_detect_after_wb:
            logger.info("--- Storm detection path for port {} queue {} ---".format(port, queue))
            self.storm_detect_path(port, queue, first_detect_after_wb=first_detect_after_wb)

        # test pfcwd functionality on a storm/restore
        self.traffic_inst.verify_wd_func(detect=detect)

    @pytest.fixture(autouse=True)
    def pfcwd_wb_test_cleanup(self):
        """
        Autouse teardown: release waiting storm threads, stop every storm and stop PFCwd on the DUT
        """
        yield

        # Unblock any storm thread still parked in DUT_ACTIVE.wait() before joining it.
        DUT_ACTIVE.set()
        for storm_thread in self.storm_threads:
            exc_info = storm_thread.join(timeout=0.1, suppress_exception=True)
            if not exc_info:
                continue
            logger.debug("Exception in thread %r:", storm_thread)
            formatted_trace = traceback.format_exception(*exc_info)
            logger.debug("".join(formatted_trace))

        self.stop_all_storm()
        time.sleep(5)
        logger.info("--- Stop PFC WD ---")
        self.dut.command("pfcwd stop")

    def stop_all_storm(self):
        """
        Stop every storm recorded in self.storm_handle after a test run

        An entry with a storm handle is stopped through the handle; an entry without one (fake
        storm) has its storm status set to "disabled" on the DUT.
        """
        if not self.storm_handle:
            return

        logger.info("--- Stopping storm on all ports ---")
        for port, queue_handles in self.storm_handle.items():
            for queue, handle in queue_handles.items():
                if handle:
                    logger.info("--- Stop pfc storm on port {} queue {}".format(port, queue))
                    handle.stop_storm()
                else:
                    logger.info("--- Disabling fake storm on port {} queue {}".format(port, queue))
                    PfcCmd.set_storm_status(self.dut, self.oid_map[(port, queue)], "disabled")

    def pfcwd_wb_helper(self, fake_storm, testcase_actions, setup_pfc_test, fanout_graph_facts, ptfhost,
                        duthost, localhost, fanouthosts, two_queues):
        """
        Helper method that initializes the vars and starts the test execution

        Walks through testcase_actions in order. An action containing 'warm-reboot' warm reboots the
        DUT; any other action is mapped to a bit through ACTIONS and executed with run_test on every
        selected port and every queue in self.pfc_wd['queue_indices'].

        Args:
            fake_storm(bool): if fake storm is enabled or disabled
            testcase_actions(list): list of actions that the test will go through
            setup_pfc_test(fixture): module scoped autouse fixture
            fanout_graph_facts(fixture): fanout info
            ptfhost(AnsibleHost): PTF instance
            duthost(AnsibleHost): DUT instance
            localhost(AnsibleHost): local instance
            fanouthosts(AnsibleHost): fanout instance
            two_queues: stored in self.two_queues; not read in this method
        """
        setup_info = setup_pfc_test
        self.fanout_info = fanout_graph_facts
        self.ptf = ptfhost
        self.dut = duthost
        self.fanout = fanouthosts
        self.timers = setup_info['pfc_timers']
        self.ports = setup_info['selected_test_ports']
        self.neighbors = setup_info['neighbors']
        dut_facts = self.dut.facts
        self.peer_dev_list = dict()
        # day of the month; not read in this method (presumably consumed by an inherited helper)
        self.seed = int(datetime.datetime.today().day)
        self.two_queues = two_queues
        self.storm_handle = dict()
        # bit of the most recent non-reboot action; 0 until the first action has run
        bitmask = 0
        storm_deferred = 0
        storm_restored = 0
        # timeout handed to join_all below; presumably raised by storm_defer_setup - TODO confirm
        self.max_wait = 0
        self.fake_storm = fake_storm
        self.oid_map = dict()
        self.storm_threads = []

        for t_idx, test_action in enumerate(testcase_actions):
            if 'warm-reboot' in test_action:
                reboot(self.dut, localhost, reboot_type="warm")
                # bitmask is left untouched so the next action still sees the pre-reboot action
                continue

            # At this point bitmask still describes the PREVIOUS non-reboot action.
            # one of the factors to decide if the storm needs to be started
            storm_restored = bitmask and (bitmask & 2)
            # if the action prior to the warm-reboot was a 'storm_defer', ensure that all the storms are
            # stopped
            storm_deferred = bitmask and (bitmask & 4)
            if storm_deferred:
                logger.info("Wait for all the deferred storms to start and stop ...")
                join_all(self.storm_threads, self.max_wait)
                self.storm_threads = []
                self.storm_handle = dict()

            # From here on bitmask describes the CURRENT action: bit value 1 selects the detect path
            # and bit value 4 the deferred-storm path in run_test below.
            bitmask = (1 << ACTIONS[test_action])
            for p_idx, port in enumerate(self.ports):
                logger.info("")
                logger.info("--- Testing on {} ---".format(port))
                self.setup_test_params(port, setup_info['vlan'], p_idx)
                for q_idx, queue in enumerate(self.pfc_wd['queue_indices']):
                    # storm handles are (re)built on the very first action and right after a
                    # deferred storm, since the handle map was reset above
                    if not t_idx or storm_deferred:
                        if not q_idx:
                            self.storm_handle[port] = dict()
                        # None marks "no storm handle"; stop_all_storm treats such entries as fake storms
                        self.storm_handle[port][queue] = None

                        # setup the defer parameters if the storm is deferred currently
                        if (bitmask & 4):
                            self.storm_defer_setup()

                        if not self.pfc_wd['fake_storm']:
                            self.storm_setup(port, queue, storm_defer=(bitmask & 4))
                        else:
                            self.oid_map[(port, queue)] = PfcCmd.get_queue_oid(self.dut, port, queue)

                    self.traffic_inst = SendVerifyTraffic(self.ptf, dut_facts['router_mac'], self.pfc_wd, queue)
                    # first_detect_after_wb is only true for the third action in the sequence, on the
                    # first port and first queue, when the previous action was not a deferred storm.
                    # NOTE(review): the hard-coded t_idx == 2 presumably relies on sequences shaped
                    # like [action, warm-reboot, action, ...] - verify against TESTCASE_INFO
                    self.run_test(port, queue, detect=(bitmask & 1),
                                  storm_start=not t_idx or storm_deferred or storm_restored,
                                  first_detect_after_wb=(t_idx == 2 and not p_idx and not q_idx and not storm_deferred),
                                  storm_defer=(bitmask & 4))

    @pytest.fixture(params=['no_storm', 'storm', 'async_storm'])
    def testcase_action(self, request):
        """
        Parametrized fixture naming the pfcwd warm boot testcase to run

        Args:
            request(pytest) : pytest request object

        Returns:
            testcase_action(string) : testcase to execute, one of the fixture params
        """
        # No teardown is needed, so the param is returned directly.
        return request.param

    def test_pfcwd_wb(self, fake_storm, testcase_action, setup_pfc_test, fanout_graph_facts, ptfhost, duthosts,
                      rand_one_dut_hostname, localhost, fanouthosts, two_queues):
        """
        Tests PFCwd warm reboot with various testcase actions

        Args:
            fake_storm(fixture): fake storm status
            testcase_action(fixture): testcase to execute (values: 'no_storm', 'storm', 'async_storm')

                'no_storm' : PFCwd storm detection/restore before and after warm reboot
                'storm' : PFC storm started and detected before warm-reboot. Storm is ongoing during warm
                          boot and lasts past the warm boot finish. Verifies if the storm is detected
                          after warm-reboot. PFC storm is stopped and restored after warm boot
                'async_storm': PFC storm asynchronously starts at a random time and lasts a random period
                               at fanout. Warm reboot is done. Wait for all the storms to finish and then
                               verify the storm detect/restore logic

            setup_pfc_test(fixture) : Module scoped autouse fixture for PFCwd
            fanout_graph_facts(fixture) : fanout graph info
            ptfhost(AnsibleHost) : ptf host instance
            duthosts(fixture) : DUT instances, indexed by hostname
            rand_one_dut_hostname(fixture) : hostname used to pick the DUT out of duthosts
            localhost(AnsibleHost) : localhost instance
            fanouthosts(AnsibleHost): fanout instance
            two_queues(fixture) : passed through to pfcwd_wb_helper
        """
        duthost = duthosts[rand_one_dut_hostname]
        testcase = TESTCASE_INFO[testcase_action]
        logger.info("--- {} ---".format(testcase['desc']))
        self.pfcwd_wb_helper(fake_storm, testcase['test_sequence'], setup_pfc_test, fanout_graph_facts,
                             ptfhost, duthost, localhost, fanouthosts, two_queues)
Пример #2
0
def consumes_memory_and_checks_monit(duthost, container_name, vm_workers, new_syntax_enabled):
    """Invokes the 'stress' utility to consume memory more than the threshold asynchronously
    and checks whether the container can be stopped and restarted. After container was restarted,
    'stress' utility will be invoked again to consume memory and checks whether Monit was able to
    restart this container with or without help of new syntax.
    Loganalyzer is leveraged to check whether the log messages related to container stopped
    and started were generated.

    Args:
        duthost: The AnsibleHost object of DuT.
        container_name: Name of container.
        vm_workers: Number of workers which does the spinning on malloc()/free()
          to consume memory.
        new_syntax_enabled: Checks to make sure container will be restarted if it is set to be
          `True`.

    Returns:
        None.
    """
    # NOTE(review): the patterns are hard-coded to the telemetry container even though
    # container_name is a parameter - confirm callers only pass 'telemetry'.
    expected_alerting_messages = [
        ".*restart_service.*Restarting service 'telemetry'.*",
        ".*Stopping Telemetry container.*",
        ".*Stopped Telemetry container.*",
        ".*Starting Telemetry container.*",
        ".*Started Telemetry container.*",
    ]

    loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix="test_memory_checker")
    loganalyzer.expect_regex = []
    loganalyzer.expect_regex.extend(expected_alerting_messages)
    marker = loganalyzer.init()

    thread_pool = ThreadPool()
    try:
        thread_pool.apply_async(consume_memory, (duthost, container_name, vm_workers))

        logger.info("Sleep '{}' seconds to wait for the alerting messages from syslog ...".format(WAITING_SYSLOG_MSG_SECS))
        time.sleep(WAITING_SYSLOG_MSG_SECS)

        logger.info("Checking the alerting messages related to container restart ...")
        loganalyzer.analyze(marker)
        logger.info("Found all the expected alerting messages from syslog!")

        logger.info("Waiting for '{}' container to be restarted ...".format(container_name))
        restarted = wait_until(CONTAINER_RESTART_THRESHOLD_SECS,
                               CONTAINER_CHECK_INTERVAL_SECS,
                               0,
                               check_container_state, duthost, container_name, True)
        pytest_assert(restarted, "Failed to restart '{}' container!".format(container_name))
        logger.info("'{}' container is restarted.".format(container_name))

        logger.info("Running 'stress' utility again in '{}' ...".format(container_name))
        thread_pool.apply_async(consume_memory, (duthost, container_name, vm_workers))

        # New marker: only messages logged from here on count for the second analysis.
        marker = loganalyzer.update_marker_prefix("test_monit_counter")
        logger.info("Checking memory usage of '{}' every 30 seconds for 6 times ...".format(container_name))
        for _ in range(6):
            mem_usage = get_container_mem_usage(duthost, container_name)
            logger.info("Memory usage of '{}' is '{}'".format(container_name, mem_usage))
            time.sleep(30)

        logger.info("Analyzing syslog messages to verify whether '{}' is restarted ...".format(container_name))
        # fail=False: the match counts are asserted below instead of by the analyzer itself.
        analyzing_result = loganalyzer.analyze(marker, fail=False)
        if not new_syntax_enabled:
            pytest_assert(analyzing_result["total"]["expected_match"] == 0,
                          "Monit can reset counter and restart '{}'!".format(container_name))
            logger.info("Monit was unable to reset its counter and '{}' can not be restarted!".format(container_name))
        else:
            pytest_assert(analyzing_result["total"]["expected_match"] == len(expected_alerting_messages),
                          "Monit still can not restart '{}' with the help of new syntax!".format(container_name))
            logger.info("Monit was able to restart '{}' with the help of new syntax!".format(container_name))
    finally:
        # No further tasks are submitted; closing lets the workers exit once the queued
        # 'stress' calls return instead of leaking the pool, even when an assertion fails.
        thread_pool.close()