Пример #1
0
    def execute_test(self,
                     duthost,
                     syslog_marker,
                     ignore_regex=None,
                     expect_regex=None,
                     expect_errors=False):
        """
        Helper function that loads a template on the DUT and verifies the expected behavior

        Runs ``sonic-cfggen -j <DUT_RUN_DIR>/<syslog_marker>.json --write-to-db`` on the
        DUT inside a LogAnalyzer context and asserts that the command returns rc 0.

        Args:
            duthost (AnsibleHost): instance
            syslog_marker (string): marker prefix name to be inserted in the syslog; it is
                also used as the base name of the JSON file loaded from DUT_RUN_DIR
            ignore_regex (string): name of a file (relative to TEMPLATES_DIR) containing
                regexs to be ignored by loganalyzer
            expect_regex (list or string): regex pattern(s) expected to be present in the syslog
            expect_errors (bool): if the test expects an error msg in the syslog or not. Default: False

        Returns:
            None
        """
        loganalyzer = LogAnalyzer(ansible_host=duthost,
                                  marker_prefix=syslog_marker)

        if ignore_regex:
            ignore_file = os.path.join(TEMPLATES_DIR, ignore_regex)
            reg_exp = loganalyzer.parse_regexp_file(src=ignore_file)
            loganalyzer.ignore_regex.extend(reg_exp)

        if expect_regex:
            # A single pattern must be wrapped first: extending a list with a
            # string would add the pattern one character at a time.
            if isinstance(expect_regex, (str, bytes, type(u""))):
                expect_regex = [expect_regex]
            loganalyzer.expect_regex = []
            loganalyzer.expect_regex.extend(expect_regex)

        loganalyzer.match_regex = []
        # The analyzer is invoked with fail=False when errors are expected.
        with loganalyzer(fail=not expect_errors):
            cmd = "sonic-cfggen -j {}/{}.json --write-to-db".format(
                DUT_RUN_DIR, syslog_marker)
            out = duthost.command(cmd)
            pytest_assert(
                out["rc"] == 0, "Failed to execute cmd {}: Error: {}".format(
                    cmd, out["stderr"]))
Пример #2
0
    def acl_rules(self, duthosts, rand_one_dut_hostname, localhost, setup, acl_table, populate_vlan_arp_entries, tbinfo, ip_version):
        """Setup/teardown ACL rules for the current set of tests.

        The rules are applied inside a LogAnalyzer context that expects
        LOG_EXPECT_ACL_RULE_CREATE_RE. After the tests have run, the rules are
        removed inside a context that expects LOG_EXPECT_ACL_RULE_REMOVE_RE.

        Args:
            duthosts: All DUTs belong to the testbed.
            rand_one_dut_hostname: hostname of a random chosen dut to run test.
            localhost: The host from which tests are run.
            setup: Parameters for the ACL tests.
            acl_table: Configuration info for the ACL table.
            populate_vlan_arp_entries: A function to populate ARP/FDB tables for VLAN interfaces.
            tbinfo: Testbed info; forwarded to post_setup_hook.
            ip_version: IP version; forwarded to setup_rules.

        Raises:
            LogAnalyzerError: re-raised after the rules have been torn down
                when the setup phase fails log analysis.

        """
        duthost = duthosts[rand_one_dut_hostname]
        loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix="acl_rules")
        loganalyzer.load_common_config()

        try:
            loganalyzer.expect_regex = [LOG_EXPECT_ACL_RULE_CREATE_RE]
            with loganalyzer:
                self.setup_rules(duthost, acl_table, ip_version)
                time.sleep(5)

            self.post_setup_hook(duthost, localhost, populate_vlan_arp_entries, tbinfo)

            assert self.check_rule_counters(duthost), "Rule counters should be ready!"

        except LogAnalyzerError:
            # Cleanup Config DB if rule creation failed
            logger.error("ACL rule application failed, attempting to clean-up...")
            self.teardown_rules(duthost)
            # Bare raise keeps the traceback of the original failure.
            raise

        try:
            yield
        finally:
            loganalyzer.expect_regex = [LOG_EXPECT_ACL_RULE_REMOVE_RE]
            with loganalyzer:
                logger.info("Removing ACL rules")
                self.teardown_rules(duthost)
Пример #3
0
def acl_table(duthost, acl_table_config, backup_and_restore_config_db_module):
    """Apply ACL table configuration and remove after tests.

    The table is written to Config DB with ``sonic-cfggen`` inside a
    LogAnalyzer context that expects LOG_EXPECT_ACL_TABLE_CREATE_RE, and is
    removed afterwards inside a context that expects
    LOG_EXPECT_ACL_TABLE_REMOVE_RE.

    Args:
        duthost: A fixture to interact with the DUT.
        acl_table_config: A dictionary describing the ACL table configuration to apply.
            The keys "table_name" and "config_file" are read here.
        backup_and_restore_config_db_module: A fixture that handles restoring Config DB
                after the tests are over.

    Yields:
        The ACL table configuration.

    Raises:
        LogAnalyzerError: re-raised after attempting to remove the table when
            the creation phase fails log analysis.

    """
    table_name = acl_table_config["table_name"]
    config_file = acl_table_config["config_file"]

    loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix="acl")
    loganalyzer.load_common_config()

    try:
        loganalyzer.expect_regex = [LOG_EXPECT_ACL_TABLE_CREATE_RE]
        with loganalyzer:
            logger.info("Creating ACL table from config file: \"{}\"".format(config_file))

            # TODO: Use `config` CLI to create ACL table
            duthost.command("sonic-cfggen -j {} --write-to-db".format(config_file))
    except LogAnalyzerError:
        # Cleanup Config DB if table creation failed
        logger.error("ACL table creation failed, attempting to clean-up...")
        duthost.command("config acl remove table {}".format(table_name))
        # Bare raise keeps the traceback of the original failure.
        raise

    try:
        yield acl_table_config
    finally:
        loganalyzer.expect_regex = [LOG_EXPECT_ACL_TABLE_REMOVE_RE]
        with loganalyzer:
            logger.info("Removing ACL table \"{}\"".format(table_name))
            duthost.command("config acl remove table {}".format(table_name))
Пример #4
0
    def run_test_in_reinstall_loop(self):
        """Reinstall the image repeatedly and watch the syslog for MAC parse errors.

        Runs ``self.iteration`` rounds. Odd rounds use ``self.minigraph1`` and
        even rounds use ``self.minigraph2``. When the selected minigraph is
        set, it is copied to /etc/sonic/minigraph.xml before the reinstall and
        loaded with a config reload afterwards. Each round deploys the image,
        reboots the DUT and waits for critical services under a LogAnalyzer
        whose match_regex looks for the "can't parse mac address" message.
        """
        logger.info("Verify MAC in image reinstall loop")
        fetch_fixture = self.request.getfixturevalue
        duthost = fetch_fixture('duthost')
        localhost = fetch_fixture('localhost')

        for counter in range(1, self.iteration + 1):
            # Alternate between the two minigraphs: odd -> first, even -> second.
            if counter % 2:
                current_minigraph = self.minigraph1
            else:
                current_minigraph = self.minigraph2

            logger.info("Iteration #{}".format(counter))
            if current_minigraph:
                copy_msg = "Copy specified minigraph {} to the /etc/sonic folder".format(
                    current_minigraph)
                logger.info(copy_msg)
                duthost.copy(src=current_minigraph,
                             dest="/etc/sonic/minigraph.xml")

            # A fresh analyzer is created for every iteration.
            mac_analyzer = LogAnalyzer(ansible_host=duthost,
                                       marker_prefix="read_mac_metadata")
            mac_analyzer.match_regex = [".*can't parse mac address 'None'*"]

            with mac_analyzer:
                self.deploy_image_to_duthost(duthost, counter)
                reboot(duthost, localhost, wait=120)
                logger.info("Wait until system is stable")
                services_up = wait_until(
                    300, 20, duthost.critical_services_fully_started)
                pytest_assert(services_up,
                              "Not all critical services are fully started")

            if current_minigraph:
                logger.info(
                    "Execute cli 'config load_minigraph -y' to apply new minigraph")
                config_reload(duthost, config_source='minigraph')

            logger.info("Remove old (not current) sonic image")
            duthost.reduce_and_add_sonic_images(disk_used_pcent=1)
            self.check_mtu_and_interfaces(duthost)
Пример #5
0
def mirroring(duthosts, rand_one_dut_hostname, neighbor_ip, mirror_setup, gre_version):
    """
    Fixture that configures a mirror session plus ACL mirror rules on the DUT
    and tears them down after the test.

    Setup renders the persistent ACL rule template into the DUT temp dir,
    adds the mirror session described by SESSION_INFO and loads the rules with
    ``acl-loader``. Teardown calls teardown_mirroring inside a LogAnalyzer
    context that expects LOG_EXCEPT_MIRROR_SESSION_REMOVE; a LogAnalyzerError
    raised there propagates to the caller.

    :param duthosts: all DUT hosts of the testbed
    :param rand_one_dut_hostname: hostname of the DUT selected for the test
    :param neighbor_ip: destination IP passed to 'config mirror_session add'
    :param mirror_setup: mirror_setup fixture; its 'dut_tmp_dir' entry is used
    :param gre_version: not referenced in this body (presumably requested only
        for its fixture side effects / parametrization - confirm)
    """
    duthost = duthosts[rand_one_dut_hostname]
    logger.info("Adding mirror_session to DUT")
    acl_rule_file = os.path.join(mirror_setup['dut_tmp_dir'], ACL_RULE_PERSISTENT_FILE)
    extra_vars = {
        'acl_table_name':  EVERFLOW_TABLE_NAME,
    }
    logger.info('Extra variables for MIRROR table:\n{}'.format(pprint.pformat(extra_vars)))
    # Make the table name available to the template rendered below.
    duthost.host.options['variable_manager'].extra_vars.update(extra_vars)

    duthost.template(src=os.path.join(TEMPLATE_DIR, ACL_RULE_PERSISTENT_TEMPLATE), dest=acl_rule_file)
    duthost.command(
        'config mirror_session add {} {} {} {} {} {} {}'.format(
            SESSION_INFO['name'], SESSION_INFO['src_ip'], neighbor_ip,
            SESSION_INFO['dscp'], SESSION_INFO['ttl'], SESSION_INFO['gre'], SESSION_INFO['queue']))

    logger.info('Loading acl mirror rules ...')
    load_rule_cmd = "acl-loader update full {} --session_name={}".format(acl_rule_file, SESSION_INFO['name'])
    duthost.command(load_rule_cmd)

    try:
        yield
    finally:
        loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix='acl')
        loganalyzer.load_common_config()

        loganalyzer.expect_regex = [LOG_EXCEPT_MIRROR_SESSION_REMOVE]
        with loganalyzer:
            teardown_mirroring(duthost, mirror_setup['dut_tmp_dir'])
Пример #6
0
    def test_pfcwd_port_toggle(self, request, fake_storm, setup_pfc_test, fanout_graph_facts, tbinfo, ptfhost, duthosts, rand_one_dut_hostname, fanouthosts):
        """
        Test PfCWD functionality after toggling port

        Test verifies the following:
            1. Select the port and lossless queue
            2. Start PFCWD on selected test port
            3. Start PFC storm on selected test port and lossless queue
            4. Verify that PFC storm is detected
            5. Stop PFC storm on selected test port and lossless queue
            6. Verify that PFC storm is restored
            7. Toggle test port (put administratively down and then up)
            8. Verify that PFC storm is not detected

        Args:
            request(object) : pytest request object
            fake_storm(fixture) : Module scoped fixture for enable/disable fake storm
            setup_pfc_test(fixture) : Module scoped autouse fixture for PFCWD
            fanout_graph_facts(fixture) : Fanout graph info
            tbinfo(fixture) : Testbed info
            ptfhost(AnsibleHost) : PTF host instance
            duthosts(fixture) : All DUT instances of the testbed
            rand_one_dut_hostname(string) : hostname of the DUT selected for the test
            fanouthosts(AnsibleHost): Fanout instance
        """
        duthost = duthosts[rand_one_dut_hostname]
        setup_info = setup_pfc_test
        self.fanout_info = fanout_graph_facts
        self.ptf = ptfhost
        self.dut = duthost
        self.fanout = fanouthosts
        self.timers = setup_info['pfc_timers']
        self.ports = setup_info['selected_test_ports']
        self.neighbors = setup_info['neighbors']
        dut_facts = self.dut.facts
        self.peer_dev_list = dict()
        self.fake_storm = fake_storm
        self.storm_hndle = None
        action = "dontcare"
        # The option value does not change between ports, so read it once.
        pfc_wd_restore_time_large = request.config.getoption("--restore-time")

        for idx, port in enumerate(self.ports):
            logger.info("")
            logger.info("--- Testing port toggling with PFCWD enabled on {} ---".format(port))
            # init is True only for the first port (idx == 0).
            self.setup_test_params(port, setup_info['vlan'], init=not idx)
            self.traffic_inst = SendVerifyTraffic(self.ptf, dut_facts['router_mac'], self.pfc_wd)
            # wait time before we check the logs for the 'restore' signature. 'pfc_wd_restore_time_large' is in ms.
            self.timers['pfc_wd_wait_for_restore_time'] = int(pfc_wd_restore_time_large / 1000 * 2)

            try:
                # Verify that PFC storm is detected and restored
                self.stats = PfcPktCntrs(self.dut, action)
                logger.info("{} on port {}".format(WD_ACTION_MSG_PFX[action], port))
                self.run_test(self.dut, port, action)

                # Toggle test port and verify that PFC storm is not detected
                loganalyzer = LogAnalyzer(ansible_host=self.dut, marker_prefix="pfc_function_storm_detect_{}_port_{}".format(action, port))
                marker = loganalyzer.init()
                ignore_file = os.path.join(TEMPLATES_DIR, "ignore_pfc_wd_messages")
                reg_exp = loganalyzer.parse_regexp_file(src=ignore_file)
                loganalyzer.ignore_regex.extend(reg_exp)
                loganalyzer.expect_regex = []
                loganalyzer.expect_regex.extend([EXPECT_PFC_WD_DETECT_RE])
                loganalyzer.match_regex = []

                port_toggle(self.dut, tbinfo, ports=[port])

                logger.info("Verify that PFC storm is not detected on port {}".format(port))
                result = loganalyzer.analyze(marker, fail=False)
                # The detect message is the "expected" pattern here, so it must
                # be MISSING; zero missing matches means a storm was detected.
                if result["total"]["expected_missing_match"] == 0:
                    # pytest.fail() requires a string message.
                    pytest.fail(str(result))

            except Exception as e:
                pytest.fail(str(e))

            finally:
                if self.storm_hndle:
                    logger.info("--- Stop PFC storm on port {}".format(port))
                    self.storm_hndle.stop_storm()
                else:
                    logger.info("--- Disabling fake storm on port {} queue {}".format(port, self.queue_oid))
                    PfcCmd.set_storm_status(self.dut, self.queue_oid, "disabled")
                logger.info("--- Stop PFCWD ---")
                self.dut.command("pfcwd stop")
Пример #7
0
def advanceboot_loganalyzer(duthosts, rand_one_dut_hostname, request):
    """
    Advance reboot log analysis.
    This fixture starts log analysis at the beginning of the test. At the end,
    the collected expect messages are verified and timing of start/stop is calculated.
    The analysis result and its summary are logged and written as
    ``<test name>_report.json`` and ``<test name>_summary.json`` under
    ``../logs/platform_tests/`` relative to this file.

    Args:
        duthosts : List of DUT hosts
        rand_one_dut_hostname: hostname of a randomly selected DUT
        request: pytest request object; supplies the test name and its markers
    """
    duthost = duthosts[rand_one_dut_hostname]
    test_name = request.node.name
    # The reboot type is derived from the test name.
    if "warm" in test_name:
        reboot_type = "warm"
    elif "fast" in test_name:
        reboot_type = "fast"
    else:
        reboot_type = "unknown"
    # Currently, advanced reboot test would skip for kvm platform if the test has no device_type marker for vs.
    # Doing the same skip logic in this fixture to avoid running loganalyzer without the test executed
    if duthost.facts['platform'] == 'x86_64-kvm_x86_64-r0':
        device_marks = [
            arg for mark in request.node.iter_markers(name='device_type')
            for arg in mark.args
        ]
        if 'vs' not in device_marks:
            pytest.skip('Testcase not supported for kvm')

    loganalyzer = LogAnalyzer(
        ansible_host=duthost,
        marker_prefix="test_advanced_reboot_{}".format(test_name),
        additional_files={
            '/var/log/swss/sairedis.rec':
            'recording on: /var/log/swss/sairedis.rec',
            '/var/log/frr/bgpd.log': ''
        })
    start_marker = loganalyzer.init()
    loganalyzer.load_common_config()

    ignore_file = os.path.join(TEMPLATES_DIR, "ignore_boot_messages")
    expect_file = os.path.join(TEMPLATES_DIR, "expect_boot_messages")
    ignore_reg_exp = loganalyzer.parse_regexp_file(src=ignore_file)
    expect_reg_exp = loganalyzer.parse_regexp_file(src=expect_file)

    loganalyzer.ignore_regex.extend(ignore_reg_exp)
    loganalyzer.expect_regex = []
    loganalyzer.expect_regex.extend(expect_reg_exp)
    loganalyzer.match_regex = []

    yield

    result = loganalyzer.analyze(start_marker, fail=False)
    analyze_result = {"time_span": dict(), "offset_from_kexec": dict()}
    offset_from_kexec = dict()

    # Dispatch each log's messages to the matching parser, keyed on file name.
    for key, messages in result["expect_messages"].items():
        if "syslog" in key or "bgpd.log" in key:
            analyze_log_file(duthost, messages, analyze_result,
                             offset_from_kexec)
        elif "sairedis.rec" in key:
            analyze_sairedis_rec(messages, analyze_result, offset_from_kexec)

    # The reboot start time is the same for every entry; look it up once.
    reboot_start_time = analyze_result.get("reboot_time", {}) \
        .get("timestamp", {}).get("Start")
    for time_data in analyze_result["offset_from_kexec"].values():
        marker_start_time = time_data.get("timestamp", {}).get("Start")
        if reboot_start_time and reboot_start_time != "N/A" and marker_start_time:
            time_data["time_taken"] = (
                datetime.strptime(marker_start_time, FMT) -
                datetime.strptime(reboot_start_time, FMT)).total_seconds()
        else:
            time_data["time_taken"] = "N/A"

    get_data_plane_report(analyze_result, reboot_type)
    result_summary = get_report_summary(analyze_result, reboot_type)
    logging.info(json.dumps(analyze_result, indent=4))
    logging.info(json.dumps(result_summary, indent=4))

    report_file_dir = os.path.realpath(
        os.path.join(os.path.dirname(__file__), "../logs/platform_tests/"))
    report_file_path = os.path.join(report_file_dir, test_name + "_report.json")
    summary_file_path = os.path.join(report_file_dir, test_name + "_summary.json")
    if not os.path.exists(report_file_dir):
        os.makedirs(report_file_dir)
    with open(report_file_path, 'w') as fp:
        json.dump(analyze_result, fp, indent=4)
    with open(summary_file_path, 'w') as fp:
        json.dump(result_summary, fp, indent=4)
Пример #8
0
def test_monitoring_critical_processes(duthosts, rand_one_dut_hostname,
                                       tbinfo):
    """Check that stopped critical processes are reported in the syslog.

    The expected alerting messages are registered with a LogAnalyzer, the
    critical processes are stopped, and after a 70 second wait the syslog is
    analyzed. The DUT is then recovered with a config reload and its
    critical processes and BGP sessions are post-checked.

    Args:
        duthosts: list of DUTs.
        rand_one_dut_hostname: hostname of DUT.
        tbinfo: Testbed information.

    Returns:
        None.
    """
    duthost = duthosts[rand_one_dut_hostname]

    analyzer = LogAnalyzer(ansible_host=duthost,
                           marker_prefix="monitoring_critical_processes")
    analyzer.expect_regex = []

    # Remember which BGP sessions are established before anything is stopped.
    up_bgp_neighbors = []
    for neighbor, info in duthost.get_bgp_neighbors().items():
        if info["state"] == "established":
            up_bgp_neighbors.append(neighbor.lower())

    # Skip 'acms' container since 'acms' process is not running on lab devices and
    # another process `cert_converter.py' is set to auto-restart if exited.
    skip_containers = ["database", "gbsyncd", "acms"]
    # Skip 'radv' container on devices whose role is not T0.
    if tbinfo["topo"]["type"] != "t0":
        skip_containers.append("radv")

    containers_in_namespaces = get_containers_namespace_ids(
        duthost, skip_containers)

    # The 20191130 image gets the Monit message set; others get Supervisord's.
    if "20191130" in duthost.os_version:
        build_expected_messages = get_expected_alerting_messages_monit
    else:
        build_expected_messages = get_expected_alerting_messages_supervisor
    expected_alerting_messages = build_expected_messages(
        duthost, containers_in_namespaces)

    analyzer.expect_regex.extend(expected_alerting_messages)
    start_marker = analyzer.init()

    stop_critical_processes(duthost, containers_in_namespaces)

    # Wait for 70 seconds such that Supervisord/Monit has a chance to write alerting message into syslog.
    logger.info(
        "Sleep 70 seconds to wait for the alerting messages in syslog...")
    time.sleep(70)

    logger.info("Checking the alerting messages from syslog...")
    analyzer.analyze(start_marker)
    logger.info("Found all the expected alerting messages from syslog!")

    logger.info("Executing the config reload...")
    config_reload(duthost)
    logger.info("Executing the config reload was done!")

    ensure_all_critical_processes_running(duthost, containers_in_namespaces)

    post_check_ok = postcheck_critical_processes_status(duthost,
                                                        up_bgp_neighbors)
    if not post_check_ok:
        pytest.fail("Post-check failed after testing the container checker!")
    logger.info(
        "Post-checking status of critical processes and BGP sessions was done!")
Пример #9
0
def advanceboot_loganalyzer(duthosts, rand_one_dut_hostname):
    """
    Advance reboot log analysis.
    This fixture starts log analysis at the beginning of the test. At the end,
    the collected expect messages are verified and timing of start/stop is calculated.
    Per-service stop/start/reboot durations are derived from the
    "Stopping/Stopped/Starting/Started ... service" messages, merged with the
    first ``/tmp/*-report.json`` file found (if any) and logged.

    Args:
        duthosts : List of DUT hosts
        rand_one_dut_hostname: hostname of a randomly selected DUT
    """
    duthost = duthosts[rand_one_dut_hostname]
    loganalyzer = LogAnalyzer(ansible_host=duthost,
                              marker_prefix="test_advanced_reboot")
    marker = loganalyzer.init()
    loganalyzer.load_common_config()

    ignore_file = os.path.join(TEMPLATES_DIR, "ignore_boot_messages")
    expect_file = os.path.join(TEMPLATES_DIR, "expect_boot_messages")
    ignore_reg_exp = loganalyzer.parse_regexp_file(src=ignore_file)
    expect_reg_exp = loganalyzer.parse_regexp_file(src=expect_file)

    loganalyzer.ignore_regex.extend(ignore_reg_exp)
    loganalyzer.expect_regex = []
    loganalyzer.expect_regex.extend(expect_reg_exp)
    loganalyzer.match_regex = []

    yield

    result = loganalyzer.analyze(marker, fail=False)
    # dict.values() is a view on Python 3 and cannot be indexed; copy it to a list.
    messages = list(result["expect_messages"].values())
    if not messages:
        logging.error("Expected messages not found in syslog")
        return
    # Only the first log's messages are examined.
    messages = messages[0]

    FMT = "%b %d %H:%M:%S.%f"
    service_restart_times = dict()
    service_patterns = {
        "Stopping": re.compile(r'.*Stopping.*service.*'),
        "Stopped": re.compile(r'.*Stopped.*service.*'),
        "Starting": re.compile(r'.*Starting.*service.*'),
        "Started": re.compile(r'.*Started.*service.*')
    }

    def service_time_check(message, status):
        """Record the timestamp of `status` for the service named in `message`."""
        # The timestamp is whatever precedes the hostname in the log line.
        timestamp = message.split(duthost.hostname)[0].strip()
        # The service name is the first word after "<status> ".
        service_name = message.split(status + " ")[1].split()[0]
        service_dict = service_restart_times.get(service_name,
                                                 {"timestamp": {}})
        timestamps = service_dict.get("timestamp")
        if status in timestamps:
            # Repeated status: count occurrences; the latest timestamp wins.
            count_key = status + " count"
            service_dict[count_key] = service_dict.get(count_key, 1) + 1
        timestamps[status] = timestamp
        service_restart_times[service_name] = service_dict

    def elapsed_seconds(timestamps, begin, end):
        """Return seconds between two recorded statuses, or None if either is missing."""
        if begin not in timestamps or end not in timestamps:
            return None
        return (datetime.strptime(timestamps[end], FMT) -
                datetime.strptime(timestamps[begin], FMT)).total_seconds()

    for message in messages:
        for status, pattern in service_patterns.items():
            if pattern.search(message):
                service_time_check(message, status)

    loganalyzer.save_extracted_log(dest="/tmp/log/syslog")
    logging.info(json.dumps(service_restart_times, indent=4))

    for timings in service_restart_times.values():
        timestamps = timings["timestamp"]
        timings["stop_time"] = elapsed_seconds(timestamps, "Stopping", "Stopped")
        timings["start_time"] = elapsed_seconds(timestamps, "Starting", "Started")
        timings["reboot_time"] = elapsed_seconds(timestamps, "Stopped", "Started")

    files = glob.glob('/tmp/*-report.json')
    if files:
        filepath = files[0]
        with open(filepath) as json_file:
            report = json.load(json_file)
            service_restart_times.update(report)
    result = service_restart_times
    logging.info(json.dumps(result, indent=4))
Пример #10
0
def test_check_sfp_status_and_configure_sfp(duthost, conn_graph_facts):
    """
    @summary: Check SFP status and configure SFP

    This case is to use the sfputil tool and show command to check SFP status and configure SFP. Currently the
    only configuration is to reset SFP. Commands to be tested:
    * sfputil show presence
    * show interface transceiver presence
    * sfputil show eeprom
    * show interface transceiver eeprom
    * sfputil reset <interface name>

    On mellanox ASICs the whole test additionally runs under a LogAnalyzer
    (common config loaded, "Eeprom query failed" kernel messages ignored).
    """
    # Stays None on platforms where no log analysis is performed.
    loganalyzer = None
    marker = None
    if duthost.facts["asic_type"] in ["mellanox"]:
        loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix='sfp_cfg')
        loganalyzer.load_common_config()

        loganalyzer.ignore_regex.append("kernel.*Eeprom query failed*")
        marker = loganalyzer.init()

    cmd_sfp_presence = "sudo sfputil show presence"
    cmd_sfp_eeprom = "sudo sfputil show eeprom"
    cmd_sfp_reset = "sudo sfputil reset"
    cmd_xcvr_presence = "show interface transceiver presence"
    cmd_xcvr_eeprom = "show interface transceiver eeprom"

    dev_conn = conn_graph_facts["device_conn"]

    def _assert_all_present(cmd):
        """Run presence command `cmd` and assert every connected interface is 'Present'."""
        output = duthost.command(cmd)
        # The first two output lines are skipped before parsing.
        parsed_presence = parse_output(output["stdout_lines"][2:])
        for intf in dev_conn:
            assert intf in parsed_presence, "Interface is not in output of '%s'" % cmd
            assert parsed_presence[intf] == "Present", "Interface presence is not 'Present'"

    def _assert_all_eeprom_detected(cmd):
        """Run EEPROM command `cmd` and assert an EEPROM is detected on every connected interface."""
        output = duthost.command(cmd)
        parsed_eeprom = parse_eeprom(output["stdout_lines"])
        for intf in dev_conn:
            assert intf in parsed_eeprom, "Interface is not in output of '%s'" % cmd
            assert parsed_eeprom[intf] == "SFP EEPROM detected"

    portmap = get_port_map(duthost)
    logging.info("Got portmap {}".format(portmap))

    logging.info("Check output of '%s'" % cmd_sfp_presence)
    _assert_all_present(cmd_sfp_presence)

    logging.info("Check output of '%s'" % cmd_xcvr_presence)
    _assert_all_present(cmd_xcvr_presence)

    logging.info("Check output of '%s'" % cmd_sfp_eeprom)
    _assert_all_eeprom_detected(cmd_sfp_eeprom)

    logging.info("Check output of '%s'" % cmd_xcvr_eeprom)
    _assert_all_eeprom_detected(cmd_xcvr_eeprom)

    logging.info("Test '%s <interface name>'" % cmd_sfp_reset)
    # Several interfaces may share one physical port; reset each port once.
    tested_physical_ports = set()
    for intf in dev_conn:
        phy_intf = portmap[intf][0]
        if phy_intf in tested_physical_ports:
            logging.info("skip tested SFPs {} to avoid repeating operating physical interface {}".format(intf, phy_intf))
            continue
        tested_physical_ports.add(phy_intf)
        logging.info("resetting {} physical interface {}".format(intf, phy_intf))
        reset_result = duthost.command("%s %s" % (cmd_sfp_reset, intf))
        assert reset_result["rc"] == 0, "'%s %s' failed" % (cmd_sfp_reset, intf)
        time.sleep(5)
    logging.info("Wait some time for SFP to fully recover after reset")
    time.sleep(60)

    logging.info("Check sfp presence again after reset")
    _assert_all_present(cmd_sfp_presence)

    logging.info("Check interface status")
    mg_facts = duthost.minigraph_facts(host=duthost.hostname)["ansible_facts"]
    intf_facts = duthost.interface_facts(up_ports=mg_facts["minigraph_ports"])["ansible_facts"]
    assert len(intf_facts["ansible_interface_link_down_ports"]) == 0, \
        "Some interfaces are down: %s" % str(intf_facts["ansible_interface_link_down_ports"])

    if loganalyzer is not None:
        loganalyzer.analyze(marker)
Пример #11
0
def test_check_sfp_low_power_mode(duthost, conn_graph_facts):
    """
    @summary: Check SFP low power mode

    This case is to use the sfputil tool command to check and set SFP low power mode
    * sfputil show lpmode
    * sfputil lpmode off
    * sfputil lpmode on

    The lpmode of every physical port is first flipped and then restored to
    its original value; afterwards SFP presence and interface link state are
    verified. On mellanox ASICs the whole test additionally runs under a
    LogAnalyzer that ignores "Eeprom query failed" messages.
    """
    # Stays None on platforms where no log analysis is performed.
    loganalyzer = None
    marker = None
    if duthost.facts["asic_type"] in ["mellanox"]:
        loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix='sfp_lpm')
        loganalyzer.load_common_config()

        loganalyzer.ignore_regex.append("Eeprom query failed")
        marker = loganalyzer.init()

    cmd_sfp_presence = "sudo sfputil show presence"
    cmd_sfp_show_lpmode = "sudo sfputil show lpmode"
    cmd_sfp_set_lpmode = "sudo sfputil lpmode"

    dev_conn = conn_graph_facts["device_conn"]

    def _read_and_check_lpmode():
        """Return the parsed 'show lpmode' output after asserting each connected interface is on/off."""
        lpmode_show = duthost.command(cmd_sfp_show_lpmode)
        # The first two output lines are skipped before parsing.
        parsed_lpmode = parse_output(lpmode_show["stdout_lines"][2:])
        for intf in dev_conn:
            assert intf in parsed_lpmode, "Interface is not in output of '%s'" % cmd_sfp_show_lpmode
            assert parsed_lpmode[intf].lower() in ("on", "off"), "Unexpected SFP lpmode"
        return parsed_lpmode

    def _set_lpmode_once_per_port(verb, choose_mode):
        """Set lpmode once per physical port; `choose_mode` maps the original mode to the new one."""
        # Several interfaces may share one physical port; touch each port once.
        tested_physical_ports = set()
        for intf in dev_conn:
            phy_intf = portmap[intf][0]
            if phy_intf in tested_physical_ports:
                logging.info("skip tested SFPs {} to avoid repeating operating physical interface {}".format(intf, phy_intf))
                continue
            tested_physical_ports.add(phy_intf)
            logging.info("{} {} physical interface {}".format(verb, intf, phy_intf))
            new_lpmode = choose_mode(original_lpmode[intf].lower())
            lpmode_set_result = duthost.command("%s %s %s" % (cmd_sfp_set_lpmode, new_lpmode, intf))
            assert lpmode_set_result["rc"] == 0, "'%s %s %s' failed" % (cmd_sfp_set_lpmode, new_lpmode, intf)
        time.sleep(10)

    portmap = get_port_map(duthost)
    logging.info("Got portmap {}".format(portmap))

    logging.info("Check output of '%s'" % cmd_sfp_show_lpmode)
    # Snapshot the starting modes so they can be restored later.
    original_lpmode = copy.deepcopy(_read_and_check_lpmode())

    logging.info("Try to change SFP lpmode")
    _set_lpmode_once_per_port("setting", lambda mode: "off" if mode == "on" else "on")

    logging.info("Check SFP lower power mode again after changing SFP lpmode")
    _read_and_check_lpmode()

    # Second pass: put every port back to the mode it started with.
    logging.info("Try to change SFP lpmode")
    _set_lpmode_once_per_port("restoring", lambda mode: mode)

    logging.info("Check SFP lower power mode again after changing SFP lpmode")
    _read_and_check_lpmode()

    logging.info("Check sfp presence again after setting lpmode")
    sfp_presence = duthost.command(cmd_sfp_presence)
    parsed_presence = parse_output(sfp_presence["stdout_lines"][2:])
    for intf in dev_conn:
        assert intf in parsed_presence, "Interface is not in output of '%s'" % cmd_sfp_presence
        assert parsed_presence[intf] == "Present", "Interface presence is not 'Present'"

    logging.info("Check interface status")
    mg_facts = duthost.minigraph_facts(host=duthost.hostname)["ansible_facts"]
    intf_facts = duthost.interface_facts(up_ports=mg_facts["minigraph_ports"])["ansible_facts"]
    assert len(intf_facts["ansible_interface_link_down_ports"]) == 0, \
        "Some interfaces are down: %s" % str(intf_facts["ansible_interface_link_down_ports"])

    if loganalyzer is not None:
        loganalyzer.analyze(marker)
Пример #12
0
def test_check_sfp_status_and_configure_sfp(duthosts, rand_one_dut_hostname, enum_frontend_asic_index, conn_graph_facts, tbinfo):
    """
    @summary: Check SFP status and configure SFP

    Exercises the sfputil tool and the matching show commands to verify SFP status, then
    resets the SFPs (reset is the only configuration covered here). Commands under test:
    * sfputil show presence
    * show interface transceiver presence
    * sfputil show eeprom
    * show interface transceiver eeprom
    * sfputil reset <interface name>
    """
    duthost = duthosts[rand_one_dut_hostname]
    if duthost.facts["asic_type"] in ["mellanox"]:
        # Log analysis is only set up (and evaluated at the end) for Mellanox ASICs.
        loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix='sfp_cfg')
        loganalyzer.load_common_config()

        loganalyzer.ignore_regex.append("kernel.*Eeprom query failed*")
        marker = loganalyzer.init()

    host_conn = conn_graph_facts["device_conn"][duthost.hostname]
    dev_conn = host_conn

    # Port map restricted to the ASIC selected by enum_frontend_asic_index
    portmap = get_port_map(duthost, enum_frontend_asic_index)
    logging.info("Got portmap {}".format(portmap))

    if enum_frontend_asic_index is not None:
        # Keep only the ports of this ASIC that also appear in conn_graph_facts
        dev_conn = {port: info for port, info in portmap.items() if port in host_conn}
        logging.info("ASIC {} interface_list {}".format(enum_frontend_asic_index, dev_conn))

    cmd_sfp_presence = "sudo sfputil show presence"
    cmd_sfp_eeprom = "sudo sfputil show eeprom"
    cmd_sfp_reset = "sudo sfputil reset"
    cmd_xcvr_presence = "show interface transceiver presence"
    cmd_xcvr_eeprom = "show interface transceiver eeprom"

    global ans_host
    ans_host = duthost

    def check_presence(command):
        # Run a presence command and require every tested port to be listed as 'Present'.
        output = duthost.command(command)
        presence = parse_output(output["stdout_lines"][2:])
        for port in dev_conn:
            assert port in presence, "Interface is not in output of '%s'" % command
            assert presence[port] == "Present", "Interface presence is not 'Present'"

    def check_eeprom(command, missing_msg):
        # Run an EEPROM command and require an EEPROM to be detected on every tested port.
        output = duthost.command(command)
        eeprom = parse_eeprom(output["stdout_lines"])
        for port in dev_conn:
            assert port in eeprom, missing_msg
            assert eeprom[port] == "SFP EEPROM detected"

    logging.info("Check output of '%s'" % cmd_sfp_presence)
    check_presence(cmd_sfp_presence)

    logging.info("Check output of '%s'" % cmd_xcvr_presence)
    check_presence(cmd_xcvr_presence)

    logging.info("Check output of '%s'" % cmd_sfp_eeprom)
    check_eeprom(cmd_sfp_eeprom, "Interface is not in output of 'sfputil show eeprom'")

    logging.info("Check output of '%s'" % cmd_xcvr_eeprom)
    check_eeprom(cmd_xcvr_eeprom, "Interface is not in output of '%s'" % cmd_xcvr_eeprom)

    logging.info("Test '%s <interface name>'" % cmd_sfp_reset)
    already_reset = set()
    for intf in dev_conn:
        phy_intf = portmap[intf][0]
        if phy_intf not in already_reset:
            already_reset.add(phy_intf)
            logging.info("resetting {} physical interface {}".format(intf, phy_intf))
            reset_result = duthost.command("%s %s" % (cmd_sfp_reset, intf))
            assert reset_result["rc"] == 0, "'%s %s' failed" % (cmd_sfp_reset, intf)
            time.sleep(5)
        else:
            # Several logical ports can share one physical port; reset it only once.
            logging.info("skip tested SFPs {} to avoid repeating operating physical interface {}".format(intf, phy_intf))
    logging.info("Wait some time for SFP to fully recover after reset")
    time.sleep(60)

    logging.info("Check sfp presence again after reset")
    check_presence(cmd_sfp_presence)

    logging.info("Check interface status")
    namespace = duthost.get_namespace_from_asic_id(enum_frontend_asic_index)
    mg_facts = duthost.get_extended_minigraph_facts(tbinfo)
    # TODO Remove this logic when minigraph facts supports namespace in multi_asic
    if enum_frontend_asic_index is None:
        up_ports = mg_facts["minigraph_ports"]
    else:
        # Keep only the ports of this ASIC that are present in the minigraph
        up_ports = {port: info for port, info in portmap.items() if port in mg_facts["minigraph_ports"]}
    intf_facts = duthost.interface_facts(namespace=namespace, up_ports=up_ports)["ansible_facts"]
    down_ports = intf_facts["ansible_interface_link_down_ports"]
    assert len(down_ports) == 0, \
        "Some interfaces are down: %s" % str(down_ports)

    if duthost.facts["asic_type"] in ["mellanox"]:
        loganalyzer.analyze(marker)
Пример #13
0
def test_check_sfp_low_power_mode(duthosts, rand_one_dut_hostname, enum_frontend_asic_index, conn_graph_facts, tbinfo):
    """
    @summary: Check SFP low power mode

    Uses the sfputil tool to read the low power mode of every tested port, flip it on the
    ports that support it, and then put the original value back:
    * sfputil show lpmode
    * sfputil lpmode off
    * sfputil lpmode on
    """
    duthost = duthosts[rand_one_dut_hostname]
    asichost = duthost.get_asic(enum_frontend_asic_index)
    if duthost.facts["asic_type"] in ["mellanox"]:
        # Log analysis is only set up (and evaluated at the end) for Mellanox ASICs.
        loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix='sfp_lpm')
        loganalyzer.load_common_config()

        loganalyzer.ignore_regex.append("Eeprom query failed")
        marker = loganalyzer.init()

    host_conn = conn_graph_facts["device_conn"][duthost.hostname]
    dev_conn = host_conn

    # Port map restricted to the ASIC selected by enum_frontend_asic_index
    portmap = get_port_map(duthost, enum_frontend_asic_index)
    logging.info("Got portmap {}".format(portmap))

    if enum_frontend_asic_index is not None:
        # Keep only the ports of this ASIC that also appear in conn_graph_facts
        dev_conn = {port: info for port, info in portmap.items() if port in host_conn}
        logging.info("ASIC {} interface_list {}".format(enum_frontend_asic_index, dev_conn))

    cmd_sfp_presence = "sudo sfputil show presence"
    cmd_sfp_show_lpmode = "sudo sfputil show lpmode"
    cmd_sfp_set_lpmode = "sudo sfputil lpmode"

    global ans_host
    ans_host = duthost

    def fetch_lpmode():
        # Parse the per-port lpmode table, skipping the two header lines.
        show_result = duthost.command(cmd_sfp_show_lpmode)
        return parse_output(show_result["stdout_lines"][2:])

    def verify_lpmode(lpmode):
        # Every tested port must be listed with a value of either "on" or "off".
        for port in dev_conn:
            assert port in lpmode, "Interface is not in output of '%s'" % cmd_sfp_show_lpmode
            assert lpmode[port].lower() == "on" or lpmode[port].lower() == "off", "Unexpected SFP lpmode"

    def set_lpmode(mode, port):
        # Apply the requested lpmode and require the command to succeed.
        set_result = duthost.command("%s %s %s" % (cmd_sfp_set_lpmode, mode, port))
        assert set_result["rc"] == 0, "'%s %s %s' failed" % (cmd_sfp_set_lpmode, mode, port)

    logging.info("Check output of '%s'" % cmd_sfp_show_lpmode)
    parsed_lpmode = fetch_lpmode()
    original_lpmode = copy.deepcopy(parsed_lpmode)
    verify_lpmode(parsed_lpmode)

    logging.info("Try to change SFP lpmode")
    tested_physical_ports = set()

    not_supporting_lpm_physical_ports = set()
    for intf in dev_conn:
        phy_intf = portmap[intf][0]
        if phy_intf in tested_physical_ports:
            logging.info("skip tested SFPs {} to avoid repeating operating physical interface {}".format(intf, phy_intf))
            continue

        # Transceiver type and power class are read from redis DB 6 via the database container.
        sfp_type_cmd = 'redis-cli -n 6 hget "TRANSCEIVER_INFO|{}" type'.format(intf)
        sfp_type = duthost.command(asichost.get_docker_cmd(sfp_type_cmd, "database"))["stdout"]

        power_class_cmd = 'redis-cli -n 6 hget "TRANSCEIVER_INFO|{}" ext_identifier'.format(intf)
        power_class = duthost.command(asichost.get_docker_cmd(power_class_cmd, "database"))["stdout"]

        # Only QSFP modules that are not "Power Class 1" are exercised.
        supports_lpm = "QSFP" in sfp_type and "Power Class 1" not in power_class
        if not supports_lpm:
            logging.info("skip testing port {} which doesn't support LPM".format(intf))
            not_supporting_lpm_physical_ports.add(phy_intf)
            continue
        tested_physical_ports.add(phy_intf)
        logging.info("setting {} physical interface {}".format(intf, phy_intf))
        if original_lpmode[intf].lower() == "on":
            new_lpmode = "off"
        else:
            new_lpmode = "on"
        set_lpmode(new_lpmode, intf)
    time.sleep(10)

    if not tested_physical_ports:
        pytest.skip("None of the ports supporting LPM, skip the test")

    logging.info("Check SFP lower power mode again after changing SFP lpmode")
    verify_lpmode(fetch_lpmode())

    logging.info("Try to change SFP lpmode")
    tested_physical_ports = set()
    for intf in dev_conn:
        phy_intf = portmap[intf][0]
        if phy_intf in not_supporting_lpm_physical_ports:
            logging.info("skip testing port {} which doesn't support LPM".format(intf))
            continue
        if phy_intf in tested_physical_ports:
            logging.info("skip tested SFPs {} to avoid repeating operating physical interface {}".format(intf, phy_intf))
            continue
        tested_physical_ports.add(phy_intf)
        logging.info("restoring {} physical interface {}".format(intf, phy_intf))
        # Put back the value recorded before the first change.
        set_lpmode(original_lpmode[intf].lower(), intf)
    time.sleep(10)

    logging.info("Check SFP lower power mode again after changing SFP lpmode")
    verify_lpmode(fetch_lpmode())

    logging.info("Check sfp presence again after setting lpmode")
    sfp_presence = duthost.command(cmd_sfp_presence)
    parsed_presence = parse_output(sfp_presence["stdout_lines"][2:])
    for intf in dev_conn:
        assert intf in parsed_presence, "Interface is not in output of '%s'" % cmd_sfp_presence
        assert parsed_presence[intf] == "Present", "Interface presence is not 'Present'"

    logging.info("Check interface status")
    namespace = duthost.get_namespace_from_asic_id(enum_frontend_asic_index)
    mg_facts = duthost.get_extended_minigraph_facts(tbinfo)
    # TODO Remove this logic when minigraph facts supports namespace in multi_asic
    if enum_frontend_asic_index is None:
        up_ports = mg_facts["minigraph_ports"]
    else:
        # Keep only the ports of this ASIC that are present in the minigraph
        up_ports = {port: info for port, info in portmap.items() if port in mg_facts["minigraph_ports"]}
    intf_facts = duthost.interface_facts(namespace=namespace, up_ports=up_ports)["ansible_facts"]
    down_ports = intf_facts["ansible_interface_link_down_ports"]
    assert len(down_ports) == 0, \
        "Some interfaces are down: %s" % str(down_ports)

    if duthost.facts["asic_type"] in ["mellanox"]:
        loganalyzer.analyze(marker)
Пример #14
0
def advanceboot_loganalyzer(duthosts, rand_one_dut_hostname, request):
    """
    Advance reboot log analysis.
    This fixture starts log analysis at the beginning of the test. At the end,
    the collected expect messages are verified and timing of start/stop is calculated.

    Args:
        duthosts : List of DUT hosts
        rand_one_dut_hostname: hostname of a randomly selected DUT
        request: pytest request object; the test node name drives the reboot type
            detection, the loganalyzer marker prefix and the report file names

    Yields:
        tuple: (pre_reboot_analysis, post_reboot_analysis) callables. The first returns
        the loganalyzer marker, the second takes that marker and produces the reports.
    """
    duthost = duthosts[rand_one_dut_hostname]
    test_name = request.node.name
    # The reboot type is inferred purely from the test name.
    if "warm" in test_name:
        reboot_type = "warm"
    elif "fast" in test_name:
        reboot_type = "fast"
    else:
        reboot_type = "unknown"
    # Currently, advanced reboot test would skip for kvm platform if the test has no device_type marker for vs.
    # Doing the same skip logic in this fixture to avoid running loganalyzer without the test executed
    if duthost.facts['platform'] == 'x86_64-kvm_x86_64-r0':
        device_marks = [arg for mark in request.node.iter_markers(name='device_type') for arg in mark.args]
        if 'vs' not in device_marks:
            pytest.skip('Testcase not supported for kvm')

    # The bgpd log location depends on the image running before the reboot.
    base_os_version = duthost.shell('sonic_installer list | grep Current | cut -f2 -d " "')['stdout']
    if 'SONiC-OS-201811' in base_os_version:
        bgpd_log = "/var/log/quagga/bgpd.log"
    else:
        bgpd_log = "/var/log/frr/bgpd.log"

    hwsku = duthost.facts["hwsku"]
    # Detect whether /var/log lives on tmpfs by looking at the df output for that mount.
    log_filesystem = duthost.shell("df -h | grep '/var/log'")['stdout']
    logs_in_tmpfs = True if log_filesystem and "tmpfs" in log_filesystem else False
    if hwsku in SMALL_DISK_SKUS or logs_in_tmpfs:
        # For small disk devices, /var/log in mounted in tmpfs.
        # Hence, after reboot the preboot logs are lost.
        # For log_analyzer to work, it needs logs from the shutdown path
        # Below method inserts a step in reboot script to back up logs to /host/
        overwrite_script_to_backup_logs(duthost, reboot_type, bgpd_log)

    # Besides syslog, the sairedis recording and the bgpd log are analyzed as additional files.
    loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix="test_advanced_reboot_{}".format(test_name),
                    additional_files={'/var/log/swss/sairedis.rec': 'recording on: /var/log/swss/sairedis.rec', bgpd_log: ''})

    def pre_reboot_analysis():
        """Initialize the loganalyzer, load boot ignore/expect regexes and return the marker."""
        marker = loganalyzer.init()
        loganalyzer.load_common_config()

        ignore_file = os.path.join(TEMPLATES_DIR, "ignore_boot_messages")
        expect_file = os.path.join(TEMPLATES_DIR, "expect_boot_messages")
        ignore_reg_exp = loganalyzer.parse_regexp_file(src=ignore_file)
        expect_reg_exp = loganalyzer.parse_regexp_file(src=expect_file)

        loganalyzer.ignore_regex.extend(ignore_reg_exp)
        # Only the boot expect messages are collected; match regexes are cleared.
        loganalyzer.expect_regex = []
        loganalyzer.expect_regex.extend(expect_reg_exp)
        loganalyzer.match_regex = []
        return marker

    def post_reboot_analysis(marker, reboot_oper=None, log_dir=None):
        """
        Analyze the logs collected since `marker` and write the timing report and summary.

        Args:
            marker: marker passed to loganalyzer.analyze (the value returned by pre_reboot_analysis)
            reboot_oper: optional string or object; a non-string object is reduced to its
                class name and used as part of the report file names
            log_dir: forwarded to get_data_plane_report

        Returns:
            None. Returns early, before any report file is written, when the kexec start
            time cannot be found among the syslog expect messages.
        """
        if hwsku in SMALL_DISK_SKUS or logs_in_tmpfs:
            # Move the logs backed up under /host/ back to their log directories.
            # Errors are ignored, so a missing backup file does not fail the test.
            restore_backup = "mv /host/syslog.99 /var/log/; " +\
                "mv /host/sairedis.rec.99 /var/log/swss/; " +\
                    "mv /host/swss.rec.99 /var/log/swss/; " +\
                        "mv /host/bgpd.log.99 /var/log/frr/"
            duthost.shell(restore_backup, module_ignore_errors=True)
            # find the fast/warm-reboot script path
            reboot_script_path = duthost.shell('which {}'.format("{}-reboot".format(reboot_type)))['stdout']
            # restore original script. If the ".orig" file does not exist (upgrade path case), ignore the error.
            duthost.shell("mv {} {}".format(reboot_script_path + ".orig", reboot_script_path), module_ignore_errors=True)

        # check current OS version post-reboot. This can be different than preboot OS version in case of upgrade
        target_os_version = duthost.shell('sonic_installer list | grep Current | cut -f2 -d " "')['stdout']
        upgrade_out_201811 = "SONiC-OS-201811" in base_os_version and "SONiC-OS-201811" not in target_os_version
        # Local bgpd_log reflects the post-reboot image and is independent of the outer bgpd_log.
        if 'SONiC-OS-201811' in target_os_version:
            bgpd_log = "/var/log/quagga/bgpd.log"
        else:
            bgpd_log = "/var/log/frr/bgpd.log"
        if upgrade_out_201811 and not logs_in_tmpfs:
            # if upgrade from 201811 to future branch is done there are two cases:
            # 1. Small disk devices: previous quagga logs don't exist anymore, handled in restore_backup.
            # 2. Other devices: prev quagga log to be copied to a common place, for ansible extract to work:
            duthost.shell("cp {} {}".format(
                "/var/log/quagga/bgpd.log", "/var/log/frr/bgpd.log.99"), module_ignore_errors=True)
        # Re-point the loganalyzer at the additional files that match the post-reboot image.
        additional_files={'/var/log/swss/sairedis.rec': 'recording on: /var/log/swss/sairedis.rec', bgpd_log: ''}
        loganalyzer.additional_files = list(additional_files.keys())
        loganalyzer.additional_start_str = list(additional_files.values())

        # fail=False: the result is returned for timing analysis rather than asserted on here.
        result = loganalyzer.analyze(marker, fail=False)
        analyze_result = {"time_span": dict(), "offset_from_kexec": dict()}
        offset_from_kexec = dict()

        # Dispatch each analyzed file's expect messages to the matching parser.
        for key, messages in result["expect_messages"].items():
            if "syslog" in key:
                get_kexec_time(duthost, messages, analyze_result)
                reboot_start_time = analyze_result.get("reboot_time", {}).get("timestamp", {}).get("Start")
                # Without a kexec start time nothing below can be computed, so give up
                # on the whole analysis (no report files are written in this case).
                if not reboot_start_time or reboot_start_time == "N/A":
                    logging.error("kexec regex \"Rebooting with /sbin/kexec\" not found in syslog. " +\
                    "Skipping log_analyzer checks..")
                    return
                analyze_log_file(duthost, messages, analyze_result, offset_from_kexec)
            elif "bgpd.log" in key:
                analyze_log_file(duthost, messages, analyze_result, offset_from_kexec)
            elif "sairedis.rec" in key:
                analyze_sairedis_rec(messages, analyze_result, offset_from_kexec)

        # NOTE: the loop variable `marker` shadows the `marker` parameter; the parameter
        # is not used again after this point.
        for marker, time_data in analyze_result["offset_from_kexec"].items():
            marker_start_time = time_data.get("timestamp", {}).get("Start")
            reboot_start_time = analyze_result.get("reboot_time", {}).get("timestamp", {}).get("Start")
            # time_taken is the number of seconds between the reboot start and this marker's start.
            if reboot_start_time and reboot_start_time != "N/A" and marker_start_time:
                time_data["time_taken"] = (_parse_timestamp(marker_start_time) -\
                    _parse_timestamp(reboot_start_time)).total_seconds()
            else:
                time_data["time_taken"] = "N/A"

        get_data_plane_report(analyze_result, reboot_type, log_dir, reboot_oper)
        result_summary = get_report_summary(analyze_result, reboot_type)
        logging.info(json.dumps(analyze_result, indent=4))
        logging.info(json.dumps(result_summary, indent=4))
        # A non-string reboot_oper is represented by its class name in the file names.
        if reboot_oper and not isinstance(reboot_oper, str):
            reboot_oper = type(reboot_oper).__name__
        if reboot_oper:
            report_file_name = request.node.name + "_" + reboot_oper + "_report.json"
            summary_file_name = request.node.name + "_" + reboot_oper + "_summary.json"
        else:
            report_file_name = request.node.name + "_report.json"
            summary_file_name = request.node.name + "_summary.json"


        # Reports are written to ../logs/platform_tests/ relative to this file's directory.
        report_file_dir = os.path.realpath((os.path.join(os.path.dirname(__file__),\
            "../logs/platform_tests/")))
        report_file_path = report_file_dir + "/" + report_file_name
        summary_file_path = report_file_dir + "/" + summary_file_name
        if not os.path.exists(report_file_dir):
            os.makedirs(report_file_dir)
        with open(report_file_path, 'w') as fp:
            json.dump(analyze_result, fp, indent=4)
        with open(summary_file_path, 'w') as fp:
            json.dump(result_summary, fp, indent=4)

        # After generating timing data report, do some checks on the timing data
        verify_mac_jumping(test_name, analyze_result)

    yield pre_reboot_analysis, post_reboot_analysis
Пример #15
0
def consumes_memory_and_checks_monit(duthost, container_name, vm_workers, new_syntax_enabled):
    """Invokes the 'stress' utility to consume memory more than the threshold asynchronously
    and checks whether the container can be stopped and restarted. After container was restarted,
    'stress' utility will be invoked again to consume memory and checks whether Monit was able to
    restart this container with or without help of new syntax.
    Loganalyzer is leveraged to check whether the log messages related to container stopped
    and started were generated.

    Note that the expected log patterns are written for the telemetry container,
    independently of `container_name`.

    Args:
        duthost: The AnsibleHost object of DuT.
        container_name: Name of container.
        vm_workers: Number of workers which does the spinning on malloc()/free()
          to consume memory.
        new_syntax_enabled: Checks to make sure container will be restarted if it is set to be
          `True`.

    Returns:
        None.
    """
    expected_alerting_messages = [
        ".*restart_service.*Restarting service 'telemetry'.*",
        ".*Stopping Telemetry container.*",
        ".*Stopped Telemetry container.*",
        ".*Starting Telemetry container.*",
        ".*Started Telemetry container.*",
    ]

    loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix="test_memory_checker")
    loganalyzer.expect_regex = []
    loganalyzer.expect_regex.extend(expected_alerting_messages)
    marker = loganalyzer.init()

    thread_pool = ThreadPool()
    try:
        # The memory consumer runs in the background while syslog is being watched.
        thread_pool.apply_async(consume_memory, (duthost, container_name, vm_workers))

        logger.info("Sleep '{}' seconds to wait for the alerting messages from syslog ...".format(WAITING_SYSLOG_MSG_SECS))
        time.sleep(WAITING_SYSLOG_MSG_SECS)

        logger.info("Checking the alerting messages related to container restart ...")
        loganalyzer.analyze(marker)
        logger.info("Found all the expected alerting messages from syslog!")

        logger.info("Waiting for '{}' container to be restarted ...".format(container_name))
        restarted = wait_until(CONTAINER_RESTART_THRESHOLD_SECS,
                               CONTAINER_CHECK_INTERVAL_SECS,
                               0,
                               check_container_state, duthost, container_name, True)
        pytest_assert(restarted, "Failed to restart '{}' container!".format(container_name))
        logger.info("'{}' container is restarted.".format(container_name))

        logger.info("Running 'stress' utility again in '{}' ...".format(container_name))
        thread_pool.apply_async(consume_memory, (duthost, container_name, vm_workers))

        marker = loganalyzer.update_marker_prefix("test_monit_counter")
        logger.info("Checking memory usage of '{}' every 30 seconds for 6 times ...".format(container_name))
        for _ in range(6):
            mem_usage = get_container_mem_usage(duthost, container_name)
            logger.info("Memory usage of '{}' is '{}'".format(container_name, mem_usage))
            time.sleep(30)

        logger.info("Analyzing syslog messages to verify whether '{}' is restarted ...".format(container_name))
        # fail=False: the match counts are inspected below instead of letting analyze() fail.
        analyzing_result = loganalyzer.analyze(marker, fail=False)
        if not new_syntax_enabled:
            pytest_assert(analyzing_result["total"]["expected_match"] == 0,
                          "Monit can reset counter and restart '{}'!".format(container_name))
            logger.info("Monit was unable to reset its counter and '{}' can not be restarted!".format(container_name))
        else:
            pytest_assert(analyzing_result["total"]["expected_match"] == len(expected_alerting_messages),
                          "Monit still can not restart '{}' with the help of new syntax!".format(container_name))
            logger.info("Monit was able to restart '{}' with the help of new syntax!".format(container_name))
    finally:
        # Release the pool on every exit path. close() only refuses new submissions; work
        # already submitted keeps running, and the pool's threads exit once it finishes.
        thread_pool.close()
Пример #16
0
def test_nhop(request, duthost, tbinfo):
    """
    Test next hop group resource count. Steps:
    - Add test IP address to an active IP interface
    - Add static ARPs
    - Create unique next hop groups
    - Add IP route and nexthop
    - check CRM resource
    - clean up
    - Verify no errors and crash
    """
    skip_release(duthost, ["201811", "201911"])

    default_max_nhop_paths = 32
    nhop_group_limit = 1024
    # program more than the advertised limit
    extra_nhops = 10

    asic = duthost.asic_instance()

    # find out MAX NHOP group count supported on the platform
    result = asic.run_redis_cmd(
        argv=["redis-cli", "-n", 6, "HGETALL", "SWITCH_CAPABILITY|switch"])
    # HGETALL output alternates field, value, field, value ...; pair them up into a dict
    it = iter(result)
    switch_capability = dict(zip(it, it))
    max_nhop = switch_capability.get("MAX_NEXTHOP_GROUP_COUNT")
    max_nhop = nhop_group_limit if max_nhop is None else int(max_nhop)
    nhop_group_count = min(max_nhop, nhop_group_limit) + extra_nhops

    # find out an active IP port
    # list() is required so that the keys can be indexed on Python 3 as well
    ip_ifaces = list(asic.get_active_ip_interfaces(tbinfo).keys())
    pytest_assert(len(ip_ifaces), "No IP interfaces found")
    eth_if = ip_ifaces[0]

    # Generate ARP entries
    arp_count = 40
    arplist = Arp(duthost, asic, arp_count, eth_if)
    arplist.arps_add()

    # every combination of default_max_nhop_paths ARP indices forms one unique nexthop group
    indices = range(arp_count)
    ip_indices = combinations(indices, default_max_nhop_paths)

    # initialize log analyzer
    marker = "NHOP TEST PATH COUNT {} {}".format(nhop_group_count, eth_if)
    loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix=marker)
    marker = loganalyzer.init()
    loganalyzer.load_common_config()
    loganalyzer.expect_regex = []
    loganalyzer.ignore_regex.extend(loganalyzer_ignore_regex_list())

    ip_prefix = ipaddr.IPAddress("192.168.0.0")

    crm_before = get_crm_info(duthost, asic)

    # set the CRM polling interval to 10 for the duration of the test
    asic.command("crm config polling interval 10")

    logging.info("Adding {} next hops on {}".format(nhop_group_count, eth_if))

    # create nexthop group
    nhop = IPRoutes(duthost, asic)
    try:
        # zip() stops after nhop_group_count groups, so the combinations are consumed lazily
        for i, indx_list in zip(range(nhop_group_count), ip_indices):
            # get a list of unique group of next hop IPs
            ips = [arplist.ip_mac_list[x].ip for x in indx_list]

            # each route gets its own /31 prefix
            ip_route = "{}/31".format(ip_prefix + (2 * i))

            # add IP route with the next hop group created
            nhop.add_ip_route(ip_route, ips)

        nhop.program_routes()
        # wait for routes to be synced and programmed
        time.sleep(120)
        crm_after = get_crm_info(duthost, asic)

    finally:
        # always undo the routes, ARP entries and CRM polling interval, even on failure
        nhop.delete_routes()
        arplist.clean_up()
        asic.command("crm config polling interval {}".format(
            crm_before["polling"]))

    # check for any errors or crash
    loganalyzer.analyze(marker)

    # verify the test used up all the NHOP group resources
    # skip this check on Mellanox as ASIC resources are shared
    if not is_mellanox_device(duthost):
        pytest_assert(
            crm_after["available"] == 0,
            "Unused NHOP group resource: {}, used:{}".format(
                crm_after["available"], crm_after["used"]))
def test_thermal_control_fan_status(duthosts, rand_one_dut_hostname, mocker_factory):
    """
    @summary: Make FAN absence, over speed and under speed, check logs and LED color.

    Each scenario mocks a fan state inside a loganalyzer context and polls the CLI output
    within that same context, so the expected log messages are checked only after the
    polling call has returned.
    """
    duthost = duthosts[rand_one_dut_hostname]
    loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix='thermal_control')
    loganalyzer.load_common_config()

    with ThermalPolicyFileContext(duthost, THERMAL_POLICY_VALID_FILE):
        fan_mocker = mocker_factory(duthost, 'FanStatusMocker')
        if fan_mocker is None:
            pytest.skip("No FanStatusMocker for %s, skip rest of the testing in this case" % duthost.facts['asic_type'])

        logging.info('Mock FAN status data...')
        fan_mocker.mock_data()  # make data random
        restart_thermal_control_daemon(duthost)
        wait_until(THERMAL_CONTROL_TEST_WAIT_TIME, THERMAL_CONTROL_TEST_CHECK_INTERVAL, fan_mocker.check_all_fan_speed,
                   60)
        check_thermal_algorithm_status(duthost, mocker_factory, False)

        single_fan_mocker = mocker_factory(duthost, 'SingleFanMocker')
        time.sleep(THERMAL_CONTROL_TEST_WAIT_TIME)

        # Probe whether thermalctld in the pmon container contains the "insufficient fan
        # number" log pattern. A failed grep drops the related expectations below.
        _fan_log_supported = duthost.command(
            'docker exec pmon grep -E "{}" /usr/bin/thermalctld'.format(LOG_EXPECT_INSUFFICIENT_FAN_NUM_RE),
            module_ignore_errors=True)

        if single_fan_mocker.is_fan_removable():
            loganalyzer.expect_regex = [LOG_EXPECT_FAN_REMOVE_RE, LOG_EXPECT_INSUFFICIENT_FAN_NUM_RE]
            if _fan_log_supported.is_failed:
                loganalyzer.expect_regex.remove(LOG_EXPECT_INSUFFICIENT_FAN_NUM_RE)
            with loganalyzer:
                logging.info('Mocking an absence FAN...')
                single_fan_mocker.mock_absence()
                check_cli_output_with_mocker(duthost, single_fan_mocker, CMD_PLATFORM_FANSTATUS, THERMAL_CONTROL_TEST_WAIT_TIME, 2)

            loganalyzer.expect_regex = [LOG_EXPECT_FAN_REMOVE_CLEAR_RE, LOG_EXPECT_INSUFFICIENT_FAN_NUM_CLEAR_RE]
            if _fan_log_supported.is_failed:
                loganalyzer.expect_regex.remove(LOG_EXPECT_INSUFFICIENT_FAN_NUM_CLEAR_RE)
            with loganalyzer:
                logging.info('Make the absence FAN back to presence...')
                single_fan_mocker.mock_presence()
                check_cli_output_with_mocker(duthost, single_fan_mocker, CMD_PLATFORM_FANSTATUS, THERMAL_CONTROL_TEST_WAIT_TIME, 2)

        # The fan fault scenarios run only when the probe above succeeded.
        if not _fan_log_supported.is_failed:
            loganalyzer.expect_regex = [LOG_EXPECT_FAN_FAULT_RE, LOG_EXPECT_INSUFFICIENT_FAN_NUM_RE]
            with loganalyzer:
                logging.info('Mocking a fault FAN...')
                single_fan_mocker.mock_status(False)
                check_cli_output_with_mocker(duthost, single_fan_mocker, CMD_PLATFORM_FANSTATUS, THERMAL_CONTROL_TEST_WAIT_TIME, 2)

            loganalyzer.expect_regex = [LOG_EXPECT_FAN_FAULT_CLEAR_RE, LOG_EXPECT_INSUFFICIENT_FAN_NUM_CLEAR_RE]
            with loganalyzer:
                logging.info('Mocking the fault FAN back to normal...')
                single_fan_mocker.mock_status(True)
                # Poll the CLI inside the loganalyzer context, like every other scenario,
                # so the log check does not run straight after the mock is applied.
                check_cli_output_with_mocker(duthost, single_fan_mocker, CMD_PLATFORM_FANSTATUS, THERMAL_CONTROL_TEST_WAIT_TIME, 2)

        loganalyzer.expect_regex = [LOG_EXPECT_FAN_OVER_SPEED_RE]
        with loganalyzer:
            logging.info('Mocking an over speed FAN...')
            single_fan_mocker.mock_over_speed()
            check_cli_output_with_mocker(duthost, single_fan_mocker, CMD_PLATFORM_FANSTATUS, THERMAL_CONTROL_TEST_WAIT_TIME, 2)

        loganalyzer.expect_regex = [LOG_EXPECT_FAN_OVER_SPEED_CLEAR_RE]
        with loganalyzer:
            logging.info('Make the over speed FAN back to normal...')
            single_fan_mocker.mock_normal_speed()
            check_cli_output_with_mocker(duthost, single_fan_mocker, CMD_PLATFORM_FANSTATUS, THERMAL_CONTROL_TEST_WAIT_TIME, 2)

        loganalyzer.expect_regex = [LOG_EXPECT_FAN_UNDER_SPEED_RE]
        with loganalyzer:
            logging.info('Mocking an under speed FAN...')
            single_fan_mocker.mock_under_speed()
            check_cli_output_with_mocker(duthost, single_fan_mocker, CMD_PLATFORM_FANSTATUS, THERMAL_CONTROL_TEST_WAIT_TIME, 2)

        loganalyzer.expect_regex = [LOG_EXPECT_FAN_UNDER_SPEED_CLEAR_RE]
        with loganalyzer:
            logging.info('Make the under speed FAN back to normal...')
            single_fan_mocker.mock_normal_speed()
            check_cli_output_with_mocker(duthost, single_fan_mocker, CMD_PLATFORM_FANSTATUS, THERMAL_CONTROL_TEST_WAIT_TIME, 2)
Пример #18
0
def test_turn_on_off_psu_and_check_psustatus(duthost, psu_controller):
    """
    @summary: Turn off/on PSU and check PSU status using 'show platform psustatus'

    Every PSU reported by the controller is powered off and then back on, one at a
    time. After each transition the output of CMD_PLATFORM_PSUSTATUS is validated
    line by line. A PSU that was switched off is always switched back on, even when
    the "off" verification fails, so that a failed check does not leave the DUT
    running on reduced power for subsequent tests.

    Args:
        duthost: DUT host object used to run CLI commands.
        psu_controller: PSU controller object; when it is None the test is skipped.
    """
    loganalyzer = LogAnalyzer(
        ansible_host=duthost,
        marker_prefix='turn_on_off_psu_and_check_psustatus')
    loganalyzer.load_common_config()

    loganalyzer.ignore_regex.append(
        "Error getting sensor data: dps460.*Kernel interface error")
    marker = loganalyzer.init()

    psu_line_pattern = re.compile(r"PSU\s+\d+\s+(OK|NOT OK|NOT PRESENT)")

    psu_num = get_psu_num(duthost)
    if psu_num < 2:
        pytest.skip(
            "At least 2 PSUs required for rest of the testing in this case")

    logging.info("Create PSU controller for testing")
    psu_ctrl = psu_controller
    if psu_ctrl is None:
        pytest.skip(
            "No PSU controller for %s, skip rest of the testing in this case" %
            duthost.hostname)

    logging.info(
        "To avoid DUT being shutdown, need to turn on PSUs that are not powered"
    )
    turn_all_psu_on(psu_ctrl)

    logging.info("Initialize test results")
    psu_test_results = {}
    if not check_all_psu_on(duthost, psu_test_results):
        pytest.skip(
            "Some PSU are still down, skip rest of the testing in this case")

    assert len(psu_test_results) == psu_num, \
        "In consistent PSU number output by '%s' and '%s'" % (CMD_PLATFORM_PSUSTATUS, "sudo psuutil numpsus")

    logging.info("Start testing turn off/on PSUs")
    all_psu_status = psu_ctrl.get_psu_status()
    for psu in all_psu_status:
        psu_id = psu["psu_id"]
        psu_under_test = None

        logging.info("Turn off PSU %s", str(psu_id))
        psu_ctrl.turn_off_psu(psu_id)
        try:
            # Give the platform time to notice the power state change.
            time.sleep(5)

            cli_psu_status = duthost.command(CMD_PLATFORM_PSUSTATUS)
            # The first two output lines are skipped (presumably the table
            # header and its separator); the rest must be PSU status rows.
            for line in cli_psu_status["stdout_lines"][2:]:
                assert psu_line_pattern.match(line), "Unexpected PSU status output"
                fields = line.split()
                # The CLI index of the powered-off PSU is discovered from the
                # output: it is the row whose status is anything but "OK".
                if fields[2] != "OK":
                    psu_under_test = fields[1]
                check_vendor_specific_psustatus(duthost, line)
            assert psu_under_test is not None, "No PSU is turned off"
        finally:
            # Always restore power, even if the checks above failed, so the
            # DUT is not left with a PSU switched off.
            logging.info("Turn on PSU %s", str(psu_id))
            psu_ctrl.turn_on_psu(psu_id)
        time.sleep(5)

        cli_psu_status = duthost.command(CMD_PLATFORM_PSUSTATUS)
        for line in cli_psu_status["stdout_lines"][2:]:
            assert psu_line_pattern.match(line), "Unexpected PSU status output"
            fields = line.split()
            if fields[1] == psu_under_test:
                assert fields[
                    2] == "OK", "Unexpected PSU status after turned it on"
            check_vendor_specific_psustatus(duthost, line)

        psu_test_results[psu_under_test] = True

    for psu in psu_test_results:
        assert psu_test_results[psu], "Test psu status of PSU %s failed" % psu

    loganalyzer.analyze(marker)