Example #1
def _get_vendor_id(duthost):
    if is_broadcom_device(duthost):
        vendor_id = "brcm"
    elif is_mellanox_device(duthost):
        vendor_id = "mlnx"
    else:
        error_message = '"{}" does not currently support swap_syncd'.format(duthost.facts["asic_type"])
        logger.error(error_message)
        raise ValueError(error_message)

    return vendor_id
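
The if/elif dispatch above is repeated in several of the examples that follow. A table-driven variant is sketched below; it assumes the same is_broadcom_device/is_mellanox_device helpers are in scope, and the tuple and function names are illustrative only, not part of the suite.

# Sketch: table-driven alternative to the if/elif vendor lookup above.
# Assumes is_broadcom_device / is_mellanox_device are the helpers used
# in the example; VENDOR_CHECKS and get_vendor_id are hypothetical names.
VENDOR_CHECKS = (
    (is_broadcom_device, "brcm"),
    (is_mellanox_device, "mlnx"),
)

def get_vendor_id(duthost):
    # Walk the table in order and return the first matching vendor id.
    for check, vendor_id in VENDOR_CHECKS:
        if check(duthost):
            return vendor_id
    raise ValueError('"{}" does not currently support swap_syncd'.format(
        duthost.facts["asic_type"]))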
Example #2
def swap_syncd(dut):
    """
        Replaces the running syncd container with the RPC version of it.

        This will download a new Docker image to the DUT and restart the swss service.

        Args:
            dut (SonicHost): The target device.
    """

    if is_broadcom_device(dut):
        vendor_id = "brcm"
    elif is_mellanox_device(dut):
        vendor_id = "mlnx"
    else:
        error_message = "\"{}\" is not currently supported".format(
            dut.facts["asic_type"])
        _LOGGER.error(error_message)
        raise ValueError(error_message)

    docker_syncd_name = "docker-syncd-{}".format(vendor_id)
    docker_rpc_image = docker_syncd_name + "-rpc"

    dut.command("systemctl stop swss")
    delete_container(dut, "syncd")

    # Set sysctl RCVBUF parameter for tests
    dut.command("sysctl -w net.core.rmem_max=609430500")

    # Set sysctl SENDBUF parameter for tests
    dut.command("sysctl -w net.core.wmem_max=609430500")

    # TODO: Getting the base image version should be a common utility
    output = dut.command(
        "sonic-cfggen -y /etc/sonic/sonic_version.yml -v build_version")
    sonic_version = output["stdout_lines"][0].strip()

    registry = load_docker_registry_info(dut)
    download_image(dut, registry, docker_rpc_image, sonic_version)

    tag_image(dut, "{}:latest".format(docker_syncd_name),
              "{}/{}".format(registry.host, docker_rpc_image), sonic_version)

    dut.command("systemctl reset-failed swss")
    dut.command("systemctl start swss")

    _LOGGER.info(
        "swss has been restarted, waiting 60 seconds to initialize...")
    time.sleep(60)
Example #3
def restore_default_syncd(dut):
    """
        Replaces the running syncd with the default syncd that comes with the image.

        This will restart the swss service.

        Args:
            dut (SonicHost): The target device.
    """

    if is_broadcom_device(dut):
        vendor_id = "brcm"
    elif is_mellanox_device(dut):
        vendor_id = "mlnx"
    else:
        error_message = "\"{}\" is not currently supported".format(
            dut.facts["asic_type"])
        _LOGGER.error(error_message)
        raise ValueError(error_message)

    docker_syncd_name = "docker-syncd-{}".format(vendor_id)

    dut.command("systemctl stop swss")
    delete_container(dut, "syncd")

    # TODO: Getting the base image version should be a common utility
    output = dut.command(
        "sonic-cfggen -y /etc/sonic/sonic_version.yml -v build_version")
    sonic_version = output["stdout_lines"][0].strip()

    tag_image(dut, "{}:latest".format(docker_syncd_name), docker_syncd_name,
              sonic_version)

    dut.command("systemctl reset-failed swss")
    dut.command("systemctl start swss")

    _LOGGER.info(
        "swss has been restarted, waiting 60 seconds to initialize...")
    time.sleep(60)

    # Remove the RPC image from the DUT
    docker_rpc_image = docker_syncd_name + "-rpc"
    registry = load_docker_registry_info(dut)
    dut.command("docker rmi {}/{}:{}".format(registry.host, docker_rpc_image,
                                             sonic_version))
Example #4
def _get_sai_running_vendor_id(duthost):
    """
    Get the vendor id.

    Args:
        duthost (SonicHost): The target device.
    """
    if is_broadcom_device(duthost):
        vendor_id = "brcm"
    elif is_mellanox_device(duthost):
        vendor_id = "mlnx"
    elif is_barefoot_device(duthost):
        vendor_id = "bfn"
    else:
        error_message = '"{}" does not currently support saitest'.format(duthost.facts["asic_type"])
        logger.error(error_message)
        raise ValueError(error_message)

    return vendor_id
Example #5
def get_manufacturer_program_to_check(duthost):
    if is_mellanox_device(duthost):
        return CounterpollConstants.SX_SDK
Example #6
def analyze_log_file(duthost, messages, result, offset_from_kexec):
    service_restart_times = dict()
    derived_patterns = OTHER_PATTERNS.get("COMMON")
    service_patterns = dict()
    # get platform specific regexes
    if is_broadcom_device(duthost):
        derived_patterns.update(OTHER_PATTERNS.get("BRCM"))
    elif is_mellanox_device(duthost):
        derived_patterns.update(OTHER_PATTERNS.get("MLNX"))
    # get image specific regexes
    if "20191130" in duthost.os_version:
        derived_patterns.update(OTHER_PATTERNS.get("201911"))
        service_patterns.update(SERVICE_PATTERNS.get("201911"))
    else:
        derived_patterns.update(OTHER_PATTERNS.get("LATEST"))
        service_patterns.update(SERVICE_PATTERNS.get("LATEST"))

    if not messages:
        logging.error("Expected messages not found in syslog")
        return None

    def service_time_check(message, status):
        time = datetime.strptime(
            message.split(duthost.hostname)[0].strip(), FMT)
        time = time.strftime(FMT)
        service_name = message.split(status + " ")[1].split()[0]
        service_name = service_name.upper()
        if service_name == "ROUTER":
            service_name = "RADV"
        service_dict = service_restart_times.get(service_name,
                                                 {"timestamp": {}})
        timestamps = service_dict.get("timestamp")
        if status in timestamps:
            service_dict[status +
                         " count"] = service_dict.get(status + " count", 1) + 1
        timestamps[status] = time
        service_restart_times.update({service_name: service_dict})

    reboot_time = "N/A"
    for message in messages:
        # Get stopping to started timestamps for services (swss, bgp, etc)
        for status, pattern in service_patterns.items():
            if re.search(pattern, message):
                service_time_check(message, status)
                break
        # Get timestamps of all other entities
        for state, pattern in derived_patterns.items():
            if re.search(pattern, message):
                timestamp = datetime.strptime(
                    message.split(duthost.hostname)[0].strip(), FMT)
                state_name = state.split("|")[0].strip()
                if state_name + "|End" not in derived_patterns.keys():
                    state_times = get_state_times(timestamp, state,
                                                  offset_from_kexec)
                    offset_from_kexec.update(state_times)
                else:
                    state_times = get_state_times(timestamp, state,
                                                  service_restart_times)
                    service_restart_times.update(state_times)
                break

    # Calculate time that services took to stop/start
    for _, timings in service_restart_times.items():
        timestamps = timings["timestamp"]
        timings["stop_time"] = (
            (datetime.strptime(timestamps["Stopped"], FMT) -
             datetime.strptime(timestamps["Stopping"], FMT)).total_seconds()
            if "Stopped" in timestamps and "Stopping" in timestamps else None)

        timings["start_time"] = (
            (datetime.strptime(timestamps["Started"], FMT) -
             datetime.strptime(timestamps["Starting"], FMT)).total_seconds()
            if "Started" in timestamps and "Starting" in timestamps else None)

        if "Started" in timestamps and "Stopped" in timestamps:
            timings["time_span"] = (datetime.strptime(timestamps["Started"], FMT) -
                                    datetime.strptime(timestamps["Stopped"], FMT)).total_seconds()
        elif "Start" in timestamps and "End" in timestamps:
            if "last_occurence" in timings:
                timings["time_span"] = (datetime.strptime(timings["last_occurence"], FMT) -
                                        datetime.strptime(timestamps["Start"], FMT)).total_seconds()
            else:
                timings["time_span"] = (datetime.strptime(timestamps["End"], FMT) -
                                        datetime.strptime(timestamps["Start"], FMT)).total_seconds()

    result["time_span"].update(service_restart_times)
    result["offset_from_kexec"] = offset_from_kexec
    return result
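
The stop/start arithmetic at the end of the function is plain datetime subtraction. A standalone sketch follows; FMT here is an assumed syslog-style format chosen for the illustration, not the constant the test suite actually defines.

from datetime import datetime

FMT = "%b %d %H:%M:%S.%f"  # assumed format for this sketch only

timestamps = {"Stopping": "Apr 02 10:15:01.000000",
              "Stopped": "Apr 02 10:15:04.500000"}
stop_time = (datetime.strptime(timestamps["Stopped"], FMT) -
             datetime.strptime(timestamps["Stopping"], FMT)).total_seconds()
print(stop_time)  # 3.5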
Example #7
def swap_syncd(dut, creds):
    """
        Replaces the running syncd container with the RPC version of it.

        This will download a new Docker image to the DUT and restart the swss service.

        Args:
            dut (SonicHost): The target device.
            creds (dict): Credentials used to access the Docker registry.
    """

    if is_broadcom_device(dut):
        vendor_id = "brcm"
    elif is_mellanox_device(dut):
        vendor_id = "mlnx"
    else:
        error_message = "\"{}\" is not currently supported".format(
            dut.facts["asic_type"])
        _LOGGER.error(error_message)
        raise ValueError(error_message)

    docker_syncd_name = "docker-syncd-{}".format(vendor_id)
    docker_rpc_image = docker_syncd_name + "-rpc"

    dut.command("config bgp shutdown all"
                )  # Force image download to go through mgmt network
    dut.command("systemctl stop swss")
    delete_container(dut, "syncd")

    # Set sysctl RCVBUF parameter for tests
    dut.command("sysctl -w net.core.rmem_max=609430500")

    # Set sysctl SENDBUF parameter for tests
    dut.command("sysctl -w net.core.wmem_max=609430500")

    # TODO: Getting the base image version should be a common utility
    output = dut.command(
        "sonic-cfggen -y /etc/sonic/sonic_version.yml -v build_version")
    sonic_version = output["stdout_lines"][0].strip()

    def ready_for_swap():
        syncd_status = dut.command("docker ps -f name=syncd")["stdout_lines"]
        if len(syncd_status) > 1:
            return False

        swss_status = dut.command("docker ps -f name=swss")["stdout_lines"]
        if len(swss_status) > 1:
            return False

        bgp_summary = dut.command("show ip bgp summary")["stdout_lines"]
        idle_count = 0
        expected_idle_count = 0
        for line in bgp_summary:
            if "Idle (Admin)" in line:
                idle_count += 1

            if "Total number of neighbors" in line:
                tokens = line.split()
                expected_idle_count = int(tokens[-1])

        return idle_count == expected_idle_count

    pytest_assert(wait_until(30, 3, ready_for_swap),
                  "Docker and/or BGP failed to shut down")

    registry = load_docker_registry_info(dut, creds)
    download_image(dut, registry, docker_rpc_image, sonic_version)

    tag_image(dut, "{}:latest".format(docker_syncd_name),
              "{}/{}".format(registry.host, docker_rpc_image), sonic_version)

    _LOGGER.info("Reloading config and restarting swss...")
    config_reload(dut)
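
The call wait_until(30, 3, ready_for_swap) polls the check roughly every 3 seconds for up to 30 seconds. A generic stand-in for that kind of polling loop (not the library's actual implementation) could look like this:

import time

def poll_until(timeout_s, interval_s, condition):
    """Call condition() every interval_s seconds until it returns True
    or timeout_s seconds have elapsed; return the last result."""
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        if condition():
            return True
        time.sleep(interval_s)
    return condition()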
Example #8
def test_nhop(request, duthost, tbinfo):
    """
    Test next hop group resource count. Steps:
    - Add test IP address to an active IP interface
    - Add static ARPs
    - Create unique next hop groups
    - Add IP route and nexthop
    - Check CRM resource usage
    - Clean up
    - Verify there are no errors or crashes
    """
    skip_release(duthost, ["201811", "201911"])

    default_max_nhop_paths = 32
    nhop_group_limit = 1024
    # program more than the advertised limit
    extra_nhops = 10

    asic = duthost.asic_instance()

    # find out MAX NHOP group count supported on the platform
    result = asic.run_redis_cmd(
        argv=["redis-cli", "-n", 6, "HGETALL", "SWITCH_CAPABILITY|switch"])
    it = iter(result)
    switch_capability = dict(zip(it, it))
    max_nhop = switch_capability.get("MAX_NEXTHOP_GROUP_COUNT")
    max_nhop = nhop_group_limit if max_nhop is None else int(max_nhop)
    nhop_group_count = min(max_nhop, nhop_group_limit) + extra_nhops

    # find an active IP interface
    ip_ifaces = list(asic.get_active_ip_interfaces(tbinfo).keys())
    pytest_assert(len(ip_ifaces), "No IP interfaces found")
    eth_if = ip_ifaces[0]

    # Generate ARP entries
    arp_count = 40
    arplist = Arp(duthost, asic, arp_count, eth_if)
    arplist.arps_add()

    # indices
    indices = range(arp_count)
    ip_indices = combinations(indices, default_max_nhop_paths)

    # initialize log analyzer
    marker = "NHOP TEST PATH COUNT {} {}".format(nhop_group_count, eth_if)
    loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix=marker)
    marker = loganalyzer.init()
    loganalyzer.load_common_config()
    loganalyzer.expect_regex = []
    loganalyzer.ignore_regex.extend(loganalyzer_ignore_regex_list())

    ip_prefix = ipaddr.IPAddress("192.168.0.0")

    # list of all IPs available to generate a nexthop group
    ip_list = arplist.ip_mac_list

    crm_before = get_crm_info(duthost, asic)

    # increase CRM polling time
    asic.command("crm config polling interval 10")

    logging.info("Adding {} next hops on {}".format(nhop_group_count, eth_if))

    # create nexthop group
    nhop = IPRoutes(duthost, asic)
    try:
        for i, indx_list in zip(range(nhop_group_count), ip_indices):
            # get a list of unique group of next hop IPs
            ips = [arplist.ip_mac_list[x].ip for x in indx_list]

            ip_route = "{}/31".format(ip_prefix + (2 * i))

            # add IP route with the next hop group created
            nhop.add_ip_route(ip_route, ips)

        nhop.program_routes()
        # wait for routes to be synced and programmed
        time.sleep(120)
        crm_after = get_crm_info(duthost, asic)

    finally:
        nhop.delete_routes()
        arplist.clean_up()
        asic.command("crm config polling interval {}".format(
            crm_before["polling"]))

    # check for any errors or crash
    loganalyzer.analyze(marker)

    # verify the test used up all the NHOP group resources
    # skip this check on Mellanox as ASIC resources are shared
    if not is_mellanox_device(duthost):
        pytest_assert(
            crm_after["available"] == 0,
            "Unused NHOP group resource: {}, used:{}".format(
                crm_after["available"], crm_after["used"]))