Пример #1
0
def Trigger(tc):
    """Restart the athena app on Naples and re-plumb the internal mgmt link.

    Over the Naples console of ``tc.nodes[0]`` this moves ``/device.json``
    into ``/nic/conf/``, loads the mnic/mnet kernel modules, launches
    ``/nic/tools/start-agent-skip-dpdk.sh``, calls ``utils.Sleep(80)`` and
    re-applies ``tc.int_mnic_ip`` on ``int_mnic0``.  On the host
    ``tc.bitw_node_name`` it then runs ``./plugctl.sh in`` and assigns the
    next consecutive address (``tc.int_mnic_ip`` + 1, /24) to the first
    host internal mgmt interface.

    Args:
        tc: testcase object; ``nodes``, ``int_mnic_ip`` and
            ``bitw_node_name`` are read.

    Returns:
        ``api.types.status.SUCCESS`` on completion, or
        ``api.types.status.FAILURE`` when a host command exits non-zero or
        no host internal mgmt interface is reported.
    """

    def run_host_cmd(host_cmd):
        # Execute a single command on the bump-in-the-wire host, print its
        # results and hand back the command result object.
        req = api.Trigger_CreateExecuteCommandsRequest(serial=True)
        api.Trigger_AddHostCommand(req, tc.bitw_node_name, host_cmd)
        outcome = api.Trigger(req).commands[0]
        api.PrintCommandResults(outcome)
        return outcome

    naples = tc.nodes[0]

    # move device.json
    api.RunNaplesConsoleCmd(naples, "mv /device.json /nic/conf/")

    # load drivers
    api.RunNaplesConsoleCmd(
        naples,
        "insmod /nic/bin/ionic_mnic.ko && insmod /nic/bin/mnet_uio_pdrv_genirq.ko && insmod /nic/bin/mnet.ko"
    )

    # start athena app
    api.RunNaplesConsoleCmd(naples, "/nic/tools/start-agent-skip-dpdk.sh")

    # wait for athena app to be up
    utils.Sleep(80)

    # Strip trailing whitespace once and use the cleaned value everywhere:
    # the address sits in the middle of the ifconfig command line, so a
    # trailing newline would cut the command short and drop the netmask.
    mnic_ip = tc.int_mnic_ip.rstrip()

    # configure int_mnic0
    api.RunNaplesConsoleCmd(
        naples, "ifconfig int_mnic0 " + mnic_ip + " netmask 255.255.255.0")

    # run plugctl to gracefully bring up the PCI device on host
    if run_host_cmd("./plugctl.sh in").exit_code != 0:
        api.Logger.error("Failed to gracefully bring up the PCI device on host %s" % \
                          tc.bitw_node_name)
        return api.types.status.FAILURE

    # get host internal mgmt intf
    host_intfs = naples_host.GetHostInternalMgmtInterfaces(tc.bitw_node_name)
    # Assuming single nic per host
    if not host_intfs:
        api.Logger.error('Failed to get host interfaces')
        return api.types.status.FAILURE

    # the host side of the link takes the address right after int_mnic0's
    host_ip = str(ip_address(mnic_ip) + 1)
    bring_up = "ifconfig %s %s/24 up" % (str(host_intfs[0]), host_ip)

    if run_host_cmd(bring_up).exit_code != 0:
        api.Logger.error("Failed to gracefully bring up the internal mgmt intf on host %s" % \
                          tc.bitw_node_name)
        return api.types.status.FAILURE

    return api.types.status.SUCCESS
Пример #2
0
def checkUpgradeStatusViaConsole(tc):
    """Poll /update/pds_upg_status.txt on every node via the Naples console.

    Each round starts with ``misc_utils.Sleep(1)``.  A round counts as
    "settled" when ``grep -vi in-progress`` on the status file exits 0 on
    every node of ``tc.nodes``; the first node where it does not ends the
    round early.  Once settled, ``grep -i success`` is run on every node.

    Returns:
        ``api.types.status.SUCCESS`` when the success grep exits 0 on all
        nodes; ``api.types.status.FAILURE`` when it fails on any node, or
        when the 300th round is reached without the status settling.
    """
    verdict = api.types.status.SUCCESS
    attempts = 0

    while True:
        misc_utils.Sleep(1)
        attempts += 1
        if attempts == 300:
            # give up: status is still in-progress after max retries
            api.Logger.error("Upgrade Failed: Status is still IN-PROGRESS")
            return api.types.status.FAILURE

        pending = False
        for node in tc.nodes:
            output, rc = api.RunNaplesConsoleCmd(
                node, "grep -vi in-progress /update/pds_upg_status.txt", True)
            api.Logger.verbose(
                f"checking upgrade for node: {node}, exit_code:{rc} ")
            if rc != 0:
                pending = True
                break

            api.Logger.info(
                f"Status other than in-progress found in {node}, /update/pds_upg_status.txt"
            )
            for entry in output.split('\r\n'):
                api.Logger.info(entry.strip())

        if attempts % 10 == 0:
            api.Logger.info(
                f"Checking for status not in-progress in file /update/pds_upg_status.txt, retries: {attempts}"
            )

        if pending:
            continue

        # every node has left the in-progress state; look for success
        for node in tc.nodes:
            _, rc = api.RunNaplesConsoleCmd(
                node, "grep -i success /update/pds_upg_status.txt", True)
            api.Logger.info(
                "Checking for success status in file /update/pds_upg_status.txt"
            )
            if rc == 0:
                api.Logger.info(
                    "Success Status found in /update/pds_upg_status.txt")
            else:
                verdict = api.types.status.FAILURE

        return verdict
Пример #3
0
def HitlessGetRunningInstance(node):
    """Report which hitless instance is mounted on ``node``.

    Greps /proc/mounts over the Naples console for '/mnt/a' and then
    '/mnt/b'; the first probe that exits 0 decides the answer.

    Returns:
        ``HITLESS_INSTANCE_A`` or ``HITLESS_INSTANCE_B`` for the matching
        mount, otherwise ``HITLESS_INSTANCE_NONE`` after logging an error.
    """
    # console should be outside, not in any instance
    probes = (
        ("/mnt/a", HITLESS_INSTANCE_A),
        ("/mnt/b", HITLESS_INSTANCE_B),
    )
    for mount_point, instance in probes:
        _, rc = api.RunNaplesConsoleCmd(
            node, f"grep -q '{mount_point}' /proc/mounts", True)
        if rc == 0:
            return instance

    api.Logger.error(f"Upgrade hitless instance get failed for node {node}")
    return HITLESS_INSTANCE_NONE
Пример #4
0
def HitlessRunCmdOnInstance(node, instance, cmd):
    """Run ``cmd`` over the Naples console after issuing an instance login.

    A login command chosen by ``instance`` is sent first; ``cmd`` is then
    sent with ``" && exit"`` appended.

    Returns:
        ``api.types.status.SUCCESS`` when both console commands exit 0,
        otherwise ``api.types.status.FAILURE`` (an error is logged).
    """
    # the login command differs per instance (A vs. everything else)
    login = "******" if instance == HITLESS_INSTANCE_A else "******"

    _, rc = api.RunNaplesConsoleCmd(node, login, True)
    if rc != 0:
        api.Logger.error(f"Upgrade hitless, cmd {login} failed on node {node}")
        return api.types.status.FAILURE

    # this exit is to exit the penvisor attach
    attached_cmd = f"{cmd} && exit"
    _, rc = api.RunNaplesConsoleCmd(node, attached_cmd, True)
    if rc == 0:
        api.Logger.info(
            f"Upgrade hitless, cmd {attached_cmd} success on node {node}")
        return api.types.status.SUCCESS

    api.Logger.error(
        f"Upgrade hitless, cmd {attached_cmd} failed on node {node}")
    return api.types.status.FAILURE
Пример #5
0
def Setup(tc):
    """Tear down the running athena setup so that Trigger can bring it back.

    Steps, in order: copy ``device.json`` to Naples and ``plugctl.sh`` to
    the host, record the ``int_mnic0`` address in ``tc.int_mnic_ip``,
    remove ``pensando_pre_init.sh``, unconfigure the interfaces listed in
    the ``inb_mnic_intfs`` testsuite attribute, bring ``int_mnic0`` down,
    unload the mnet/mnic kernel modules, run ``./plugctl.sh out`` on the
    host and finally ``pkill athena_app``.

    Args:
        tc: testcase object; ``bitw_node_name``, ``intfs``, ``nodes`` and
            ``int_mnic_ip`` are populated here.

    Returns:
        ``api.types.status.SUCCESS`` on completion, or
        ``api.types.status.FAILURE`` as soon as one of the triggered
        Naples/host commands exits non-zero.
    """

    tc.bitw_node_name = api.GetTestsuiteAttr("bitw_node_name")
    tc.intfs = api.GetTestsuiteAttr("inb_mnic_intfs")
    tc.nodes = api.GetNaplesHostnames()

    # copy device.json to naples
    device_json_fname = api.GetTopDir() + '/nic/conf/athena/device.json'
    api.CopyToNaples(tc.bitw_node_name, [device_json_fname], "")

    # copy plugctl.sh to host
    plugctl_fname = api.GetTopDir(
    ) + '/iota/test/athena/testcases/networking/scripts/plugctl.sh'
    api.CopyToHost(tc.bitw_node_name, [plugctl_fname], "")

    # get the IP address of int_mnic and store it
    req = api.Trigger_CreateExecuteCommandsRequest(serial=True)

    tc.int_mnic_ip = None
    cmd = "ifconfig int_mnic0 | grep inet | cut -d ':' -f 2 | cut -d ' ' -f 1"
    api.Trigger_AddNaplesCommand(req, tc.bitw_node_name, cmd)

    resp = api.Trigger(req)

    cmd = resp.commands[0]
    api.PrintCommandResults(cmd)

    if cmd.exit_code != 0:
        api.Logger.error("Failed to get int_mnic0 IP on node %s" % \
                          tc.bitw_node_name)
        return api.types.status.FAILURE

    # Store the bare address: the captured output may end with a newline,
    # which must not leak into command lines built from this value later.
    tc.int_mnic_ip = str(cmd.stdout).strip()

    # delete pensando_pre_init.sh
    req = api.Trigger_CreateExecuteCommandsRequest(serial=True)

    cmd = "cd /sysconfig/config0 && touch pensando_pre_init.sh && rm pensando_pre_init.sh"
    api.Trigger_AddNaplesCommand(req, tc.bitw_node_name, cmd)

    # bring down linux interfaces
    for intf in tc.intfs:
        # unconfigure inb_mnic0 and inb_mnic1
        ip_addr = str(ip_address(intf['ip']) + 1)
        utils.configureNaplesIntf(req,
                                  tc.bitw_node_name,
                                  intf['name'],
                                  ip_addr,
                                  '24',
                                  vlan=intf['vlan'],
                                  unconfig=True)

    resp = api.Trigger(req)

    for cmd in resp.commands:
        api.PrintCommandResults(cmd)

        if cmd.exit_code != 0:
            api.Logger.error("Failed to bring down linux interfaces on node %s" % \
                              tc.bitw_node_name)
            return api.types.status.FAILURE

    # unconfigure int_mnic0
    cmd = "ifconfig int_mnic0 down && ip addr del " + tc.int_mnic_ip
    api.RunNaplesConsoleCmd(tc.nodes[0], cmd)

    # unload drivers
    cmd = "rmmod mnet && rmmod mnet_uio_pdrv_genirq && rmmod ionic_mnic"
    api.RunNaplesConsoleCmd(tc.nodes[0], cmd)

    # run plugctl to gracefully bring down the PCI device on host
    req = api.Trigger_CreateExecuteCommandsRequest(serial=True)
    cmd = "./plugctl.sh out"
    api.Trigger_AddHostCommand(req, tc.bitw_node_name, cmd)
    resp = api.Trigger(req)

    cmd = resp.commands[0]
    api.PrintCommandResults(cmd)

    if cmd.exit_code != 0:
        api.Logger.error("Failed to gracefully bring down the PCI device on host %s" % \
                          tc.bitw_node_name)
        return api.types.status.FAILURE

    # kill athena primary app
    cmd = "pkill athena_app"
    api.RunNaplesConsoleCmd(tc.nodes[0], cmd)

    return api.types.status.SUCCESS
Пример #6
0
def Verify(tc):
    """Validate node state after an upgrade switchover.

    Phase 1 sleeps 70 secs, then checks the upgrade status via console,
    naples reachability and mgmt connectivity.  If any of these fails,
    /obfl/upgrademgr.log is dumped from every node and FAILURE is
    returned immediately.

    Phase 2 pushes config, checks PDS instances and agent debug data,
    the BGP underlay (logged only) and workload connectivity.  When
    ``tc.upgrade_mode`` is set it also stops the background ping and
    evaluates the packet-loss duration.

    Returns:
        ``api.types.status.SUCCESS`` or ``api.types.status.FAILURE``;
        SUCCESS is returned straight away on a dry run.
    """
    status = api.types.status
    verdict = status.SUCCESS

    # no upgrade done in case of dryrun
    if api.IsDryrun():
        return verdict

    # wait for upgrade to complete. status can be found from the presence of /update/pds_upg_status.txt
    switchover_wait = 70
    api.Logger.info(
        f"Sleep for {switchover_wait} secs before checking for Upgrade status"
    )
    misc_utils.Sleep(switchover_wait)

    if checkUpgradeStatusViaConsole(tc) != status.SUCCESS:
        api.Logger.error(
            "Failed in validation of Upgrade Manager completion status via Console"
        )
        verdict = status.FAILURE

    if not naples_utils.EnableReachability(tc.nodes):
        api.Logger.error(
            f"Failed to reach naples {tc.nodes} post upgrade switchover")
        verdict = status.FAILURE

    # verify mgmt connectivity
    if VerifyMgmtConnectivity(tc) != status.SUCCESS:
        api.Logger.error("Failed in Mgmt Connectivity Check after Upgrade .")
        verdict = status.FAILURE

    if verdict != status.SUCCESS:
        # Failure could be due to upgrade failure before/after switchover or
        # management connectivity failure. Hence dump the upgrade_mgr.log
        # via console for debug purpose.
        api.Logger.info("DUMP Upgrade Manager Logs")
        api.Logger.SetSkipLogPrefix(True)
        for node in tc.nodes:
            log_text, rc = api.RunNaplesConsoleCmd(
                node, "cat /obfl/upgrademgr.log", True)
            if rc == 0:
                api.Logger.info(
                    f"Dump /obfl/upgrademgr.log from node: {node}, exit_code:{rc} "
                )
                for entry in log_text.split('\r\n'):
                    api.Logger.info(entry.strip())
            else:
                api.Logger.info(
                    f"Failed to dump /obfl/upgrademgr.log from node: {node}, exit_code:{rc} "
                )
        api.Logger.SetSkipLogPrefix(False)
        return status.FAILURE

    # push configs after upgrade
    UpdateConfigAfterUpgrade(tc)

    # verify PDS instances
    if check_pds_instance(tc) != status.SUCCESS:
        api.Logger.error("Failed in check_pds_instances")
        verdict = status.FAILURE

    if check_pds_agent_debug_data(tc) != status.SUCCESS:
        api.Logger.error("Failed in check_pds_agent_debug_data")
        verdict = status.FAILURE

    # TODO: verify BGP Underlay (REMOVE WHEN PING API IS UPDATED)
    # An underlay failure is only logged; it does not fail the test.
    underlay = bgp_utils.check_underlay_bgp_peer_connectivity(
        sleep_time=15, timeout_val=120)
    if underlay != status.SUCCESS:
        api.Logger.error("Failed in underlay connectivity check")

    # verify connectivity
    if VerifyConnectivity(tc) != status.SUCCESS:
        api.Logger.error("Failed in Connectivity Check after Upgrade .")
        if not skip_connectivity_failure:
            verdict = status.FAILURE

    if tc.upgrade_mode:
        tc.sleep = 100
        # If rollout status is failure, then no need to wait for traffic test
        if verdict == status.SUCCESS:
            api.Logger.info(
                f"Sleep for {tc.sleep} secs for traffic test to complete")
            misc_utils.Sleep(tc.sleep)

        # terminate background traffic and calculate packet loss duration
        if tc.background:
            stop_status = ping.TestTerminateBackgroundPing(
                tc, tc.pktsize, pktlossverif=tc.pktlossverif)
            if stop_status != status.SUCCESS:
                api.Logger.error(
                    "Failed in Ping background command termination.")
                verdict = status.FAILURE

            # calculate max packet loss duration for background ping
            loss_secs = ping.GetMaxPktLossDuration(tc, interval=tc.interval)
            if loss_secs == 0:
                api.Logger.info("No Packet Loss Found during UPGRADE Test")
            else:
                banner = "-" * 10
                if tc.pktlossverif:
                    verdict = status.FAILURE
                api.Logger.error(
                    f"{banner} Packet Loss duration during UPGRADE of {tc.nodes} is {loss_secs} secs {banner}"
                )
                if tc.allowed_down_time and (loss_secs >
                                             tc.allowed_down_time):
                    # Exceeding the allowed loss duration is only reported;
                    # it does not fail the test for now.
                    api.Logger.error(
                        f"{banner} Exceeded allowed Loss Duration {tc.allowed_down_time} secs {banner}"
                    )

    # a truthy return from VerifyUpgLog is logged but does not alter the verdict
    if upgrade_utils.VerifyUpgLog(tc.nodes, tc.GetLogsDir()):
        api.Logger.error("Failed to verify the upgrademgr logs...")

    outcome = "Completed Successfully" if verdict == status.SUCCESS else "Failed"
    api.Logger.info(f"Upgrade: {outcome} for {tc.nodes}")
    return verdict
Пример #7
0
def Verify(tc):
    """Validate node state after a hitless upgrade.

    Stops Trex traffic if enabled, checks the upgrade status on every
    node (a failed status is expected when ``tc.failure_stage`` is set),
    runs the config validation (advisory only) and checks mgmt
    connectivity.  If any mandatory check fails, /obfl/upgrademgr.log is
    dumped from every node and FAILURE is returned immediately.
    Otherwise workload connectivity is verified and the background
    ping / iperf traffic is stopped and verified.

    Failures accumulate: a later check that passes never clears an
    earlier failure.

    Returns:
        ``api.types.status.SUCCESS`` when every mandatory check passed
        (or on a dry run); otherwise a non-success status.
    """
    result = api.types.status.SUCCESS

    if api.IsDryrun():
        return result

    # Stop Trex traffic
    if tc.trex:
        traffic_gen.stop_trex_traffic(tc.trex_peers)

    # Check upgrade status
    if tc.failure_stage is not None:
        # TODO : details check on stage etc
        expected_status = UpgStatus.UPG_STATUS_FAILED
    else:
        expected_status = UpgStatus.UPG_STATUS_SUCCESS
    for node in tc.nodes:
        if not upgrade_utils.CheckUpgradeStatus(node, expected_status):
            result = api.types.status.FAILURE

    # validate the configuration
    # The outcome is advisory only. It is kept out of `result` so that an
    # upgrade-status failure recorded above is not overwritten.
    config_status = upgrade_utils.HitlessUpgradeValidateConfig(tc)
    if config_status != api.types.status.SUCCESS:
        api.Logger.info("Ignoring the configuration validation failure")

    # verify mgmt connectivity
    if traffic.VerifyMgmtConnectivity(tc.nodes) != api.types.status.SUCCESS:
        api.Logger.error("Failed in Mgmt Connectivity Check after Upgrade .")
        result = api.types.status.FAILURE

    if result != api.types.status.SUCCESS:
        api.Logger.info("DUMP Upgrade Manager Logs")
        # Failure could be due to upgrade failure before/after switchover or
        # management connectivity failure. Hence dump the upgrade_mgr.log
        # via console for debug purpose.
        api.Logger.SetSkipLogPrefix(True)
        for node in tc.nodes:
            (resp,
             exit_code) = api.RunNaplesConsoleCmd(node,
                                                  "cat /obfl/upgrademgr.log",
                                                  True)
            if exit_code != 0:
                api.Logger.info("Failed to dump /obfl/upgrademgr.log from "
                                "node: %s, exit_code:%s " % (node, exit_code))
            else:
                api.Logger.info("Dump /obfl/upgrademgr.log from "
                                "node: %s, exit_code:%s " % (node, exit_code))
                for line in resp.split('\r\n'):
                    api.Logger.verbose(line.strip())
        api.Logger.SetSkipLogPrefix(False)
        return api.types.status.FAILURE

    # return value is not evaluated here
    check_pds_agent_debug_data(tc)

    # verify workload connectivity
    if VerifyConnectivity(tc) != api.types.status.SUCCESS:
        api.Logger.error("Failed in Connectivity Check after Upgrade.")
        if not SKIP_CONNECTIVITY_FAILURE:
            result = api.types.status.FAILURE

    tc.sleep = 100
    # If rollout status is failure, then no need to wait for traffic test
    if result == api.types.status.SUCCESS:
        api.Logger.info("Sleep for %s secs for traffic test to complete" %
                        tc.sleep)
        misc_utils.Sleep(tc.sleep)

    # terminate background traffic and calculate packet loss duration
    # The traffic verdict is tracked separately so that a passing ping
    # check cannot hide a connectivity failure recorded above.
    traffic_result = ping_traffic_stop_and_verify(tc)
    if traffic_result == api.types.status.SUCCESS and tc.iperf:
        traffic_result = iperf_traffic_stop_and_verify(tc)
    if traffic_result != api.types.status.SUCCESS:
        result = traffic_result

    # a truthy return from VerifyUpgLog is logged but does not alter the result
    if upgrade_utils.VerifyUpgLog(tc.nodes, tc.GetLogsDir()):
        api.Logger.error("Failed to verify the upgrademgr logs...")

    nodes = ",".join(tc.nodes)
    if result == api.types.status.SUCCESS:
        api.Logger.info(f"Upgrade: Completed Successfully for {nodes}")
    else:
        api.Logger.error(f"Upgrade: Failed for {nodes}")
    return result
Пример #8
0
def Setup(tc):
    """Prepare the tester node for a DTS (dpdk-test) run.

    Picks the tester node (NIC type 'intel') and the DUT node (any other
    NIC type) from ``api.GetNodes()``, packs the ``dpdk`` directory and
    the contents of ``dpdk-test`` into a tar.gz, copies it to the tester
    host, queues the untar and ``scripts/config_tester.sh`` commands
    there, and finally brings ``inb_mnic0``/``inb_mnic1`` down on the DUT
    via the Naples console.

    Args:
        tc: testcase object; ``tester_node*``, ``dut_node*``, ``nodes``
            and ``resp`` are populated here.  The trigger response is
            appended to ``tc.resp`` without being evaluated.

    Returns:
        ``api.types.status.SUCCESS`` on completion or when the current
        iteration is skipped; ``api.types.status.FAILURE`` when there are
        no workloads or when a tester or DUT node cannot be identified.

    Note:
        The process working directory is left inside the ``dpdk-test``
        source directory after the archive is built.
    """

    # parse iterator args
    # parse_args(tc)

    # skip some iterator cases
    if skip_curr_test(tc):
        return api.types.status.SUCCESS

    # node init
    tc.tester_node = None
    tc.tester_node_name = None
    tc.dut_node = None

    # init response list
    tc.resp = []

    if not api.GetWorkloads():
        api.Logger.error('No workloads available')
        return api.types.status.FAILURE

    # initialize tester-node and dut-node.
    tc.nodes = api.GetNodes()
    for node in tc.nodes:
        node_name = node.Name()
        if api.GetNicType(node_name) == 'intel':
            tc.tester_node = node
            tc.tester_node_name = node_name
            tc.tester_node_mgmt_ip = api.GetMgmtIPAddress(node_name)
            api.Logger.info('tester node: %s mgmt IP: %s' %
                            (node_name, tc.tester_node_mgmt_ip))
        else:
            tc.dut_node = node
            tc.dut_node_mgmt_ip = api.GetMgmtIPAddress(node_name)
            api.Logger.info('dut node: %s mgmt IP: %s' %
                            (node_name, tc.dut_node_mgmt_ip))

    # Both roles are dereferenced below; fail cleanly instead of crashing
    # with an AttributeError when the testbed lacks one of them.
    if tc.tester_node is None or tc.dut_node is None:
        api.Logger.error('Failed to identify both a tester node (intel NIC) '
                         'and a dut node')
        return api.types.status.FAILURE

    # create tar.gz file of dpdk and dpdk-test
    sdk_fullpath = api.GetTopDir() + SDK_SRC_PATH
    dpdk_tar_path = api.GetTopDir() + DPDK_TAR_FILE

    # the context manager closes the archive even if adding a member fails
    with tarfile.open(dpdk_tar_path, "w:gz") as tar:
        os.chdir(sdk_fullpath)
        tar.add("dpdk")
        # dpdk-test entries are added from inside the directory so that
        # they land at the top level of the archive
        os.chdir("dpdk-test")
        for name in os.listdir("."):
            tar.add(name)

    api.Logger.info("dpdk-test tarfile location is: " + dpdk_tar_path)

    api.Logger.info("Configuring DTS on " + tc.tester_node_mgmt_ip)

    # copy dpdk-test.tar.gz to tester node.
    api.CopyToHost(tc.tester_node.Name(), [dpdk_tar_path], "")

    # untar dpdk-test.tar.gz and configure tester to run DTS
    req = api.Trigger_CreateExecuteCommandsRequest()
    trig_cmd1 = "tar -xzvf dpdk-test.tar.gz"
    trig_cmd2 = "scripts/config_tester.sh %s %s" % (tc.dut_node_mgmt_ip,
                                                    tc.tester_node_mgmt_ip)
    api.Trigger_AddHostCommand(req,
                               tc.tester_node.Name(),
                               trig_cmd1,
                               timeout=60)
    api.Trigger_AddHostCommand(req,
                               tc.tester_node.Name(),
                               trig_cmd2,
                               timeout=60)
    trig_resp = api.Trigger(req)
    tc.resp.append(trig_resp)

    # disable internal mnic
    cmd = "ifconfig inb_mnic0 down && ifconfig inb_mnic1 down"
    api.RunNaplesConsoleCmd(tc.dut_node.Name(), cmd)

    return api.types.status.SUCCESS