def Trigger(tc):
    """Bring the Athena NIC stack back up end to end.

    Restores device.json, reloads the mnic kernel drivers, restarts the
    athena agent, configures int_mnic0 on Naples, re-plugs the PCI device
    on the host via plugctl.sh, and configures the host-side internal
    mgmt interface with int_mnic0's IP + 1.

    Returns api.types.status.SUCCESS, or FAILURE when a host-side step fails.
    NOTE(review): the console-side steps below do not check the command
    result — failures there are silently ignored; confirm this is intended.
    """
    # move device.json
    cmd = "mv /device.json /nic/conf/"
    resp = api.RunNaplesConsoleCmd(tc.nodes[0], cmd)
    # load drivers
    cmd = "insmod /nic/bin/ionic_mnic.ko && insmod /nic/bin/mnet_uio_pdrv_genirq.ko && insmod /nic/bin/mnet.ko"
    resp = api.RunNaplesConsoleCmd(tc.nodes[0], cmd)
    # start athena app
    cmd = "/nic/tools/start-agent-skip-dpdk.sh"
    resp = api.RunNaplesConsoleCmd(tc.nodes[0], cmd)
    # wait for athena app to be up
    utils.Sleep(80)
    # configure int_mnic0
    cmd = "ifconfig int_mnic0 " + tc.int_mnic_ip + " netmask 255.255.255.0"
    resp = api.RunNaplesConsoleCmd(tc.nodes[0], cmd)
    # run plugctl to gracefully bring up the PCI device on host
    req = api.Trigger_CreateExecuteCommandsRequest(serial=True)
    cmd = "./plugctl.sh in"
    api.Trigger_AddHostCommand(req, tc.bitw_node_name, cmd)
    resp = api.Trigger(req)
    cmd = resp.commands[0]
    api.PrintCommandResults(cmd)
    if cmd.exit_code != 0:
        api.Logger.error("Failed to gracefully bring up the PCI device on host %s" % \
                         tc.bitw_node_name)
        return api.types.status.FAILURE
    # get host internal mgmt intf
    host_intfs = naples_host.GetHostInternalMgmtInterfaces(tc.bitw_node_name)
    # Assuming single nic per host
    if len(host_intfs) == 0:
        api.Logger.error('Failed to get host interfaces')
        return api.types.status.FAILURE
    intf = host_intfs[0]
    # host end of the internal mgmt link gets int_mnic0's address + 1
    ip_addr = str(ip_address(tc.int_mnic_ip.rstrip()) + 1)
    req = api.Trigger_CreateExecuteCommandsRequest(serial=True)
    cmd = "ifconfig " + str(intf) + " " + ip_addr + "/24 up"
    api.Trigger_AddHostCommand(req, tc.bitw_node_name, cmd)
    resp = api.Trigger(req)
    cmd = resp.commands[0]
    api.PrintCommandResults(cmd)
    if cmd.exit_code != 0:
        api.Logger.error("Failed to gracefully bring up the internal mgmt intf on host %s" % \
                         tc.bitw_node_name)
        return api.types.status.FAILURE
    return api.types.status.SUCCESS
def checkUpgradeStatusViaConsole(tc):
    """Poll /update/pds_upg_status.txt on every node via console until no node
    reports only 'in-progress' (max ~300 one-second retries), then verify each
    node's status file contains 'success'.

    Returns api.types.status.SUCCESS if every node reports success; FAILURE if
    the poll times out still in-progress or any node lacks a success status.
    """
    result = api.types.status.SUCCESS
    status_in_progress = True
    retry_count = 0
    while status_in_progress:
        misc_utils.Sleep(1)
        retry_count += 1
        if retry_count == 300:
            # break if status is still in-progress after max retries
            result = api.types.status.FAILURE
            break
        status_in_progress = False
        for node in tc.nodes:
            # grep -vi exits 0 only when the file has at least one line that
            # is not 'in-progress'; non-zero exit => still in progress
            (resp, exit_code) = api.RunNaplesConsoleCmd(
                node, "grep -vi in-progress /update/pds_upg_status.txt", True)
            api.Logger.verbose("checking upgrade for node: %s, exit_code:%s " %
                               (node, exit_code))
            if exit_code != 0:
                status_in_progress = True
                break
            else:
                api.Logger.info(
                    "Status other than in-progress found in %s, /update/pds_upg_status.txt"
                    % node)
                # console output uses CRLF line endings
                lines = resp.split('\r\n')
                for line in lines:
                    api.Logger.info(line.strip())
        if retry_count % 10 == 0:
            api.Logger.info(
                "Checking for status not in-progress in file /update/pds_upg_status.txt, retries: %s"
                % retry_count)
        if status_in_progress:
            continue
        # every node has moved past in-progress: confirm each reports success
        for node in tc.nodes:
            (resp, exit_code) = api.RunNaplesConsoleCmd(
                node, "grep -i success /update/pds_upg_status.txt", True)
            api.Logger.info(
                "Checking for success status in file /update/pds_upg_status.txt"
            )
            if exit_code != 0:
                result = api.types.status.FAILURE
            else:
                api.Logger.info(
                    "Success Status found in /update/pds_upg_status.txt")
    if status_in_progress:
        api.Logger.error("Upgrade Failed: Status is still IN-PROGRESS")
    return result
def HitlessGetRunningInstance(node):
    """Identify the running hitless-upgrade instance on *node*.

    Probes /proc/mounts over the console (the console session is expected
    to be outside any instance) for the A/B instance mount points.
    Returns HITLESS_INSTANCE_A, HITLESS_INSTANCE_B, or HITLESS_INSTANCE_NONE.
    """
    # console should be outside, not in any instance
    probes = (
        ("grep -q '/mnt/a' /proc/mounts", HITLESS_INSTANCE_A),
        ("grep -q '/mnt/b' /proc/mounts", HITLESS_INSTANCE_B),
    )
    for probe_cmd, instance in probes:
        _, rc = api.RunNaplesConsoleCmd(node, probe_cmd, True)
        if rc == 0:
            return instance
    api.Logger.error(f"Upgrade hitless instance get failed for node {node}")
    return HITLESS_INSTANCE_NONE
def HitlessRunCmdOnInstance(node, instance, cmd):
    """Attach to the given hitless-upgrade instance on *node*, run *cmd*
    there, and detach again.

    Returns api.types.status.SUCCESS when both the attach and the command
    succeed, FAILURE otherwise.
    """
    login = "******" if instance == HITLESS_INSTANCE_A else "******"
    _, rc = api.RunNaplesConsoleCmd(node, login, True)
    if rc != 0:
        api.Logger.error(f"Upgrade hitless, cmd {login} failed on node {node}")
        return api.types.status.FAILURE
    # this exit is to exit the penvisor attach
    cmd = f"{cmd} && exit"
    _, rc = api.RunNaplesConsoleCmd(node, cmd, True)
    if rc != 0:
        api.Logger.error(f"Upgrade hitless, cmd {cmd} failed on node {node}")
        return api.types.status.FAILURE
    api.Logger.info(f"Upgrade hitless, cmd {cmd} success on node {node}")
    return api.types.status.SUCCESS
def Setup(tc):
    """Tear down the Athena NIC stack ahead of the test trigger.

    Copies device.json and plugctl.sh into place, records int_mnic0's IP,
    removes pensando_pre_init.sh, unconfigures the inband/internal mgmt
    interfaces, unloads the mnic drivers, hot-unplugs the PCI device on the
    host, and kills the athena app.

    Returns api.types.status.SUCCESS, or FAILURE when a checked step fails.
    NOTE(review): the RunNaplesConsoleCmd results below are not checked —
    those teardown steps are best-effort; confirm intentional.
    """
    tc.bitw_node_name = api.GetTestsuiteAttr("bitw_node_name")
    tc.intfs = api.GetTestsuiteAttr("inb_mnic_intfs")
    tc.nodes = api.GetNaplesHostnames()
    # copy device.json to naples
    device_json_fname = api.GetTopDir() + '/nic/conf/athena/device.json'
    api.CopyToNaples(tc.bitw_node_name, [device_json_fname], "")
    # copy plugctl.sh to host
    plugctl_fname = api.GetTopDir(
    ) + '/iota/test/athena/testcases/networking/scripts/plugctl.sh'
    api.CopyToHost(tc.bitw_node_name, [plugctl_fname], "")
    # get the IP address of int_mnic and store it
    req = api.Trigger_CreateExecuteCommandsRequest(serial=True)
    tc.int_mnic_ip = None
    cmd = "ifconfig int_mnic0 | grep inet | cut -d ':' -f 2 | cut -d ' ' -f 1"
    api.Trigger_AddNaplesCommand(req, tc.bitw_node_name, cmd)
    resp = api.Trigger(req)
    cmd = resp.commands[0]
    api.PrintCommandResults(cmd)
    if cmd.exit_code != 0:
        api.Logger.error("Failed to get int_mnic0 IP on node %s" % \
                         tc.bitw_node_name)
        return api.types.status.FAILURE
    else:
        tc.int_mnic_ip = str(cmd.stdout)
    # delete pensando_pre_init.sh
    req = api.Trigger_CreateExecuteCommandsRequest(serial=True)
    cmd = "cd /sysconfig/config0 && touch pensando_pre_init.sh && rm pensando_pre_init.sh"
    api.Trigger_AddNaplesCommand(req, tc.bitw_node_name, cmd)
    # bring down linux interfaces
    for intf in tc.intfs:
        # unconfigure inb_mnic0 and inb_mnic1
        ip_addr = str(ip_address(intf['ip']) + 1)
        utils.configureNaplesIntf(req, tc.bitw_node_name,
                                  intf['name'], ip_addr, '24',
                                  vlan=intf['vlan'],
                                  unconfig=True)
    resp = api.Trigger(req)
    for cmd in resp.commands:
        api.PrintCommandResults(cmd)
        if cmd.exit_code != 0:
            api.Logger.error("Failed to bring down linux interfaces on node %s" % \
                             tc.bitw_node_name)
            return api.types.status.FAILURE
    # unconfigure int_mnic0
    cmd = "ifconfig int_mnic0 down && ip addr del " + tc.int_mnic_ip
    resp = api.RunNaplesConsoleCmd(tc.nodes[0], cmd)
    # unload drivers
    cmd = "rmmod mnet && rmmod mnet_uio_pdrv_genirq && rmmod ionic_mnic"
    resp = api.RunNaplesConsoleCmd(tc.nodes[0], cmd)
    # run plugctl to gracefully bring down the PCI device on host
    req = api.Trigger_CreateExecuteCommandsRequest(serial=True)
    cmd = "./plugctl.sh out"
    api.Trigger_AddHostCommand(req, tc.bitw_node_name, cmd)
    resp = api.Trigger(req)
    cmd = resp.commands[0]
    api.PrintCommandResults(cmd)
    if cmd.exit_code != 0:
        api.Logger.error("Failed to gracefully bring down the PCI device on host %s" % \
                         tc.bitw_node_name)
        return api.types.status.FAILURE
    # kill athena primary app
    cmd = "pkill athena_app"
    resp = api.RunNaplesConsoleCmd(tc.nodes[0], cmd)
    return api.types.status.SUCCESS
def Verify(tc):
    """Verify the upgrade completed: poll upgrade status over console, check
    management/workload connectivity, PDS state, and (optionally) background
    traffic loss, dumping upgrademgr.log on early failure.

    Returns api.types.status.SUCCESS or FAILURE.
    """
    result = api.types.status.SUCCESS
    if api.IsDryrun():
        # no upgrade done in case of dryrun
        return result
    upg_switchover_time = 70
    # wait for upgrade to complete. status can be found from the presence of /update/pds_upg_status.txt
    api.Logger.info(
        f"Sleep for {upg_switchover_time} secs before checking for Upgrade status"
    )
    misc_utils.Sleep(upg_switchover_time)
    if checkUpgradeStatusViaConsole(tc) != api.types.status.SUCCESS:
        api.Logger.error(
            "Failed in validation of Upgrade Manager completion status via Console"
        )
        result = api.types.status.FAILURE
    if not naples_utils.EnableReachability(tc.nodes):
        api.Logger.error(
            f"Failed to reach naples {tc.nodes} post upgrade switchover")
        result = api.types.status.FAILURE
    # verify mgmt connectivity
    if VerifyMgmtConnectivity(tc) != api.types.status.SUCCESS:
        api.Logger.error("Failed in Mgmt Connectivity Check after Upgrade .")
        result = api.types.status.FAILURE
    if result != api.types.status.SUCCESS:
        api.Logger.info("DUMP Upgrade Manager Logs")
        # Failure could be due to upgrade failure before/after switchover or
        # management connectivity failure. Hence dump the upgrade_mgr.log
        # via console for debug purpose.
        api.Logger.SetSkipLogPrefix(True)
        for node in tc.nodes:
            (resp, exit_code) = api.RunNaplesConsoleCmd(node,
                                                        "cat /obfl/upgrademgr.log",
                                                        True)
            if exit_code != 0:
                api.Logger.info("Failed to dump /obfl/upgrademgr.log from "
                                "node: %s, exit_code:%s " % (node, exit_code))
            else:
                api.Logger.info("Dump /obfl/upgrademgr.log from "
                                "node: %s, exit_code:%s " % (node, exit_code))
                # console output uses CRLF line endings
                lines = resp.split('\r\n')
                for line in lines:
                    api.Logger.info(line.strip())
        api.Logger.SetSkipLogPrefix(False)
        return api.types.status.FAILURE
    # push configs after upgrade
    UpdateConfigAfterUpgrade(tc)
    # verify PDS instances
    if check_pds_instance(tc) != api.types.status.SUCCESS:
        api.Logger.error("Failed in check_pds_instances")
        result = api.types.status.FAILURE
    if check_pds_agent_debug_data(tc) != api.types.status.SUCCESS:
        api.Logger.error("Failed in check_pds_agent_debug_data")
        result = api.types.status.FAILURE
    # TODO: verify BGP Underlay (REMOVE WHEN PING API IS UPDATED)
    if bgp_utils.check_underlay_bgp_peer_connectivity(
            sleep_time=15, timeout_val=120) != api.types.status.SUCCESS:
        api.Logger.error("Failed in underlay connectivity check")
        # deliberately not failing the test here yet (see TODO above)
        #return api.types.status.FAILURE
    # verify connectivity
    if VerifyConnectivity(tc) != api.types.status.SUCCESS:
        api.Logger.error("Failed in Connectivity Check after Upgrade .")
        if not skip_connectivity_failure:
            result = api.types.status.FAILURE
    if tc.upgrade_mode:
        tc.sleep = 100
    # If rollout status is failure, then no need to wait for traffic test
    if result == api.types.status.SUCCESS:
        api.Logger.info("Sleep for %s secs for traffic test to complete" %
                        tc.sleep)
        misc_utils.Sleep(tc.sleep)
    pkt_loss_duration = 0
    # terminate background traffic and calculate packet loss duration
    if tc.background:
        if ping.TestTerminateBackgroundPing(tc, tc.pktsize,\
                pktlossverif=tc.pktlossverif) != api.types.status.SUCCESS:
            api.Logger.error(
                "Failed in Ping background command termination.")
            result = api.types.status.FAILURE
        # calculate max packet loss duration for background ping
        pkt_loss_duration = ping.GetMaxPktLossDuration(tc,
                                                       interval=tc.interval)
        if pkt_loss_duration != 0:
            indent = "-" * 10
            if tc.pktlossverif:
                result = api.types.status.FAILURE
            api.Logger.error(
                f"{indent} Packet Loss duration during UPGRADE of {tc.nodes} is {pkt_loss_duration} secs {indent}"
            )
            if tc.allowed_down_time and (pkt_loss_duration > tc.allowed_down_time):
                api.Logger.error(
                    f"{indent} Exceeded allowed Loss Duration {tc.allowed_down_time} secs {indent}"
                )
                # Failing test based on longer traffic loss duration is commented for now.
                # enable below line when needed.
                #result = api.types.status.FAILURE
        else:
            api.Logger.info("No Packet Loss Found during UPGRADE Test")
    # NOTE(review): a truthy VerifyUpgLog return is only logged, it does not
    # set result to FAILURE — confirm this is intended.
    if upgrade_utils.VerifyUpgLog(tc.nodes, tc.GetLogsDir()):
        api.Logger.error("Failed to verify the upgrademgr logs...")
    if result == api.types.status.SUCCESS:
        api.Logger.info(f"Upgrade: Completed Successfully for {tc.nodes}")
    else:
        api.Logger.info(f"Upgrade: Failed for {tc.nodes}")
    return result
def Verify(tc):
    """Verify a hitless upgrade: stop Trex traffic, check per-node upgrade
    status (expected FAILED when a failure stage was injected), validate
    configuration, check mgmt/workload connectivity and traffic results,
    dumping upgrademgr.log on early failure.

    Returns api.types.status.SUCCESS or FAILURE.
    """
    result = api.types.status.SUCCESS
    if api.IsDryrun():
        return result
    # Stop Trex traffic
    if tc.trex:
        traffic_gen.stop_trex_traffic(tc.trex_peers)
    # Check upgrade status
    if tc.failure_stage != None:
        # TODO : details check on stage etc
        status = UpgStatus.UPG_STATUS_FAILED
    else:
        status = UpgStatus.UPG_STATUS_SUCCESS
    for node in tc.nodes:
        if not upgrade_utils.CheckUpgradeStatus(node, status):
            result = api.types.status.FAILURE
    # validate the configuration
    # NOTE(review): this assignment overwrites any FAILURE recorded by the
    # CheckUpgradeStatus loop above, and a validation failure is then reset
    # to SUCCESS — confirm the status-check failures are meant to be dropped.
    result = upgrade_utils.HitlessUpgradeValidateConfig(tc)
    if result != api.types.status.SUCCESS:
        api.Logger.info("Ignoring the configuration validation failure")
        result = api.types.status.SUCCESS
    # verify mgmt connectivity
    if traffic.VerifyMgmtConnectivity(tc.nodes) != api.types.status.SUCCESS:
        api.Logger.error("Failed in Mgmt Connectivity Check after Upgrade .")
        result = api.types.status.FAILURE
    if result != api.types.status.SUCCESS:
        api.Logger.info("DUMP Upgrade Manager Logs")
        # Failure could be due to upgrade failure before/after switchover or
        # management connectivity failure. Hence dump the upgrade_mgr.log
        # via console for debug purpose.
        api.Logger.SetSkipLogPrefix(True)
        for node in tc.nodes:
            (resp, exit_code) = api.RunNaplesConsoleCmd(node,
                                                        "cat /obfl/upgrademgr.log",
                                                        True)
            if exit_code != 0:
                api.Logger.info("Failed to dump /obfl/upgrademgr.log from "
                                "node: %s, exit_code:%s " % (node, exit_code))
            else:
                api.Logger.info("Dump /obfl/upgrademgr.log from "
                                "node: %s, exit_code:%s " % (node, exit_code))
                # console output uses CRLF line endings
                lines = resp.split('\r\n')
                for line in lines:
                    api.Logger.verbose(line.strip())
        api.Logger.SetSkipLogPrefix(False)
        return api.types.status.FAILURE
    check_pds_agent_debug_data(tc)
    # verify workload connectivity
    if VerifyConnectivity(tc) != api.types.status.SUCCESS:
        api.Logger.error("Failed in Connectivity Check after Upgrade.")
        if not SKIP_CONNECTIVITY_FAILURE:
            result = api.types.status.FAILURE
    tc.sleep = 100
    # If rollout status is failure, then no need to wait for traffic test
    if result == api.types.status.SUCCESS:
        api.Logger.info("Sleep for %s secs for traffic test to complete" %
                        tc.sleep)
        misc_utils.Sleep(tc.sleep)
    # terminate background traffic and calculate packet loss duration
    # NOTE(review): this also overwrites a connectivity FAILURE recorded
    # above — confirm intended.
    result = ping_traffic_stop_and_verify(tc)
    if result == api.types.status.SUCCESS and tc.iperf:
        result = iperf_traffic_stop_and_verify(tc)
    if upgrade_utils.VerifyUpgLog(tc.nodes, tc.GetLogsDir()):
        api.Logger.error("Failed to verify the upgrademgr logs...")
    nodes = ",".join(tc.nodes)
    if result == api.types.status.SUCCESS:
        api.Logger.info(f"Upgrade: Completed Successfully for {nodes}")
    else:
        api.Logger.error(f"Upgrade: Failed for {nodes}")
    return result
def _build_dpdk_tarball(sdk_fullpath, dpdk_tar_path):
    """Create a gzipped tarball at *dpdk_tar_path* containing the 'dpdk'
    tree plus the contents of the sibling 'dpdk-test' directory.

    Fix over the original inline code: the tarfile is closed even on error
    (context manager) and the process-wide cwd is restored (the original
    leaked the os.chdir into the rest of the test run).
    """
    saved_cwd = os.getcwd()
    try:
        with tarfile.open(dpdk_tar_path, "w:gz") as tar:
            os.chdir(sdk_fullpath)
            tar.add("dpdk")
            os.chdir("dpdk-test")
            for name in os.listdir("."):
                tar.add(name)
    finally:
        os.chdir(saved_cwd)


def Setup(tc):
    """Prepare the DTS (DPDK Test Suite) environment.

    Discovers the tester (intel NIC) and DUT nodes, packages dpdk/dpdk-test
    into a tarball, copies it to the tester, configures the tester, and
    disables the inband mnic interfaces on the DUT.

    Returns api.types.status.SUCCESS, or FAILURE when prerequisites are
    missing (no workloads, no tester/DUT node found).
    """
    # parse iterator args
    # parse_args(tc)
    # skip some iterator cases
    if skip_curr_test(tc):
        return api.types.status.SUCCESS
    # node init
    tc.tester_node = None
    tc.tester_node_name = None
    tc.dut_node = None
    # init response list
    tc.resp = []
    workloads = api.GetWorkloads()
    if not workloads:
        api.Logger.error('No workloads available')
        return api.types.status.FAILURE
    # initialize tester-node and dut-node.
    tc.nodes = api.GetNodes()
    for node in tc.nodes:
        if api.GetNicType(node.Name()) == 'intel':
            tc.tester_node = node
            tc.tester_node_name = node.Name()
            tc.tester_node_mgmt_ip = api.GetMgmtIPAddress(node.Name())
            api.Logger.info('tester node: %s mgmt IP: %s' %
                            (node.Name(), tc.tester_node_mgmt_ip))
        else:
            tc.dut_node = node
            tc.dut_node_mgmt_ip = api.GetMgmtIPAddress(node.Name())
            api.Logger.info('dut node: %s mgmt IP: %s' %
                            (node.Name(), tc.dut_node_mgmt_ip))
    # guard: the original dereferenced these unconditionally and would raise
    # AttributeError if node discovery found no intel/dut node
    if tc.tester_node is None or tc.dut_node is None:
        api.Logger.error('Failed to find tester and/or dut node')
        return api.types.status.FAILURE
    # create tar.gz file of dpdk and dpdk-test
    sdk_fullpath = api.GetTopDir() + SDK_SRC_PATH
    dpdk_tar_path = api.GetTopDir() + DPDK_TAR_FILE
    _build_dpdk_tarball(sdk_fullpath, dpdk_tar_path)
    api.Logger.info("dpdk-test tarfile location is: " + dpdk_tar_path)
    api.Logger.info("Configuring DTS on " + tc.tester_node_mgmt_ip)
    # copy dpdk-test.tar.gz to tester node.
    api.CopyToHost(tc.tester_node.Name(), [dpdk_tar_path], "")
    # untar dpdk-test.tar.gz and configure tester to run DTS
    req = api.Trigger_CreateExecuteCommandsRequest()
    trig_cmd1 = "tar -xzvf dpdk-test.tar.gz"
    trig_cmd2 = "scripts/config_tester.sh %s %s" % (tc.dut_node_mgmt_ip,
                                                    tc.tester_node_mgmt_ip)
    api.Trigger_AddHostCommand(req, tc.tester_node.Name(), trig_cmd1,
                               timeout=60)
    api.Trigger_AddHostCommand(req, tc.tester_node.Name(), trig_cmd2,
                               timeout=60)
    trig_resp = api.Trigger(req)
    tc.resp.append(trig_resp)
    # disable internal mnic
    cmd = "ifconfig inb_mnic0 down && ifconfig inb_mnic1 down"
    resp = api.RunNaplesConsoleCmd(tc.dut_node.Name(), cmd)
    return api.types.status.SUCCESS