Exemplo n.º 1
0
def Trigger(tc):
    """Repeatedly clear the learn-db and re-learn all endpoints.

    For tc.iterations rounds: flush learnt data on tc.node, re-learn every
    endpoint via gratuitous ARP, send a couple of ARP replies, and validate
    the learnt info. Returns FAILURE on the first failing step.
    """
    workloads = api.GetWorkloads(tc.node)
    for itr in range(tc.iterations):
        api.Logger.info(f"Starting iteration {itr}")
        api.Logger.info(f"Clearing learn-db")
        if not learn_utils.ClearLearnData(tc.node):
            api.Logger.error("Failed to clear learn info at iteration %d" % itr)
            return api.types.status.FAILURE
        api.Logger.info(f"Re-learning all endpoints")
        if not arp_utils.SendGratArp(workloads):
            api.Logger.error("Failed to send arp probes at iteration %d" % itr)
            return api.types.status.FAILURE
        api.Logger.info(f"Sending some ARP replies")
        for _ in range(2):
            arp_utils.SendArpReply(workloads)
            misc_utils.Sleep(2)
        learn_utils.DumpLearnData()
        if not learn_utils.ValidateLearnInfo(tc.node):
            api.Logger.error("Failed to validate learn data at iteration %d" %
                             itr)
            return api.types.status.FAILURE
        misc_utils.Sleep(tc.interval)
    return api.types.status.SUCCESS
Exemplo n.º 2
0
def Trigger(tc):
    # Read Age and State. Sleep for ttl of the entry
    max_retry = 5
    interval = 2
    found = False
    for _ in range(max_retry):
        ret, data = learn_utils.ReadLearnIPOperData(tc.node, tc.learn_ip_obj)
        if not ret or data is None:
            return api.types.status.FAILURE
        if data['state'] == learn_pb2.EP_STATE_CREATED and data['ttl'] > 0:
            api.Logger.debug("IP endpoint is in Created state with ttl %d" %
                             data['ttl'])
            found = True
            break
        misc_utils.Sleep(interval)

    learn_utils.DumpLearnIP(tc.node, tc.learn_ip_obj)
    if not found:
        api.Logger.error(
            "IP endpoint not seen in Learning state even after %d retries" %
            max_retry)
        return api.types.status.FAILURE

    # Wait out the entry's remaining time-to-live
    misc_utils.Sleep(data['ttl'])
    return api.types.status.SUCCESS
Exemplo n.º 3
0
def Verify(tc):
    """Verify ageing of IP and MAC endpoints.

    Waits for every IP endpoint to reach Probing state, sleeps long enough
    for all probes to go unanswered so the IP entries age out, then checks
    that the MAC entry still has a non-zero age, ages out after the device
    learn-age timeout, and that ageout statistics match expectations.

    Fix vs. original: `data != None` replaced with `data is not None`
    (PEP 8 — comparisons to singletons must use `is`/`is not`).
    """
    max_retry = 2
    interval = 2
    deviceLearnAgeTimeout = EzAccessStoreClient[
        tc.node].GetDevice().GetLearnAgeTimeout()

    # Phase 1: every IP endpoint must move to Probing state
    for learn_ip_obj in tc.learn_ip_obj_list:
        retry = 0
        while retry < max_retry:
            ret, data = learn_utils.ReadLearnIPOperData(tc.node, learn_ip_obj)
            if not ret or data is None:
                return api.types.status.FAILURE
            if data['state'] != learn_pb2.EP_STATE_PROBING:
                retry += 1
                misc_utils.Sleep(interval)
                continue
            break
        if retry == max_retry:
            api.Logger.error("One of the IP endpoints not in Probing state")
            learn_utils.DumpLearnIP(tc.node)
            return api.types.status.FAILURE

    api.Logger.verbose("All IP endpoints are in Probing state now")
    learn_utils.DumpLearnIP(tc.node)
    misc_utils.Sleep(
        93
    )  # sleep for 30s thrice to let 3 probes be sent + few seconds of delay

    # Phase 2: all IP entries must be gone once the probes went unanswered
    for learn_ip_obj in tc.learn_ip_obj_list:
        ret, data = learn_utils.ReadLearnIPOperData(tc.node, learn_ip_obj)
        if not ret or data is not None:
            api.Logger.error("One of IP endpoints still not deleted")
            return api.types.status.FAILURE

    api.Logger.verbose("All IP endpoints are aged out")
    # Phase 3: MAC entry should still exist with a non-zero age ...
    ret, data = learn_utils.ReadLearnMACOperData(tc.node, tc.learn_mac_obj)
    if not ret or data is None or data['ttl'] == 0:
        api.Logger.error("MAC expected to have a non-zero age")
        return api.types.status.FAILURE

    misc_utils.Sleep(
        deviceLearnAgeTimeout)  # sleep to let mac entry get deleted

    # ... and be deleted after the learn-age timeout has elapsed
    ret, data = learn_utils.ReadLearnMACOperData(tc.node, tc.learn_mac_obj)
    if not ret or data is not None:
        api.Logger.error(
            "MAC expected to be deleted by now but still hanging around")
        learn_utils.DumpLearnMAC(tc.node, tc.learn_mac_obj)
        return api.types.status.FAILURE

    api.Logger.verbose("MAC got flushed after age out")
    # Phase 4: ageout counters must match the number of aged entries
    stats = learn_utils.GetLearnStatistics([tc.node])
    if stats[tc.node]['macageouts'] != 1 or stats[tc.node]['ipageouts'] != len(
            tc.learn_ip_obj_list):
        api.Logger.error("Ageout statistics not seen as expected")
        return api.types.status.FAILURE
    return api.types.status.SUCCESS
Exemplo n.º 4
0
def setDataPortStatePerUplink(naples_nodes, oper, id):
    """Apply admin-state `oper` to the uplink(s) selected by `id` on each node.

    `id` of 'Uplink0' or 'Uplink1' targets a single uplink; anything else
    targets both. Prints port status on each node afterwards.
    """
    if id == 'Uplink0':
        uplink_list = [UPLINK_PREFIX1]
    elif id == 'Uplink1':
        uplink_list = [UPLINK_PREFIX2]
    else:
        uplink_list = [UPLINK_PREFIX1, UPLINK_PREFIX2]

    if GlobalOptions.dryrun:
        return api.types.status.SUCCESS

    for node in naples_nodes:
        node_uuid = EzAccessStoreClient[node].GetNodeUuid(node)
        for uplink in uplink_list:
            intf_uuid = uplink % node_uuid
            cmd = ("debug update port --admin-state %s --port " +
                   intf_uuid) % oper
            ret, resp = pdsctl.ExecutePdsctlCommand(node, cmd, yaml=False)
            if ret != True:
                api.Logger.error("oper:%s uplink failed at node %s : %s" %
                                 (oper, node, resp))
                return api.types.status.FAILURE
        # short pause before printing status so the change is reflected
        misc_utils.Sleep(1)
        pdsctl.ExecutePdsctlShowCommand(node, "port status", yaml=False)
    return api.types.status.SUCCESS
Exemplo n.º 5
0
def FlapSwitchPort(tc, num_ports=1, down_time=0, port='any'):
    """Shut, wait, and unshut switch data port(s) on all Naples nodes.

    Fixes vs. original: `port is 'any'` used identity comparison against a
    string literal (unreliable, SyntaxWarning on Python 3.8+) — now `==`;
    the error paths referenced the undefined name `start_port` (NameError)
    — now `port_num`.
    """
    naples_nodes = api.GetNaplesHostnames()
    api.Logger.info("Flapping switch port on %s ..." % naples_nodes)
    port_num = 1

    # Pick a random port when the caller does not care which single port flaps
    if num_ports == 1 and port == 'any':
        port_num = random.choice([1, 2])

    ret = api.ShutDataPorts(naples_nodes, num_ports, start_port=port_num)
    if ret != api.types.status.SUCCESS:
        api.Logger.error("Failed to Shut the switch port:%s" % port_num)
        return api.types.status.FAILURE

    # `any` callback: at least one uplink must report DOWN
    ret = DetectUpLinkState(naples_nodes, PORT_OPER_STATUS_DOWN, any)
    if ret != api.types.status.SUCCESS:
        api.Logger.error("Failed to detect any uplink(%s) in DOWN state." %
                         port_num)
        return api.types.status.FAILURE

    misc_utils.Sleep(down_time)

    ret = api.UnShutDataPorts(naples_nodes, num_ports, start_port=port_num)
    if ret != api.types.status.SUCCESS:
        api.Logger.error("Failed to UnShut the switch port:%s" % port_num)
        return api.types.status.FAILURE

    # `all` callback: every uplink must be back UP
    ret = DetectUpLinkState(naples_nodes, PORT_OPER_STATUS_UP, all)
    if ret != api.types.status.SUCCESS:
        api.Logger.error("Failed to detect any(%s) uplink in UP state." %
                         port_num)
        return api.types.status.FAILURE

    return api.types.status.SUCCESS
Exemplo n.º 6
0
def HitlessTriggerUpdateRequest(tc):
    """Start the upgrade manager on every node and issue hitless upgrade
    requests; returns FAILURE if any request is not accepted."""
    if api.IsDryrun():
        return api.types.status.SUCCESS

    result = api.types.status.SUCCESS
    bg_req = api.Trigger_CreateExecuteCommandsRequest(serial=False)
    # start upgrade manager process in the background on each node
    cmd = "/nic/tools/start-upgmgr.sh -n "
    for node in tc.nodes:
        api.Logger.info("Starting Upgrade Manager %s" % (cmd))
        api.Trigger_AddNaplesCommand(bg_req, node, cmd, background=True)
    api.Trigger(bg_req)

    # wait for upgrade manager to comeup
    misc_utils.Sleep(10)
    for node in tc.nodes:
        # Generate upgrade client objects for this node
        UpgradeClient.GenerateUpgradeObjects(node, api.GetNicMgmtIP(node))

        upg_obj = UpgradeClient.GetUpgradeObject(node)
        upg_obj.SetPkgName(tc.pkg_name)
        upg_obj.SetUpgMode(upgrade_pb2.UPGRADE_MODE_HITLESS)
        upg_status = upg_obj.UpgradeReq()
        api.Logger.info(
            f"Hitless Upgrade request for {node} returned status {upg_status}")
        if upg_status != upgrade_pb2.UPGRADE_STATUS_OK:
            api.Logger.error(f"Failed to start upgrade manager on {node}")
            result = api.types.status.FAILURE
    return result
Exemplo n.º 7
0
def Verify(tc):
    """Validate endpoint-move statistics and flow moves on both nodes,
    then re-test connectivity after the move."""
    if tc.skip:
        return api.types.status.SUCCESS

    ctx = tc.mv_ctx
    src_node = ctx['src_wl'].node_name
    dst_node = ctx['dst_wl'].node_name
    prefix = ctx['ip_prefix']

    misc_utils.Sleep(5)  # let metaswitch carry this to other side
    learn_utils.DumpLearnData()
    if __validate_move_stats(src_node, dst_node) != api.types.status.SUCCESS:
        return api.types.status.FAILURE

    api.Logger.verbose(
        "Move statistics are matching expectation on both nodes")

    # Flow moves: local-to-remote on the source, remote-to-local on the dest
    for node, direction in ((src_node, 'L2R'), (dst_node, 'R2L')):
        ret = __validate_flow_move(tc, node, prefix, direction)
        if ret != api.types.status.SUCCESS:
            api.Logger.error("Failed to validate flows on node %s" % node)
            return api.types.status.FAILURE

    # Validate with traffic after moving
    return move_utils.ValidateEPMove()
Exemplo n.º 8
0
def Verify(tc):
    # Read Age and State. Check age if it is in Learning state still. Retry if it is in Probing state.
    max_retry = 2
    interval = 2
    age_floor = EzAccessStoreClient[
        tc.node].GetDevice().GetLearnAgeTimeout() - 5

    for _ in range(max_retry):
        ret, data = learn_utils.ReadLearnIPOperData(tc.node, tc.learn_ip_obj)
        if not ret or data is None:
            return api.types.status.FAILURE
        if data['state'] == learn_pb2.EP_STATE_CREATED and \
                data['ttl'] > age_floor:
            api.Logger.debug(
                "IP endpoint is in Created state with ttl %d after refresh" %
                data['ttl'])
            return api.types.status.SUCCESS
        misc_utils.Sleep(interval)

    # Retries exhausted without seeing the refreshed age
    learn_utils.DumpLearnIP(tc.node, tc.learn_ip_obj)
    api.Logger.error(
        "IP endpoint did not have the expected age even after %d retries" %
        max_retry)
    return api.types.status.FAILURE
Exemplo n.º 9
0
def __trim_memory():
    """Ask pdsctl to trim memory and give the system time to reclaim it."""
    if not pdsctlUtils.TrimMemory():
        api.Logger.error(f"Failed to trim memory")
        return api.types.status.FAILURE
    # wait few seconds for memory to be reclaimed
    miscUtils.Sleep(10)
    return api.types.status.SUCCESS
Exemplo n.º 10
0
def checkUpgradeStatusViaConsole(tc):
    """Poll /update/pds_upg_status.txt over the Naples console until the
    upgrade status is no longer in-progress, then verify it reports success.

    Polls once per second for at most 300 tries. Returns SUCCESS only when
    every node in tc.nodes has left in-progress state AND its status file
    contains a 'success' line; FAILURE otherwise.
    """
    result = api.types.status.SUCCESS
    status_in_progress = True
    retry_count = 0
    while status_in_progress:
        misc_utils.Sleep(1)
        retry_count += 1
        if retry_count == 300:
            # break if status is still in-progress after max retries
            result = api.types.status.FAILURE
            break

        status_in_progress = False
        for node in tc.nodes:
            # grep -v exits 0 only when at least one line is NOT in-progress
            (resp, exit_code) = api.RunNaplesConsoleCmd(
                node, "grep -vi in-progress /update/pds_upg_status.txt", True)

            api.Logger.verbose("checking upgrade for node: %s, exit_code:%s " %
                               (node, exit_code))
            if exit_code != 0:
                # node still entirely in-progress (or file unreadable): keep polling
                status_in_progress = True
                break
            else:
                api.Logger.info(
                    "Status other than in-progress found in %s, /update/pds_upg_status.txt"
                    % node)
                lines = resp.split('\r\n')
                for line in lines:
                    api.Logger.info(line.strip())

        # periodic progress log every 10 polls
        if retry_count % 10 == 0:
            api.Logger.info(
                "Checking for status not in-progress in file /update/pds_upg_status.txt, retries: %s"
                % retry_count)

        if status_in_progress:
            continue

        # all nodes left in-progress; now require an explicit success marker
        for node in tc.nodes:
            (resp, exit_code) = api.RunNaplesConsoleCmd(
                node, "grep -i success /update/pds_upg_status.txt", True)
            api.Logger.info(
                "Checking for success status in file /update/pds_upg_status.txt"
            )
            if exit_code != 0:
                result = api.types.status.FAILURE
            else:
                api.Logger.info(
                    "Success Status found in /update/pds_upg_status.txt")

    if status_in_progress:
        api.Logger.error("Upgrade Failed: Status is still IN-PROGRESS")

    return result
Exemplo n.º 11
0
def getFirstOperDownPort(node):
    """Return the first uplink that is admin UP but oper DOWN.

    Returns the uplinkDict entry for the first matching uplink; implicitly
    returns None when neither uplink matches. In dryrun, returns SUCCESS.
    """
    # brief settle time before querying port status
    misc_utils.Sleep(3)
    if GlobalOptions.dryrun:
        return api.types.status.SUCCESS

    node_uuid = EzAccessStoreClient[node].GetNodeUuid(node)
    for prefix in (UPLINK_PREFIX1, UPLINK_PREFIX2):
        intf_uuid = prefix % node_uuid
        ok, resp = pdsctl.ExecutePdsctlShowCommand(
            node, "port status -p " + intf_uuid, yaml=False)
        if ok == True and "UP          DOWN" in resp:
            return uplinkDict[prefix]
Exemplo n.º 12
0
def Teardown(tc):
    """Move the endpoint back to its original home and re-validate.

    Fix vs. original: the error path after __validate_flows referenced the
    undefined name `node` (NameError); it now logs the node actually
    validated. Also removed the unused local `ctx`.
    """
    if tc.skip:
        return api.types.status.SUCCESS

    stats_utils.Clear()

    ip_prefix = tc.mv_ctx['ip_prefix']
    src_wl = tc.mv_ctx['src_wl']
    dst_wl = tc.mv_ctx['dst_wl']

    # Move the EP back from its temporary home (dst) to the original (src)
    move_utils.MoveEpIPEntry(dst_wl.node_name, src_wl.node_name, ip_prefix)

    misc_utils.Sleep(5)  # let metaswitch carry it to the other side
    learn_utils.DumpLearnData()
    ret = __validate_move_stats(dst_wl.node_name, src_wl.node_name)
    if ret != api.types.status.SUCCESS:
        return api.types.status.FAILURE

    api.Logger.verbose(
        "Move statistics are matching expectation on both nodes")

    # Validate flow move on src and dst.
    ret = __validate_flow_move(tc, src_wl.node_name, ip_prefix, 'R2L')
    if ret != api.types.status.SUCCESS:
        api.Logger.error("Failed to validate flows on node %s" %
                         src_wl.node_name)
        return api.types.status.FAILURE

    ret = __validate_flow_move(tc, dst_wl.node_name, ip_prefix, 'L2R')
    if ret != api.types.status.SUCCESS:
        api.Logger.error("Failed to validate flows on node %s" %
                         dst_wl.node_name)
        return api.types.status.FAILURE

    # Also validate new flows on src
    ret = __validate_flows(tc, src_wl.node_name)
    if ret != api.types.status.SUCCESS:
        api.Logger.error("Failed to validate flows on node %s" %
                         src_wl.node_name)
        return api.types.status.FAILURE

    # Terminate background ping and check for loss
    ret = __verify_background_ping(tc)
    if ret != api.types.status.SUCCESS:
        return ret

    # Validate with traffic after moving back
    if move_utils.ValidateEPMove() != api.types.status.SUCCESS:
        return api.types.status.FAILURE

    return flow_utils.clearFlowTable(None)
Exemplo n.º 13
0
def verifyDataPortState(naples_nodes, admin, oper):
    """Verify data-port admin/oper state on all nodes, retrying up to
    verifyRetry times with a 1-second pause between attempts."""
    if GlobalOptions.dryrun:
        return api.types.status.SUCCESS

    attempts_left = verifyRetry
    status = verifyDataPortStateHelper(naples_nodes, admin, oper)
    while status == api.types.status.FAILURE and attempts_left > 0:
        misc_utils.Sleep(1)
        attempts_left -= 1
        status = verifyDataPortStateHelper(naples_nodes, admin, oper)

    return status
Exemplo n.º 14
0
def __poll_upgrade_status(tc, status, **kwargs):
    """Poll every node once per second until ALL nodes report `status`.

    Fix vs. original: the per-node loop overwrote `not_found` on every
    iteration, so only the LAST node in tc.nodes was ever actually checked;
    any node still missing the status now keeps the poll going.
    NOTE(review): there is no retry cap — this polls indefinitely until all
    nodes reach the status; confirm that is intended.
    """
    not_found = True
    retry = 0

    while not_found:
        misc_utils.Sleep(1)
        not_found = False

        for node in tc.nodes:
            api.Logger.info(
                f"retry {retry}: Checking upgrade status {status.name} on {node}"
            )
            if not CheckUpgradeStatus(node, status):
                not_found = True
        retry += 1
Exemplo n.º 15
0
def __detectUpLinkState(node, state, cb, tries=6):
    """Poll non-mgmt uplinks until cb() (e.g. any/all) over their
    per-port `operstate == state` booleans is satisfied."""
    PORT_TYPE_MGMT = 2
    for _ in range(tries):
        ports = GetUplinkStatus(node)
        matches = []
        for port in ports:
            # management ports are excluded from the check
            if port['spec']['porttype'] == PORT_TYPE_MGMT:
                continue
            matches.append(
                port['status']['linkstatus']["operstate"] == state)
        if cb(matches):
            return api.types.status.SUCCESS
        misc_utils.Sleep(1)
    return api.types.status.FAILURE
Exemplo n.º 16
0
def Teardown(tc):
    """Bring the workload interface back up and verify endpoints re-learn."""
    if not learn_utils.SetWorkloadIntfOperState(tc.workload, 'up'):
        api.Logger.error("Failed to bringup interface for workload %s%s" %
                         (tc.learn_mac_obj.GID(), tc.node))
        return api.types.status.FAILURE

    arp_utils.SendGratArp([tc.workload])
    learn_utils.DumpLearnMAC(tc.node, tc.learn_mac_obj)
    # let remote mappings for this VNIC sync in other nodes
    misc_utils.Sleep(10)

    if not learn_utils.ValidateLearnInfo():
        api.Logger.error("Learn validation failed")
        return api.types.status.FAILURE

    api.Logger.verbose("Aged out Endpoints are learnt again")
    return api.types.status.SUCCESS
Exemplo n.º 17
0
def switchPortFlap(tc):
    """Flap both switch data ports once, then wait for BGP to reconcile."""
    flap_count, num_ports = 1, 2
    interval = down_time = 2
    naples_nodes = api.GetNaplesHostnames()

    api.Logger.info("Flapping switch port on %s ..." % naples_nodes)
    status = api.FlapDataPorts(naples_nodes, num_ports, down_time, flap_count,
                               interval)
    if status != api.types.status.SUCCESS:
        api.Logger.error("Failed to flap the switch port")
        return status

    # give a gap for bgp to reconcile
    misc_utils.Sleep(60)
    return api.types.status.SUCCESS
Exemplo n.º 18
0
def __verify_learning():
    """Check BGP overlay neighborships on every node, then validate that
    all VNICs and mappings were learnt."""
    api.Logger.verbose("Verifying if all VNIC and Mappings are learnt")

    for node in api.GetNaplesHostnames():
        if not bgp_utils.ValidateBGPOverlayNeighborship(node):
            api.Logger.error("Failed in BGP Neighborship validation for node: %s" % node)
            return api.types.status.FAILURE

    learn_utils.DumpLearnData()
    # Give metaswitch time to advertise local mappings to the other naples.
    # TODO: have to find out if there is any event to wait on
    api.Logger.debug("Sleeping for sometime letting remote mappings to get programmed")
    misc_utils.Sleep(40)

    if learn_utils.ValidateLearnInfo():
        return api.types.status.SUCCESS
    return api.types.status.FAILURE
Exemplo n.º 19
0
def Verify(tc):
    """Validate learn info, then run ICMP connectivity across local
    inter-subnet and remote intra/inter-subnet workload pairs."""
    misc_utils.Sleep(40)  # letting remote mappings to get programmed
    if not learn_utils.ValidateLearnInfo():
        api.Logger.error("Learn validation failed")
        return api.types.status.FAILURE

    # (pair type, pair scope) selections to test
    selectors = [
        (config_api.WORKLOAD_PAIR_TYPE_LOCAL_ONLY,
         config_api.WORKLOAD_PAIR_SCOPE_INTER_SUBNET),
        (config_api.WORKLOAD_PAIR_TYPE_REMOTE_ONLY,
         config_api.WORKLOAD_PAIR_SCOPE_INTRA_SUBNET),
        (config_api.WORKLOAD_PAIR_TYPE_REMOTE_ONLY,
         config_api.WORKLOAD_PAIR_SCOPE_INTER_SUBNET),
    ]
    workload_pairs = []
    for pair_type, pair_scope in selectors:
        workload_pairs.extend(config_api.GetWorkloadPairs(pair_type,
                                                          pair_scope))
    return conn_utils.ConnectivityTest(workload_pairs, ['icmp'], ['ipv4'],
                                       [64], 0, 'all')
Exemplo n.º 20
0
def check_underlay_bgp_peer_connectivity(sleep_time=0, timeout_val=0):
    """Retry BGP underlay validation until it passes or the deadline expires.

    timeout_val of 0 means a single attempt; sleep_time is the pause
    between attempts.
    """
    api.Logger.info("Starting BGP underlay validation ...")
    deadline = time.time() + timeout_val
    retry_count = 1
    while True:
        if ValidateBGPUnderlayNeighborshipInfo():
            return api.types.status.SUCCESS
        if timeout_val == 0 or time.time() >= deadline:
            break
        retry_count += 1
        api.Logger.verbose("BGP underlay is still not up, will do retry({0}) "
                           "after {1} sec...".format(retry_count, sleep_time))
        if sleep_time > 0:
            misc_utils.Sleep(sleep_time)

    api.Logger.error("BGP underlay validation failed ...")
    return api.types.status.FAILURE
Exemplo n.º 21
0
def __verify_move_stats(home, new_home, max_retry=4, interval=5):
    """Poll move statistics until they match expectation or retries run out."""
    for attempt in range(1, max_retry + 1):
        misc_utils.Sleep(interval)  # let metaswitch carry this to other side
        learn_utils.DumpLearnData()
        if __validate_move_stats(home, new_home) == api.types.status.SUCCESS:
            return api.types.status.SUCCESS
        api.Logger.verbose(
            f"Retrying Move statistics validation - retry {attempt}")

    # every attempt failed
    api.Logger.error(
        "Failed to validate move statistics even after retries")
    return api.types.status.FAILURE
Exemplo n.º 22
0
def switchPortOp(naples_nodes, oper, id):
    """Shut or unshut the switch data port(s) selected by `id` on all nodes."""
    num_ports, start_port_id = 1, 1
    if id == 'Switchport1':
        start_port_id = 2
    elif id == "Switchports":
        num_ports = 2

    api.Logger.info(f"Oper: {oper} for {id} on {naples_nodes} ...")
    port_op = api.ShutDataPorts if oper == 'down' else api.UnShutDataPorts
    ret = port_op(naples_nodes, num_ports, start_port_id)

    # TBD: temporary fix to wait 60 seconds for bgp sessions
    misc_utils.Sleep(60)

    if ret != api.types.status.SUCCESS:
        api.Logger.error(f"Failed to bring {oper} : {id}")
        return ret

    return api.types.status.SUCCESS
Exemplo n.º 23
0
def ProcessObjectsByOperation(oper, select_objs, spec=None):
    """Run CRUD operation `oper` on each object, confirm with a Read, and
    for Delete also verify dependent objects' oper state."""
    supported_ops = [ 'Create', 'Read', 'Delete', 'Update' ]
    res = api.types.status.SUCCESS
    if oper is None or oper not in supported_ops:
        return res
    for obj in select_objs:
        if not getattr(obj, oper)(spec):
            api.Logger.error(f"{oper} failed for object: {obj}")
            res = api.types.status.FAILURE
        elif not getattr(obj, 'Read')():
            api.Logger.error(f"read after {oper} failed for object: {obj}")
            res = api.types.status.FAILURE
        if oper == 'Delete' and hasattr(obj, 'VerifyDepsOperSt'):
            # needed until delay_delete is enabled, since read can happen when obj is being deleted
            misc_utils.Sleep(3)
            if not obj.VerifyDepsOperSt(oper):
                api.Logger.error(f"Dependent object oper state not as expected after {oper} on {obj}")
                res = api.types.status.FAILURE
    return res
Exemplo n.º 24
0
def Trigger(tc):
    """Bring the workload interface down and wait out the longest EP age.

    Reads age/state for every learnt IP endpoint; if any entry is still in
    Created (learning) state, sleeps for the largest remaining ttl so all
    entries can transition out of it.

    Fix vs. original: removed dead code — `retry`, `max_retry` and
    `interval` were never updated, so the `if retry == max_retry` branch
    was unreachable.
    """
    age_max = 0

    ret = learn_utils.SetWorkloadIntfOperState(tc.workload, 'down')
    if not ret:
        api.Logger.error("Failed to bringdown interface for workload %s%s" %
                         (tc.learn_mac_obj.GID(), tc.node))
        return api.types.status.FAILURE

    api.Logger.verbose("Brought interface down for workload %s" %
                       tc.learn_mac_obj)
    # Find the largest remaining ttl among entries still in Created state
    for learn_ip_obj in tc.learn_ip_obj_list:
        ret, data = learn_utils.ReadLearnIPOperData(tc.node, learn_ip_obj)
        if not ret or data is None:
            api.Logger.error("Failed to read IP endpoint %s from node %s" %
                             (learn_ip_obj.IP, tc.node))
            return api.types.status.FAILURE
        if data['state'] == learn_pb2.EP_STATE_CREATED and data[
                'ttl'] > age_max:
            age_max = data['ttl']

    if age_max == 0:
        # All entries are in Probing state already
        return api.types.status.SUCCESS

    learn_utils.DumpLearnIP(tc.node)
    misc_utils.Sleep(age_max)
    return api.types.status.SUCCESS
Exemplo n.º 25
0
def Trigger(tc):
    """Move the subnet to a new host interface and verify dependent state."""
    if not tc.subnet.ModifyHostInterface():
        api.Logger.error(
            "Failed to modify host interface association for subnet")
        return api.types.status.FAILURE

    if api.IsDryrun():
        old_intf = new_intf = 'dryrun'
    else:
        node = tc.subnet.Node
        old_intf = intf_client.FindHostInterface(
            node, tc.hostifidx).GetInterfaceName()
        new_intf = intf_client.FindHostInterface(
            node, tc.subnet.HostIfIdx[0]).GetInterfaceName()
    api.Logger.debug(
        f"Subnet moved from HostInterface {old_intf} to {new_intf}")

    misc_utils.Sleep(3)  # needed until delay_delete is enabled
    if not tc.subnet.VerifyDepsOperSt('Delete'):
        api.Logger.error(
            f"Oper state is not as expected after host interface is modified")
        return api.types.status.FAILURE

    __modify_workload_interface(tc)
    return api.types.status.SUCCESS
Exemplo n.º 26
0
def CheckRolloutStatus(tc):
    """Poll each node's rollout REST endpoint until a rollout status appears.

    Polls up to 100 times, 2 seconds apart, via
    https://<mgmt-ip>:<port>/api/v1/naples/rollout/. Each JSON
    'Status'->'status' item is checked for opcode/opstatus/message; for
    "fail" test options, a failure status is the expected outcome. On exit,
    tc.sleep is reduced by the time spent polling so the later traffic-test
    wait is not double counted.
    """
    retries = 0
    start_ts = datetime.now()
    result = api.types.status.FAILURE
    status_found = False
    while retries < 100:
        api.Logger.info("------Issuing Rollout get %s retry------"%retries)
        misc_utils.Sleep(2)
        retries += 1
        # get rollout status
        req = api.Trigger_CreateExecuteCommandsRequest(serial=False)
        for n in tc.Nodes:
            cmd = 'curl -k https://' + api.GetNicIntMgmtIP(n) + ':'+utils.GetNaplesMgmtPort()+'/api/v1/naples/rollout/'
            api.Trigger_AddHostCommand(req, n, cmd)
            api.Logger.info("Sending rollout status get request: %s"%(cmd))
        tc.resp = api.Trigger(req)

        try:
            for cmd in tc.resp.commands:
                api.PrintCommandResults(cmd)
        except Exception as e:
            api.Logger.error(f"Exception occured in sending rollout status get.{e}")
            continue

        for cmd in tc.resp.commands:
            if cmd.exit_code != 0:
                api.Logger.info("Rollout status get request returned failure")
                continue
            resp = json.loads(cmd.stdout)
            try:
                for item in resp['Status']['status']:
                    status_found = True
                    # NOTE(review): Op == 4 presumably identifies the final
                    # rollout operation — confirm against the rollout schema.
                    if not item['Op'] == 4:
                        api.Logger.info("opcode is bad for %s"%cmd.node_name)
                        result = api.types.status.FAILURE
                    if "fail" in tc.iterators.option:
                        # negative test: a failure status/message is expected
                        if not item['opstatus'] == 'failure':
                            api.Logger.info("opstatus is bad for %s"%cmd.node_name)
                            result = api.types.status.FAILURE
                        if tc.iterators.option not in item['Message']:
                            api.Logger.info("message is bad")
                            result = api.types.status.FAILURE
                    else:
                        if not item['opstatus'] == 'success':
                            api.Logger.info("opstatus(%s) is bad for %s"%(item['opstatus'], cmd.node_name))
                            result = api.types.status.FAILURE
                        else:
                            api.Logger.info("Rollout status is SUCCESS for %s"%cmd.node_name)
                            result = api.types.status.SUCCESS
            except Exception as e:
                api.Logger.error("resp: ", json.dumps(resp, indent=1))
                #api.Logger.error(f"Exception occured in parsing response: {e}")
                result = api.types.status.FAILURE
                continue

        if status_found:
            break

    end_ts = datetime.now()
    # find time elapsed in retrieving rollout status and adjust the wait time for traffic test.
    timedelta = end_ts - start_ts
    time_elapsed = timedelta.days * 24 * 3600 + timedelta.seconds

    if time_elapsed < 100:
        # NOTE(review): this pads the elapsed time to 100s by sleeping a full
        # 100s regardless of how long polling already took — confirm this was
        # not meant to be Sleep(100 - time_elapsed).
        time_elapsed = 100
        misc_utils.Sleep(time_elapsed)

    tc.sleep = (tc.sleep - time_elapsed) if (tc.sleep > time_elapsed) else 10
    return result
Exemplo n.º 27
0
def __validate_trigger():
    """Dump learn data and confirm the endpoint move took effect."""
    misc_utils.Sleep(40)  # letting metaswitch sync data
    learn_utils.DumpLearnData()
    ret = move_utils.ValidateEPMove()
    if ret != api.types.status.SUCCESS:
        return api.types.status.FAILURE
    return api.types.status.SUCCESS
Exemplo n.º 28
0
def Verify(tc):
    """Verify a completed (hitless) upgrade end to end.

    Sequence: wait for switchover, check the upgrade status file via
    console, restore reachability to naples, verify mgmt connectivity,
    push post-upgrade config, check PDS instances/agent debug data, check
    underlay BGP, verify data-path connectivity, and — when background
    traffic was running — terminate it and report the maximum packet-loss
    duration. Returns SUCCESS only if every mandatory check passed.
    """
    result = api.types.status.SUCCESS

    if api.IsDryrun():
        # no upgrade done in case of dryrun
        return result

    upg_switchover_time = 70
    # wait for upgrade to complete. status can be found from the presence of /update/pds_upg_status.txt
    api.Logger.info(
        f"Sleep for {upg_switchover_time} secs before checking for Upgrade status"
    )
    misc_utils.Sleep(upg_switchover_time)

    if checkUpgradeStatusViaConsole(tc) != api.types.status.SUCCESS:
        api.Logger.error(
            "Failed in validation of Upgrade Manager completion status via Console"
        )
        result = api.types.status.FAILURE

    # re-establish connectivity to the naples after switchover
    if not naples_utils.EnableReachability(tc.nodes):
        api.Logger.error(
            f"Failed to reach naples {tc.nodes} post upgrade switchover")
        result = api.types.status.FAILURE

    # verify mgmt connectivity
    if VerifyMgmtConnectivity(tc) != api.types.status.SUCCESS:
        api.Logger.error("Failed in Mgmt Connectivity Check after Upgrade .")
        result = api.types.status.FAILURE

    if result != api.types.status.SUCCESS:
        api.Logger.info("DUMP Upgrade Manager Logs")
        # Failure could be due to upgrade failure before/after switchover or
        # management connectivity failure. Hence dump the upgrade_mgr.log
        # via console for debug purpose.
        api.Logger.SetSkipLogPrefix(True)
        for node in tc.nodes:
            (resp,
             exit_code) = api.RunNaplesConsoleCmd(node,
                                                  "cat /obfl/upgrademgr.log",
                                                  True)
            if exit_code != 0:
                api.Logger.info("Failed to dump /obfl/upgrademgr.log from "
                                "node: %s, exit_code:%s " % (node, exit_code))
            else:
                api.Logger.info("Dump /obfl/upgrademgr.log from "
                                "node: %s, exit_code:%s " % (node, exit_code))
                lines = resp.split('\r\n')
                for line in lines:
                    api.Logger.info(line.strip())
        api.Logger.SetSkipLogPrefix(False)
        return api.types.status.FAILURE

    # push configs after upgrade
    UpdateConfigAfterUpgrade(tc)

    # verify PDS instances
    if check_pds_instance(tc) != api.types.status.SUCCESS:
        api.Logger.error("Failed in check_pds_instances")
        result = api.types.status.FAILURE

    if check_pds_agent_debug_data(tc) != api.types.status.SUCCESS:
        api.Logger.error("Failed in check_pds_agent_debug_data")
        result = api.types.status.FAILURE

    # TODO: verify BGP Underlay (REMOVE WHEN PING API IS UPDATED)
    if bgp_utils.check_underlay_bgp_peer_connectivity(
            sleep_time=15, timeout_val=120) != api.types.status.SUCCESS:
        api.Logger.error("Failed in underlay connectivity check")
        #return api.types.status.FAILURE

    # verify connectivity
    # NOTE(review): skip_connectivity_failure is a module-level flag —
    # presumably a knob to tolerate connectivity failures; confirm.
    if VerifyConnectivity(tc) != api.types.status.SUCCESS:
        api.Logger.error("Failed in Connectivity Check after Upgrade .")
        if not skip_connectivity_failure:
            result = api.types.status.FAILURE

    if tc.upgrade_mode:
        tc.sleep = 100
        # If rollout status is failure, then no need to wait for traffic test
        if result == api.types.status.SUCCESS:
            api.Logger.info("Sleep for %s secs for traffic test to complete" %
                            tc.sleep)
            misc_utils.Sleep(tc.sleep)

        pkt_loss_duration = 0
        # terminate background traffic and calculate packet loss duration
        if tc.background:
            if ping.TestTerminateBackgroundPing(tc, tc.pktsize,\
                  pktlossverif=tc.pktlossverif) != api.types.status.SUCCESS:
                api.Logger.error(
                    "Failed in Ping background command termination.")
                result = api.types.status.FAILURE
            # calculate max packet loss duration for background ping
            pkt_loss_duration = ping.GetMaxPktLossDuration(
                tc, interval=tc.interval)
            if pkt_loss_duration != 0:
                indent = "-" * 10
                if tc.pktlossverif:
                    # strict mode: any loss at all fails the test
                    result = api.types.status.FAILURE
                api.Logger.error(
                    f"{indent} Packet Loss duration during UPGRADE of {tc.nodes} is {pkt_loss_duration} secs {indent}"
                )
                if tc.allowed_down_time and (pkt_loss_duration >
                                             tc.allowed_down_time):
                    api.Logger.error(
                        f"{indent} Exceeded allowed Loss Duration {tc.allowed_down_time} secs {indent}"
                    )
                    # Failing test based on longer traffic loss duration is commented for now.
                    # enable below line when needed.
                    #result = api.types.status.FAILURE
            else:
                api.Logger.info("No Packet Loss Found during UPGRADE Test")

    # log-only check: a non-empty result here does not flip `result`
    if upgrade_utils.VerifyUpgLog(tc.nodes, tc.GetLogsDir()):
        api.Logger.error("Failed to verify the upgrademgr logs...")

    if result == api.types.status.SUCCESS:
        api.Logger.info(f"Upgrade: Completed Successfully for {tc.nodes}")
    else:
        api.Logger.info(f"Upgrade: Failed for {tc.nodes}")
    return result
Exemplo n.º 29
0
def Verify(tc):
    """Verify hitless-upgrade completion for the testcase.

    Checks, in order: upgrade status per node, configuration validation
    (failures logged but ignored), management connectivity, pds-agent debug
    data, workload connectivity, and background traffic (ping/iperf) packet
    loss. Dumps /obfl/upgrademgr.log over console when the early checks fail.

    Returns api.types.status.SUCCESS or api.types.status.FAILURE.
    """
    result = api.types.status.SUCCESS

    if api.IsDryrun():
        return result

    # Stop Trex traffic
    if tc.trex:
        traffic_gen.stop_trex_traffic(tc.trex_peers)

    # Check upgrade status. If a failure stage was injected, the upgrade is
    # expected to report failure; otherwise it must report success.
    if tc.failure_stage is not None:
        # TODO : details check on stage etc
        status = UpgStatus.UPG_STATUS_FAILED
    else:
        status = UpgStatus.UPG_STATUS_SUCCESS
    for node in tc.nodes:
        if not upgrade_utils.CheckUpgradeStatus(node, status):
            result = api.types.status.FAILURE

    # Validate the configuration. Use a separate variable so that an ignored
    # config-validation failure does not clobber an upgrade-status FAILURE
    # already recorded in 'result' above.
    cfg_result = upgrade_utils.HitlessUpgradeValidateConfig(tc)
    if cfg_result != api.types.status.SUCCESS:
        api.Logger.info("Ignoring the configuration validation failure")

    # verify mgmt connectivity
    if traffic.VerifyMgmtConnectivity(tc.nodes) != api.types.status.SUCCESS:
        api.Logger.error("Failed in Mgmt Connectivity Check after Upgrade .")
        result = api.types.status.FAILURE

    if result != api.types.status.SUCCESS:
        api.Logger.info("DUMP Upgrade Manager Logs")
        # Failure could be due to upgrade failure before/after switchover or
        # management connectivity failure. Hence dump the upgrade_mgr.log
        # via console for debug purpose.
        api.Logger.SetSkipLogPrefix(True)
        for node in tc.nodes:
            (resp,
             exit_code) = api.RunNaplesConsoleCmd(node,
                                                  "cat /obfl/upgrademgr.log",
                                                  True)
            if exit_code != 0:
                api.Logger.info("Failed to dump /obfl/upgrademgr.log from "
                                "node: %s, exit_code:%s " % (node, exit_code))
            else:
                api.Logger.info("Dump /obfl/upgrademgr.log from "
                                "node: %s, exit_code:%s " % (node, exit_code))
                lines = resp.split('\r\n')
                for line in lines:
                    api.Logger.verbose(line.strip())
        api.Logger.SetSkipLogPrefix(False)
        return api.types.status.FAILURE

    check_pds_agent_debug_data(tc)

    # verify workload connectivity
    if VerifyConnectivity(tc) != api.types.status.SUCCESS:
        api.Logger.error("Failed in Connectivity Check after Upgrade.")
        if not SKIP_CONNECTIVITY_FAILURE:
            result = api.types.status.FAILURE

    tc.sleep = 100
    # If rollout status is failure, then no need to wait for traffic test
    if result == api.types.status.SUCCESS:
        api.Logger.info("Sleep for %s secs for traffic test to complete" %
                        tc.sleep)
        misc_utils.Sleep(tc.sleep)

    # terminate background traffic and calculate packet loss duration
    result = ping_traffic_stop_and_verify(tc)
    if result == api.types.status.SUCCESS and tc.iperf:
        result = iperf_traffic_stop_and_verify(tc)

    if upgrade_utils.VerifyUpgLog(tc.nodes, tc.GetLogsDir()):
        api.Logger.error("Failed to verify the upgrademgr logs...")

    nodes = ",".join(tc.nodes)
    if result == api.types.status.SUCCESS:
        api.Logger.info(f"Upgrade: Completed Successfully for {nodes}")
    else:
        api.Logger.error(f"Upgrade: Failed for {nodes}")
    return result
Exemplo n.º 30
0
def Verify(tc):
    """Verify the rollout: wait for completion, re-learn endpoints, then
    check rollout status, connectivity and background-ping packet loss.

    Returns api.types.status.SUCCESS or api.types.status.FAILURE.
    """
    if tc.skip:
        return api.types.status.SUCCESS

    # A requested packet-loss check needs the background ping to have started.
    if tc.pktloss_verify and tc.background and tc.bg_cmd_resp is None:
        api.Logger.error("Failed in background Ping cmd trigger")
        return api.types.status.FAILURE

    if tc.resp is None:
        api.Logger.error("Received empty response for config request")
        return api.types.status.FAILURE
    for cmd in tc.resp.commands:
        api.PrintCommandResults(cmd)
        if cmd.exit_code != 0:
            api.Logger.error("Rollout request failed")
            return api.types.status.FAILURE

    # wait for upgrade to complete. status can be found from the presence of /update/pds_upg_status.txt
    api.Logger.info("Sleep for 70 secs before checking for /update/pds_upg_status.txt")
    misc_utils.Sleep(70)
    in_progress = True
    while in_progress:
        misc_utils.Sleep(1)
        req = api.Trigger_CreateExecuteCommandsRequest(serial=False)
        for node in tc.Nodes:
            api.Trigger_AddNaplesCommand(req, node, "grep -v in-progress /update/pds_upg_status.txt", timeout=2)
        api.Logger.info("Checking for status not in-progress in file /update/pds_upg_status.txt")
        resp = api.Trigger(req)

        # grep exits non-zero while the file is absent or only "in-progress";
        # keep polling until every node reports a terminal status.
        in_progress = False
        for cmd_resp in resp.commands:
            if cmd_resp.exit_code != 0:
                in_progress = True
            else:
                api.Logger.info("Status other than in-progress found in /update/pds_upg_status.txt")

    # Send gratuitous ARPs so endpoints are re-learnt after the upgrade.
    for attempt in range(10):
        api.Logger.info("Sending ARPing, retry count %s" % attempt)
        arping.SendGratArp(tc.wloads)
        misc_utils.Sleep(1)

    result = CheckRolloutStatus(tc)

    # ensure connectivity after upgrade
    if VerifyConnectivity(tc) != api.types.status.SUCCESS:
        api.Logger.error("Failed in Connectivity Check Post Upgrade.")
        result = api.types.status.FAILURE

    error_str = None  # retained from original; not used below
    if tc.pktloss_verify:
        if result == api.types.status.SUCCESS:
            # If rollout status is failure, then no need to wait for traffic test
            api.Logger.info("Sleep for %s secs for traffic test to complete" % tc.sleep)
            misc_utils.Sleep(tc.sleep)

        pkt_loss_duration = 0
        # terminate background traffic and calculate packet loss duration
        if tc.background:
            term_rc = ping.TestTerminateBackgroundPing(
                tc, tc.pktsize, pktlossverif=tc.pktlossverif)
            if term_rc != api.types.status.SUCCESS:
                api.Logger.error("Failed in Ping background command termination.")
                result = api.types.status.FAILURE
            pkt_loss_duration = ping.GetMaxPktLossDuration(tc, interval=tc.interval)
            if pkt_loss_duration == 0:
                api.Logger.info("No Packet Loss Found during UPGRADE Test")
            else:
                indent = "-" * 10
                if tc.pktlossverif:
                    result = api.types.status.FAILURE
                api.Logger.error(f"{indent} Packet Loss duration during UPGRADE of {tc.Nodes} is {pkt_loss_duration} secs {indent}")
                if tc.allowed_down_time and (pkt_loss_duration > tc.allowed_down_time):
                    api.Logger.error(f"{indent} Exceeded allowed Loss Duration {tc.allowed_down_time} secs {indent}")
                    result = api.types.status.FAILURE

    if upgrade_utils.VerifyUpgLog(tc.Nodes, tc.GetLogsDir()):
        api.Logger.error("Failed to verify the upgrade logs")
        result = api.types.status.FAILURE
    return result