def do_test() -> bool:
    """Run the Akri validating-webhook end-to-end test.

    The test checks that the CRDs are applied and Akri is in the expected
    initial state, dumps debugging information about the webhook's
    Deployment/ReplicaSet/Pod, then:

    1. creates, reads back and deletes a Configuration whose ``resources``
       are placed on the first broker container (expected to be accepted);
    2. tries to create a Configuration whose ``resources`` are placed
       directly on ``brokerPodSpec`` (expected to be rejected).

    Returns:
        True when the invalid Configuration is rejected with an
        ``ApiException``; False when a precondition fails or the invalid
        Configuration is accepted.

    Raises:
        ApiException: propagated unchanged if creating, reading or deleting
            the *valid* Configuration fails (those calls are not guarded).
    """
    # Local import so this block does not depend on the file's import list.
    import copy

    kubeconfig_path = shared_test_code.get_kubeconfig_path()
    print("Loading k8s config: {}".format(kubeconfig_path))
    config.load_kube_config(config_file=kubeconfig_path)

    # Get kubectl command
    kubectl_cmd = shared_test_code.get_kubectl_command()

    # Ensure Helm Akri installation applied CRDs and set up Agent and Controller
    print("Checking for CRDs")
    if not shared_test_code.crds_applied():
        print("CRDs not applied by helm chart")
        return False

    print("Checking for initial Akri state")
    if not shared_test_code.check_akri_state(1, 1, 0, 0, 0, 0):
        print("Akri not running in expected state")
        run("sudo {kubectl} get pods,services,akric,akrii --show-labels".
            format(kubectl=kubectl_cmd))
        return False

    # Enumerate Webhook resources
    print("Debugging:")

    print("Deployment:")
    run("sudo {kubectl} describe deployment/{service} "
        "--namespace={namespace}".format(kubectl=kubectl_cmd,
                                         service=WEBHOOK_NAME,
                                         namespace=NAMESPACE))

    print("ReplicaSet:")
    run("sudo {kubectl} describe replicaset "
        "--selector=app={service} "
        "--namespace={namespace}".format(kubectl=kubectl_cmd,
                                         service=WEBHOOK_NAME,
                                         namespace=NAMESPACE))

    print("Pod:")
    run("sudo {kubectl} describe pod "
        "--selector=app={service} "
        "--namespace={namespace}".format(kubectl=kubectl_cmd,
                                         service=WEBHOOK_NAME,
                                         namespace=NAMESPACE))

    # Apply Valid Akri Configuration
    print("Applying Valid Akri Configuration")

    # Use a private copy of the template and place resources in the correct
    # location. Copying keeps the module-level TEMPLATE pristine so the
    # invalid body built below does not inherit this modification.
    body = copy.deepcopy(TEMPLATE)
    body["spec"]["brokerSpec"]["brokerPodSpec"]["containers"][0][
        "resources"] = RESOURCES

    api = client.CustomObjectsApi()
    api.create_namespaced_custom_object(group=GROUP,
                                        version=VERSION,
                                        namespace=NAMESPACE,
                                        plural="configurations",
                                        body=body)

    # Check
    print("Retrieving Akri Configuration")
    akri_config = api.get_namespaced_custom_object(group=GROUP,
                                                   version=VERSION,
                                                   name=NAME,
                                                   namespace=NAMESPACE,
                                                   plural="configurations")
    print(akri_config)

    # Delete
    api.delete_namespaced_custom_object(
        group=GROUP,
        version=VERSION,
        name=NAME,
        namespace=NAMESPACE,
        plural="configurations",
        body=client.V1DeleteOptions(),
    )

    # Apply Invalid Akri Configuration
    res = False
    try:
        print("Applying Invalid (!) Akri Configuration")

        # Use a fresh copy of the template but(!) place resources in an
        # incorrect location
        body = copy.deepcopy(TEMPLATE)
        body["spec"]["brokerSpec"]["brokerPodSpec"]["resources"] = RESOURCES

        api.create_namespaced_custom_object(group=GROUP,
                                            version=VERSION,
                                            namespace=NAMESPACE,
                                            plural="configurations",
                                            body=body)
    except ApiException as e:
        print(
            "As expected, Invalid Akri Configuration generates API Exception")
        print("Status Code: {} [{}]".format(e.status, e.reason))
        print("Response: {}".format(e.body))
        res = True
    else:
        print("Expected APIException but none was thrown. This is an error!")

        # Debugging: check the Webhook's logs
        print("Webhook logs")
        run("sudo {kubectl} logs deployment/{service} --namespace={namespace}".
            format(kubectl=kubectl_cmd,
                   service=WEBHOOK_NAME,
                   namespace=NAMESPACE))

        res = False

    # Save Webhook logs (appended to WEBHOOK_LOG_PATH regardless of outcome)
    run("{kubectl} logs deployment/{service} --namespace={namespace} >> {file}"
        .format(kubectl=kubectl_cmd,
                service=WEBHOOK_NAME,
                namespace=NAMESPACE,
                file=WEBHOOK_LOG_PATH))

    print("Akri Validating Webhook test: {}".format(
        "Success" if res else "Failure"))
    return res
def do_test() -> bool:
    """Verify that a deleted broker pod is recreated.

    After checking that the CRDs are applied and Akri is in the expected
    initial state, the test deletes one of the two broker pods, waits for
    two broker pods to be running again, and confirms that the pod with the
    deleted pod's name now has a different uid.

    Returns:
        True when the broker pod was restored with a new uid; False when any
        check fails (cluster state is dumped via kubectl on failure).
    """
    kubeconfig_path = shared_test_code.get_kubeconfig_path()
    print("Loading k8s config: {}".format(kubeconfig_path))
    config.load_kube_config(config_file=kubeconfig_path)

    # Get kubectl command
    kubectl_cmd = shared_test_code.get_kubectl_command()

    def dump_cluster_state():
        # Print pods/services/Akri resources to help diagnose a failure.
        os.system('sudo {} get pods,services,akric,akrii --show-labels'.format(
            kubectl_cmd))

    # Ensure Helm Akri installation applied CRDs and set up agent and controller
    print("Checking for CRDs")
    if not shared_test_code.crds_applied():
        print("CRDs not applied by helm chart")
        return False

    print("Checking for initial Akri state")
    if not shared_test_code.check_akri_state(1, 1, 2, 2, 1, 2):
        print("Akri not running in expected state")
        dump_cluster_state()
        return False

    # Create kube client
    v1 = client.CoreV1Api()

    # Delete one of the broker pods
    broker_pod_selector = "{}={}".format(
        shared_test_code.CONFIGURATION_LABEL_NAME,
        shared_test_code.DEBUG_ECHO_NAME)
    brokers_info = shared_test_code.get_running_pod_names_and_uids(
        broker_pod_selector)
    if len(brokers_info) != 2:
        print("Expected to find 2 broker pods but found: {}".format(
            len(brokers_info)))
        dump_cluster_state()
        return False

    # There is a possible race condition here between when the `kubectl delete pod` returns,
    # when check_broker_pod_state validates that the pod is gone, and when the check_akri_state
    # validates that the broker pod has been restarted
    broker_pod_name = sorted(brokers_info.keys())[0]
    delete_pod_command = 'sudo {} delete pod {}'.format(
        kubectl_cmd, broker_pod_name)
    print("Deleting broker pod: {}".format(delete_pod_command))
    os.system(delete_pod_command)

    # Wait for there to be 2 brokers pods again
    if not shared_test_code.check_broker_pods_state(v1, 2):
        print(
            "Akri not running in expected state after broker pod restoration should have happened"
        )
        dump_cluster_state()
        return False

    restored_brokers_info = shared_test_code.get_running_pod_names_and_uids(
        broker_pod_selector)
    if len(restored_brokers_info) != 2:
        print("Expected to find 2 broker pods but found: {}".format(
            len(restored_brokers_info)))
        dump_cluster_state()
        return False

    # Make sure that the deleted broker uid is different from the restored broker pod uid ... signifying
    # that the Pod was restarted.
    # NOTE(review): this lookup assumes the restored pod reuses the deleted
    # pod's name; a differently named pod would raise KeyError - confirm.
    print("Restored broker pod uid should differ from original broker pod uid")
    if brokers_info[broker_pod_name] == restored_brokers_info[broker_pod_name]:
        print(
            "Restored broker pod uid [{}] should differ from original broker pod uid [{}]"
            .format(restored_brokers_info[broker_pod_name],
                    brokers_info[broker_pod_name]))
        dump_cluster_state()
        return False

    return True
def do_test() -> bool:
    """Exercise the debug-echo offline/online, slot-reconciliation and cleanup scenarios.

    Steps, each followed by a check of the expected Akri state:

    1. verify CRDs and the initial Akri state;
    2. write ``OFFLINE`` to ``/tmp/debug-echo-availability.txt`` in the Agent pod;
    3. write ``ONLINE`` to the same file;
    4. confirm the Agent logs contain the crictl success message;
    5. delete the debug-echo Configuration.

    Returns:
        True when every step leaves Akri in the expected state; False on the
        first failing check (cluster state is dumped via kubectl on most
        failures).
    """
    kubeconfig_path = shared_test_code.get_kubeconfig_path()
    print("Loading k8s config: {}".format(kubeconfig_path))
    config.load_kube_config(config_file=kubeconfig_path)

    # Get kubectl command
    kubectl_cmd = shared_test_code.get_kubectl_command()

    def dump_cluster_state():
        # Print pods/services/Akri resources to help diagnose a failure.
        os.system('sudo {} get pods,services,akric,akrii --show-labels'.format(
            kubectl_cmd))

    # Ensure Helm Akri installation applied CRDs and set up agent and controller
    print("Checking for CRDs")
    if not shared_test_code.crds_applied():
        print("CRDs not applied by helm chart")
        return False

    print("Checking for initial Akri state")
    if not shared_test_code.check_akri_state(1, 1, 2, 2, 1, 2):
        print("Akri not running in expected state")
        dump_cluster_state()
        return False

    # Do offline scenario
    print("Writing to Agent pod {} that device offline".format(
        shared_test_code.agent_pod_name))
    # The nested double quotes are joined by the outer shell into a single
    # argument, so the pod runs: echo OFFLINE > /tmp/debug-echo-availability.txt
    os.system(
        'sudo {} exec -i {} -- /bin/bash -c "echo "OFFLINE" > /tmp/debug-echo-availability.txt"'
        .format(kubectl_cmd, shared_test_code.agent_pod_name))

    print("Checking Akri state after taking device offline")
    if not shared_test_code.check_akri_state(1, 1, 0, 0, 0, 0):
        print("Akri not running in expected state after taking device offline")
        dump_cluster_state()
        return False

    # Do back online scenario
    print("Writing to Agent pod {} that device online".format(
        shared_test_code.agent_pod_name))
    os.system(
        'sudo {} exec -i {} -- /bin/bash -c "echo "ONLINE" > /tmp/debug-echo-availability.txt"'
        .format(kubectl_cmd, shared_test_code.agent_pod_name))

    print("Checking Akri state after bringing device back online")
    if not shared_test_code.check_akri_state(1, 1, 2, 2, 1, 2):
        print(
            "Akri not running in expected state after bringing device back online"
        )
        dump_cluster_state()
        return False

    # Check Akri slot reconciliation logs for success
    print("Check logs for Agent slot-reconciliation for pod {}".format(
        shared_test_code.agent_pod_name))
    # `grep -c` prints the number of matching lines and exits 0 only when at
    # least one line matched. The previous `wc -l | grep -v 0` pipeline
    # wrongly failed for any count containing the digit 0 (10, 20, 100, ...).
    result = os.system(
        'sudo {} logs {} | grep -c "get_node_slots - crictl called successfully"'
        .format(kubectl_cmd, shared_test_code.agent_pod_name))
    if result != 0:
        print(
            "Akri failed to successfully connect to crictl via the CRI socket")
        return False

    # Do cleanup scenario
    print("Deleting Akri configuration: {}".format(
        shared_test_code.DEBUG_ECHO_NAME))
    os.system("sudo {} delete akric {}".format(
        kubectl_cmd, shared_test_code.DEBUG_ECHO_NAME))

    print("Checking Akri state after deleting configuration")
    if not shared_test_code.check_akri_state(1, 1, 0, 0, 0, 0):
        print(
            "Akri not running in expected state after deleting configuration")
        dump_cluster_state()
        return False

    return True
def do_test() -> bool:
    """Run the combined debug-echo end-to-end test.

    Steps, in order:

    1. verify CRDs and the initial Akri state;
    2. toggle the device offline and back online by writing to
       ``/tmp/debug-echo-availability.txt`` in the Agent pod, checking the
       Akri state after each change;
    3. store the Agent logs in a temporary file (up to three attempts) and
       grep them for the crictl success message;
    4. delete one broker pod and confirm it is restored with a different uid;
    5. delete the debug-echo Configuration and check the final Akri state.

    Returns:
        True when every step succeeds; False on the first failing check
        (diagnostics are printed before returning).
    """
    kubeconfig_path = shared_test_code.get_kubeconfig_path()
    print("Loading k8s config: {}".format(kubeconfig_path))
    config.load_kube_config(config_file=kubeconfig_path)

    # Get kubectl command
    kubectl_cmd = shared_test_code.get_kubectl_command()

    def dump_cluster_state():
        # Print pods/services/Akri resources to help diagnose a failure.
        os.system('sudo {} get pods,services,akric,akrii --show-labels'.format(
            kubectl_cmd))

    # Ensure Helm Akri installation applied CRDs and set up agent and controller
    print("Checking for CRDs")
    if not shared_test_code.crds_applied():
        print("CRDs not applied by helm chart")
        return False

    print("Checking for initial Akri state")
    if not shared_test_code.check_akri_state(1, 1, 2, 2, 1, 2):
        print("Akri not running in expected state")
        dump_cluster_state()
        return False

    #
    # Check agent responds to dynamic offline/online resource
    #
    print("Writing to Agent pod {} that device offline".format(
        shared_test_code.agent_pod_name))
    # The nested double quotes are joined by the outer shell into a single
    # argument, so the pod runs: echo OFFLINE > /tmp/debug-echo-availability.txt
    os.system(
        'sudo {} exec -i {} -- /bin/sh -c "echo "OFFLINE" > /tmp/debug-echo-availability.txt"'
        .format(kubectl_cmd, shared_test_code.agent_pod_name))

    print("Checking Akri state after taking device offline")
    if not shared_test_code.check_akri_state(1, 1, 0, 0, 0, 0):
        print("Akri not running in expected state after taking device offline")
        dump_cluster_state()
        return False

    # Do back online scenario
    print("Writing to Agent pod {} that device online".format(
        shared_test_code.agent_pod_name))
    os.system(
        'sudo {} exec -i {} -- /bin/sh -c "echo "ONLINE" > /tmp/debug-echo-availability.txt"'
        .format(kubectl_cmd, shared_test_code.agent_pod_name))

    print("Checking Akri state after bringing device back online")
    if not shared_test_code.check_akri_state(1, 1, 2, 2, 1, 2):
        print(
            "Akri not running in expected state after bringing device back online"
        )
        dump_cluster_state()
        return False

    #
    # Check that slot reconciliation is working on agent
    #
    print("Check logs for Agent slot-reconciliation for pod {}".format(
        shared_test_code.agent_pod_name))
    temporary_agent_log_path = "/tmp/agent_log.txt"
    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        log_result = subprocess.run('sudo {} logs {} > {}'.format(
            kubectl_cmd, shared_test_code.agent_pod_name,
            temporary_agent_log_path),
                                    shell=True)
        if log_result.returncode == 0:
            print("Successfully stored Agent logs in {}".format(
                temporary_agent_log_path))
            break
        print(
            "Failed to get logs from {} pod with result {} on attempt {} of {}".
            format(shared_test_code.agent_pod_name, log_result, attempt,
                   max_attempts))
    else:
        # Every attempt failed, so there is no log file to inspect.
        return False

    grep_result = subprocess.run([
        'grep', "get_node_slots - crictl called successfully",
        temporary_agent_log_path
    ])
    if grep_result.returncode != 0:
        print(
            "Akri failed to successfully connect to crictl via the CRI socket with return value of {}"
            .format(grep_result.returncode))
        # Log information to understand why error occurred
        dump_cluster_state()
        os.system('grep get_node_slots {}'.format(temporary_agent_log_path))
        return False

    #
    # Check that broker is recreated if it is deleted
    #
    broker_pod_selector = "{}={}".format(
        shared_test_code.CONFIGURATION_LABEL_NAME,
        shared_test_code.DEBUG_ECHO_NAME)
    brokers_info = shared_test_code.get_running_pod_names_and_uids(
        broker_pod_selector)
    if len(brokers_info) != 2:
        print("Expected to find 2 broker pods but found: {}".format(
            len(brokers_info)))
        dump_cluster_state()
        return False

    # There is a possible race condition here between when the `kubectl delete pod` returns,
    # when check_broker_pod_state validates that the pod is gone, and when the check_akri_state
    # validates that the broker pod has been restarted
    broker_pod_name = sorted(brokers_info.keys())[0]
    delete_pod_command = 'sudo {} delete pod {}'.format(
        kubectl_cmd, broker_pod_name)
    print("Deleting broker pod: {}".format(delete_pod_command))
    os.system(delete_pod_command)

    # Create kube client
    v1 = client.CoreV1Api()

    # Wait for there to be 2 brokers pods again
    if not shared_test_code.check_broker_pods_state(v1, 2):
        print(
            "Akri not running in expected state after broker pod restoration should have happened"
        )
        dump_cluster_state()
        return False

    restored_brokers_info = shared_test_code.get_running_pod_names_and_uids(
        broker_pod_selector)
    if len(restored_brokers_info) != 2:
        print("Expected to find 2 broker pods but found: {}".format(
            len(restored_brokers_info)))
        dump_cluster_state()
        return False

    # Make sure that the deleted broker uid is different from the restored broker pod uid ... signifying
    # that the Pod was restarted.
    # NOTE(review): this lookup assumes the restored pod reuses the deleted
    # pod's name; a differently named pod would raise KeyError - confirm.
    print("Restored broker pod uid should differ from original broker pod uid")
    if brokers_info[broker_pod_name] == restored_brokers_info[broker_pod_name]:
        print(
            "Restored broker pod uid [{}] should differ from original broker pod uid [{}]"
            .format(restored_brokers_info[broker_pod_name],
                    brokers_info[broker_pod_name]))
        dump_cluster_state()
        return False

    # Do cleanup scenario
    print("Deleting Akri configuration: {}".format(
        shared_test_code.DEBUG_ECHO_NAME))
    os.system("sudo {} delete akric {}".format(
        kubectl_cmd, shared_test_code.DEBUG_ECHO_NAME))

    print("Checking Akri state after deleting configuration")
    if not shared_test_code.check_akri_state(1, 1, 0, 0, 0, 0):
        print(
            "Akri not running in expected state after deleting configuration")
        dump_cluster_state()
        return False

    return True