def test_eviction_fromfiles_strictgoal():
    """Detect a single-goal eviction scenario from dumped cluster files.

    Loads the test cluster folder plus a daemonset resource, runs
    SingleGoalEvictionDetect and requires a plan whose string form
    mentions a StartPod action.

    Raises:
        Exception: when the solver produces no plan.
    """
    k = KubernetesCluster()
    k.load_dir(TEST_CLUSTER_FOLDER)
    # Context manager closes the resource file promptly (original leaked it).
    with open(TEST_DAEMONET) as f:
        k.create_resource(f.read())
    k._build_state()
    p = SingleGoalEvictionDetect(k.state_objects)
    p.select_target_service()
    p.run(timeout=6600, sessionName="test_eviction_fromfiles_strictgoal")
    if not p.plan:
        raise Exception("Could not solve %s" % p.__class__.__name__)
    print(Scenario(p.plan).asyaml())
    # p.plan is guaranteed truthy here (raise above), so the original
    # redundant "if p.plan:" guard is dropped; enumerate replaces the
    # manual counter.
    for i, a in enumerate(p.plan, start=1):
        print(
            i, ":", a.__class__.__name__, "\n",
            yaml.dump(
                {str(k): repr(v._get_value()) if v else f"NONE_VALUE:{v}"
                 for (k, v) in a.kwargs.items()},
                default_flow_style=False))
    assert "StartPod" in p.plan.__str__()
def test_stub_completion():
    """Build an elementary in-memory cluster, render it to YAML, reload the
    rendered state, enable the stub policy and run the solver."""
    cluster = KubernetesCluster()
    node_list = []
    pod_list = []

    # One active node with ample capacity.
    node = Node()
    node.metadata_name = "node 1"
    node.cpuCapacity = 25
    node.memCapacity = 25
    node.isNull = False
    node.status = STATUS_NODE["Active"]
    node_list.append(node)

    # Service whose eviction we want to detect.
    svc = Service()
    svc.metadata_name = "test-service"
    svc.amountOfActivePods = 0
    svc.isSearched = True

    dep = Deployment()
    dep.spec_replicas = 6
    dep.NumberOfPodsOnSameNodeForDeployment = 4

    build_running_pod_with_d(1, 2, 2, node, dep, None, svc, pod_list)

    cluster.state_objects.extend(node_list)
    cluster.state_objects.extend(pod_list)

    # Round-trip the state through YAML, then run with the stub policy.
    rendered = convert_space_to_yaml(cluster.state_objects, wrap_items=True)
    update(''.join(rendered))
    global_var = next(
        filter(lambda x: isinstance(x, GlobalVar), kalc_state_objects))
    global_var.policy.stub = True
    run()
def prepare_test_single_node_dies_2pod_killed_service_outage():
    """Assemble a one-node cluster with two running pods backing a single
    started service; used by node-outage tests.

    Returns:
        (cluster, globalVar, node) tuple.
    """
    cluster = KubernetesCluster()
    scheduler = next(
        filter(lambda x: isinstance(x, Scheduler), cluster.state_objects))
    global_var = next(
        filter(lambda x: isinstance(x, GlobalVar), cluster.state_objects))

    # Initial node state.
    node = Node()
    node.cpuCapacity = 5
    node.memCapacity = 5

    # Two running pods, each requesting 2 cpu / 2 mem.
    pod1 = build_running_pod(1, 2, 2, node)
    pod2 = build_running_pod(2, 2, 2, node)

    # Consumption matches the two pods above.
    node.currentFormalCpuConsumption = 4
    node.currentFormalMemConsumption = 4
    node.amountOfActivePods = 2

    # Service whose outage we want to detect; it has multiple pods but the
    # outage itself is detected via the pods-pending issue.
    svc = Service()
    svc.metadata_name = "test-service"
    svc.amountOfActivePods = 2
    svc.status = STATUS_SERV["Started"]

    pod1.targetService = svc
    pod2.targetService = svc
    pod1.hasService = True
    pod2.hasService = True

    cluster.state_objects.extend([node, pod1, pod2, svc])
    # print_objects(cluster.state_objects)
    return cluster, global_var, node
def test_exclude_regexp_unit():
    """mark_excluded with a glob pattern must clear `searchable` on the
    matching Service ("frontend").

    Raises:
        ValueError: when the excluded service is still searchable.
    """
    k = KubernetesCluster()
    k.load_dir(TEST_CLUSTER_FOLDER)
    # Context manager closes the resource file promptly (original leaked it).
    with open(TEST_DAEMONET) as f:
        k.create_resource(f.read())
    k._build_state()
    mark_excluded(k.state_objects, "Service:front*", skip_check=False)
    for p in filter(lambda x: isinstance(x, Service), k.state_objects):
        if str(p.metadata_name) == "frontend":
            if p.searchable:
                raise ValueError("exclude doesn't work")
def test_simple_load_create_scale():
    """Scaling two deployments to 5 replicas must leave every Deployment
    with a podList of exactly 5.

    Raises:
        ValueError: when any deployment's podList length differs from 5.
    """
    k = KubernetesCluster()
    k.load_dir(TEST_DEPLOYMENT_DUMP)
    # Context manager closes the resource file promptly (original leaked it).
    with open(TEST_DEPLOYMENT1) as f:
        k.create_resource(f.read())
    k.scale(5, "deployment/redis-master deployment/redis-master1")
    for p in filter(lambda x: isinstance(x, Deployment), k.state_objects):
        if len(p.podList) != 5:
            raise ValueError("Scale doesn't work")
def test_load_requests():
    """The created daemonset fluentd-elasticsearch must be present in the
    built state.

    Raises:
        ValueError: when the daemonset cannot be found.
    """
    k = KubernetesCluster()
    k.load_dir(TEST_CLUSTER_FOLDER)
    # Context manager closes the resource file promptly (original leaked it).
    with open(TEST_DAEMONET) as f:
        k.create_resource(f.read())
    k._build_state()
    for p in filter(lambda x: isinstance(x, DaemonSet), k.state_objects):
        if p.metadata_name == "fluentd-elasticsearch":
            return
    # Message fixed: the lookup is for a DaemonSet ("service loded" typo).
    raise ValueError("Could not find daemonset loaded")
def test_pod_target_attached():
    """Every deduplicated pod of deployment redis-master-create must have a
    target service attached."""
    k = KubernetesCluster()
    k.load_dir(TEST_TARGET_DUMP)
    # Context manager closes the resource file promptly (original leaked it).
    with open(TEST_TARGET_CREATE) as f:
        k.create_resource(f.read())
    k._build_state()
    deployments = filter(lambda x: isinstance(x, Deployment), k.state_objects)
    for deployment in deployments:
        if deployment.metadata_name._get_value() == "redis-master-create":
            for pod in util.objDeduplicatorByName(
                    deployment.podList._get_value()):
                # `is not None` instead of `!= None` (PEP 8 identity check).
                assert pod.targetService._get_value() is not None
def test_queue_status():
    """Test length and status of scheduler queue after load."""
    k = KubernetesCluster()
    k.load_dir(TEST_CLUSTER_FOLDER)
    # Context manager closes the resource file promptly (original leaked it).
    with open(TEST_DAEMONET) as f:
        k.create_resource(f.read())
    k._build_state()
    scheduler = next(
        filter(lambda x: isinstance(x, Scheduler), k.state_objects))
    nodes = list(filter(lambda x: isinstance(x, Node), k.state_objects))
    # assert scheduler.queueLength == len(nodes)
    assert scheduler.podQueue._get_value()
    assert scheduler.status == STATUS_SCHED["Changed"]
def test_anyservice_interrupted_fromfiles():
    """AnyServiceInterrupted over the dumped cluster must produce a plan.

    Raises:
        Exception: when the solver produces no plan.
    """
    k = KubernetesCluster()
    k.load_dir(TEST_CLUSTER_FOLDER)
    # Context manager closes the resource file promptly (original leaked it).
    with open(TEST_DAEMONET) as f:
        k.create_resource(f.read())
    k._build_state()
    mark_excluded_service(k.state_objects)
    p = AnyServiceInterrupted(k.state_objects)
    print_objects(k.state_objects)
    p.run(timeout=6600, sessionName="test_anyservice_interrupted_fromfiles")
    if not p.plan:
        raise Exception("Could not solve %s" % p.__class__.__name__)
    print(Scenario(p.plan).asyaml())
def test_load_pods_new():
    """Loading priority classes then pods must yield a running, labeled Pod
    at the expected position in state_objects."""
    k = KubernetesCluster()
    # Context managers close both source files promptly (original leaked them).
    with open(TEST_PRIORITYCLASS) as f:
        k.load(f.read())
    with open(TEST_PODS) as f:
        k.load(f.read())
    k._build_state()
    # TODO: check if pod is fully loaded
    # NOTE(review): magic index — presumably skips built-in objects plus the
    # loaded priority classes; confirm against the load order.
    pod = k.state_objects[2 + 3]
    assert isinstance(pod, Pod)
    assert len(pod.metadata_labels._get_value()) > 0
    assert pod.status == STATUS_POD["Running"]
    assert k.state_objects
def test_limits_for_pods_created():
    """Pods created from the daemonset must carry cpu/mem requests and a
    mem limit (all > -1).

    Raises:
        ValueError: when no such pod is found.
    """
    k = KubernetesCluster()
    k.load_dir(TEST_CLUSTER_FOLDER)
    # Context manager closes the resource file promptly (original leaked it).
    with open(TEST_DAEMONET) as f:
        k.create_resource(f.read())
    k._build_state()
    for p in filter(lambda x: isinstance(x, Pod), k.state_objects):
        if str(p.metadata_name).startswith("fluentd-elasticsearch") and \
                p.cpuRequest > -1 and \
                p.memRequest > -1 and \
                p.memLimit > -1:
            return
    # Message fixed: the lookup is for a Pod ("service loded" typo).
    raise ValueError("Could not find pod loaded")
def test_load_limits():
    """Deployment redis-master must load with cpu/mem requests and a mem
    limit (all > -1).

    Raises:
        ValueError: when no such deployment is found.
    """
    k = KubernetesCluster()
    k.load_dir(TEST_CLUSTER_FOLDER)
    # Context manager closes the resource file promptly (original leaked it).
    with open(TEST_DEPLOYMENT) as f:
        k.create_resource(f.read())
    k._build_state()
    for p in filter(lambda x: isinstance(x, Deployment), k.state_objects):
        if p.metadata_name == "redis-master" and \
                p.cpuRequest > -1 and \
                p.memRequest > -1 and \
                p.memLimit > -1:
            return
    # Message fixed: the lookup is for a Deployment ("service loded" typo).
    raise ValueError("Could not find deployment loaded")
def test_convert_node_problem():
    """Round-trip a hand-built cluster (node, two pods, service, deployment,
    priority class) through YAML into a second KubernetesCluster."""
    src = KubernetesCluster()
    scheduler = next(
        filter(lambda x: isinstance(x, Scheduler), src.state_objects))
    global_var = next(
        filter(lambda x: isinstance(x, GlobalVar), src.state_objects))

    # Initial node state.
    node = Node()
    node.cpuCapacity = 5
    node.memCapacity = 5

    # Two running pods, each requesting 2 cpu / 2 mem.
    pod1 = build_running_pod(1, 2, 2, node)
    pod2 = build_running_pod(2, 2, 2, node)

    # Consumption matches the two pods above.
    node.currentFormalCpuConsumption = 4
    node.currentFormalMemConsumption = 4
    node.amountOfActivePods = 2

    prio = PriorityClass()
    prio.priority = 10
    prio.metadata_name = "high-prio-test"

    # Service whose outage would be detected via the pods-pending issue.
    svc = Service()
    svc.metadata_name = "test-service"
    svc.amountOfActivePods = 2
    svc.status = STATUS_SERV["Started"]

    for pod in (pod1, pod2):
        pod.targetService = svc
        pod.hasService = True
        pod.priorityClass = prio

    dep = Deployment()
    dep.spec_replicas = 2
    dep.amountOfActivePods = 2
    pod1.hasDeployment = True
    pod2.hasDeployment = True
    dep.podList.add(pod1)
    dep.podList.add(pod2)

    src.state_objects.extend([node, pod1, pod2, svc, dep, prio])

    # Serialize and load everything into a fresh cluster.
    dst = KubernetesCluster()
    for chunk in convert_space_to_yaml(src.state_objects, wrap_items=True):
        # print(chunk)
        dst.load(chunk)
    dst._build_state()
    # TODO: test node outage exclusion
def test_anydeployment_interrupted_fromfiles():
    """NodeInterupted over dump + created deployment must produce a plan.

    Raises:
        Exception: when the solver produces no plan.
    """
    k = KubernetesCluster()
    k.load_dir(TEST_CLUSTER_FOLDER)
    # Context manager closes the resource file promptly (original leaked it).
    with open(TEST_DEPLOYMENT) as f:
        k.create_resource(f.read())
    k._build_state()
    mark_excluded_service(k.state_objects)
    print("------Objects before solver processing------")
    print_objects(k.state_objects)
    p = NodeInterupted(k.state_objects)
    p.run(timeout=6600, sessionName="test_anydeployment_interrupted_fromfiles")
    if not p.plan:
        raise Exception("Could not solve %s" % p.__class__.__name__)
    print("------Objects after solver processing------")
    print(Scenario(p.plan).asyaml())
    print_objects(k.state_objects)
def test_load_load_create_exeption():
    """Creating a resource that already exists in the dump must trigger an
    AlreadyExists assertion during _build_state."""
    k = KubernetesCluster()
    k.load_dir(TEST_CLUSTER_FOLDER)
    # Context manager closes the resource file promptly (original leaked it).
    with open(TEST_DEPLOYMENT) as f:
        k.create_resource(f.read())
    try:
        k._build_state()
    except AssertionError as e:
        print(str(e))
        assert str(e) == ("Error from server (AlreadyExists): "
                          "deployments.apps \"redis-master\" already exists")
    # NOTE(review): the test does not fail when no AssertionError is raised —
    # it just falls through to the lookup below; confirm this is intended.
    for p in filter(lambda x: isinstance(x, Deployment), k.state_objects):
        if p.metadata_name == "redis-master":
            return
    # Message typo fixed ("loded").
    raise ValueError("Could not find deployment loaded")
def test_load_folder():
    """After loading the dump folder, no pod may be left without a node."""
    cluster = KubernetesCluster()
    cluster.load_dir(TEST_CLUSTER_FOLDER)
    cluster._build_state()
    pod_list = [obj for obj in cluster.state_objects if isinstance(obj, Pod)]
    assert pod_list
    for pod in pod_list:
        # Orphan check: the pod must be bound to a real node.
        assert pod.atNode._property_value != Node.NODE_NULL
def test_spec_selector_labels():
    """Some Service in the dump must carry the app=redis-evict spec selector.

    Raises:
        Exception: when no service carries the selector.
    """
    cluster = KubernetesCluster()
    cluster.load_dir(TEST_CLUSTER_FOLDER)
    cluster._build_state()
    for obj in cluster.state_objects:
        if not isinstance(obj, Service):
            continue
        if labelFactory.get("app", "redis-evict") in obj.spec_selector._get_value():
            return
    raise Exception("Can not check labels load")
def test_load_folder_load_pod_labels():
    """Some Pod in the dump must carry the app=redis-evict metadata label.

    Raises:
        Exception: when no pod carries the label.
    """
    cluster = KubernetesCluster()
    cluster.load_dir(TEST_CLUSTER_FOLDER)
    cluster._build_state()
    for obj in cluster.state_objects:
        if not isinstance(obj, Pod):
            continue
        if labelFactory.get("app", "redis-evict") in obj.metadata_labels._get_value():
            return
    raise Exception("Can not check labels load")
def test_load_create():
    """apply_resource of the daemonset must merge with the dumped one:
    2 deduplicated pods and converted 10m/10Mi request/limit values.

    Raises:
        ValueError: when the daemonset cannot be found.
    """
    k = KubernetesCluster()
    k.load_dir(TEST_CLUSTER_FOLDER)
    # Context manager closes the resource file promptly (original leaked it).
    with open(TEST_DAEMONSET_APPLY) as f:
        k.apply_resource(f.read())
    k._build_state()
    for p in filter(lambda x: isinstance(x, DaemonSet), k.state_objects):
        if p.metadata_name == "fluentd-elasticsearch":
            assert len(util.objDeduplicatorByName(p.podList._get_value())) == 2
            assert p.cpuRequest._get_value() == \
                util.cpuConvertToAbstractProblem("10m")
            assert p.memRequest._get_value() == \
                util.memConvertToAbstractProblem("10Mi")
            assert p.memLimit._get_value() == \
                util.memConvertToAbstractProblem("10Mi")
            return
    # Message fixed: the lookup is for a DaemonSet ("service loded" typo).
    raise ValueError("Could not find daemonset loaded")
def test_service_load():
    """Service redis-master-create with label app=redis-create must exist in
    the dumped state.

    Raises:
        ValueError: when the service cannot be found.
    """
    cluster = KubernetesCluster()
    cluster.load_dir(TEST_CLUSTER_FOLDER)
    cluster._build_state()
    for svc in cluster.state_objects:
        if not isinstance(svc, Service):
            continue
        if svc.metadata_name == "redis-master-create" and \
                labelFactory.get("app", "redis-create") in svc.metadata_labels._get_value():
            return
    raise ValueError("Could not find service loded")
def test_priority_is_loaded():
    """PriorityClass high-priority must load with PreemptLowerPriority policy
    and a positive priority.

    Raises:
        ValueError: when the priority class cannot be found.
    """
    cluster = KubernetesCluster()
    cluster.load_dir(TEST_CLUSTER_FOLDER)
    cluster._build_state()
    for pc in cluster.state_objects:
        if not isinstance(pc, PriorityClass):
            continue
        if (pc.metadata_name == "high-priority"
                and pc.preemptionPolicy == POLICY["PreemptLowerPriority"]
                and pc.priority > 0):
            return
    raise ValueError("Could not find priority loded")
def test_heapster_load():
    """The heapster pod must reference the system-cluster-critical
    PriorityClass after loading the dump."""
    cluster = KubernetesCluster()
    cluster.load_dir(TEST_CLUSTER_FOLDER)
    cluster._build_state()
    heapster_pod = next(
        obj for obj in cluster.state_objects
        if isinstance(obj, Pod) and "heapster" in str(obj.metadata_name))
    critical_pc = next(
        obj for obj in cluster.state_objects
        if isinstance(obj, PriorityClass)
        and obj.metadata_name == "system-cluster-critical")
    # Compare both the unwrapped values and the wrapped properties.
    assert heapster_pod.priorityClass._get_value() == critical_pc._get_value()
    assert heapster_pod.priorityClass == critical_pc
def test_load_priorityclass_custom_high_priority():
    """The high-priority PriorityClass must load with a positive priority."""
    k = KubernetesCluster()
    # Context manager closes the source file promptly (original leaked it).
    with open(TEST_PRIORITYCLASS) as f:
        k.load(f.read())
    k._build_state()
    found = False
    for pc in filter(lambda x: isinstance(x, PriorityClass), k.state_objects):
        if pc.metadata_name == "high-priority":
            # print(pc.priority, pc.priority._get_value())
            assert pc.priority > 0
            found = True
    assert found
def test_load_folder_create_labels():
    """A created daemonset must expose its k8s-app=fluentd-logging label.

    Raises:
        Exception: when no daemonset carries the label.
    """
    k = KubernetesCluster()
    # k.load_dir(TEST_CLUSTER_FOLDER)
    # Context manager closes the resource file promptly (original leaked it).
    with open(TEST_DAEMONET) as f:
        k.create_resource(f.read())
    k._build_state()
    for ds in filter(lambda x: isinstance(x, DaemonSet), k.state_objects):
        if labelFactory.get(
                "k8s-app", "fluentd-logging") in ds.metadata_labels._get_value():
            return
    raise Exception("Can not check labels load")
def test_get_fullscript():
    """MoveRunningPodToAnotherNode must emit a non-empty script for a pod
    moved from an overloaded node to a free, label-matching one."""
    k = KubernetesCluster()
    p = Pod()
    p.status = STATUS_POD["Running"]
    p.metadata_name = "test-pod-1"
    p.cpuRequest = 2
    p.memRequest = 2
    n_orig = Node("orgi")
    n = Node()
    p.nodeSelectorList.add(n)
    n_orig.metadata_name = "ORIG"
    n_orig.currentFormalMemConsumption = 5
    n_orig.currentFormalCpuConsumption = 5
    n.status = STATUS_NODE["Active"]
    n.cpuCapacity = 10
    n.memCapacity = 10
    l1 = Label("a:b")
    l1.key = "a"
    l1.value = "b"
    l2 = Label("c:d")
    l2.key = "c"
    # BUG FIX: value was "b", inconsistent with the "c:d" label literal.
    l2.value = "d"
    n.metadata_labels.add(l1)
    n.metadata_labels.add(l2)
    d = Deployment()
    d.metadata_name = "dep-test1"
    d.podList.add(p)
    p.hasDeployment = True
    p.atNode = n_orig
    rs = ReplicaSet()
    rs.metadata_name = "rs-test1"
    rs.metadata_ownerReferences__name = "dep-test1"
    # typically, you can find correct replicaSet by ownerReferences
    # TODO: create utility function to do that
    k.state_objects.extend([d, n, p, rs])
    prob = Balance_pods_and_drain_node(k.state_objects)
    s = k.scheduler
    g = k.globalvar
    prob.MoveRunningPodToAnotherNode(p, n_orig, n, s, g)
    assert len(prob.script)
def run_dir_wo_cli(DUMP_local, CHANGE_local):
    """Load dump directories and change files, then run OptimisticRun
    without going through the CLI.

    Args:
        DUMP_local: None or iterable of dump directory paths.
        CHANGE_local: None or iterable of resource file paths to create.

    Raises:
        Exception: when the solver produces no plan.
    """
    k = KubernetesCluster()
    # `is not None` instead of `not (x is None)` (PEP 8).
    if DUMP_local is not None:
        for dump_item in DUMP_local:
            k.load_dir(dump_item)
    if CHANGE_local is not None:
        for change_item in CHANGE_local:
            # Context manager closes each change file promptly
            # (original leaked one handle per file).
            with open(change_item) as f:
                k.create_resource(f.read())
    k._build_state()
    p = OptimisticRun(k.state_objects)
    print("#### run_wo_cli:")
    print("#### print_objects before run: #####")
    print(print_objects(k.state_objects))
    p.run(timeout=999000, sessionName="test_OptimisticRun")
    if not p.plan:
        raise Exception("Could not solve %s" % p.__class__.__name__)
    print("#### print_objects after run: ######")
    print(print_objects(k.state_objects))
def test_cyclic_create():
    """Extending a cluster in-memory must be equivalent to replaying the
    pre-change snapshot plus the new objects in CREATE_MODE: the two
    full-state YAML renderings must diff as equal (names ignored)."""
    # Baseline cluster: one node, two running pods, one service.
    k, globalVar, n = prepare_test_single_node_dies_2pod_killed_service_outage()
    # Snapshot the state BEFORE the new objects are added.
    yamlStateBeforeCreate = convert_space_to_yaml(k.state_objects, wrap_items=True)
    pod_pending_1 = build_pending_pod(3, 2, 2, n)
    dnew = Deployment()
    dnew.amountOfActivePods = 0
    dnew.spec_replicas = 1
    # important to add as we extract status, priority spec from pod
    dnew.podList.add(pod_pending_1)
    snew = Service()
    snew.metadata_name = "test-service-new"
    snew.amountOfActivePods = 0
    pod_pending_1.targetService = snew
    # The objects to be replayed in CREATE_MODE on the second cluster.
    create_objects = [dnew, snew]
    yamlCreate = convert_space_to_yaml(create_objects, wrap_items=False,
                                       load_logic_support=False)
    # snew.status = STATUS_SERV["Started"]
    # Now extend the first cluster in-memory and snapshot the full state.
    k.state_objects.extend([pod_pending_1, dnew, snew])
    yamlState = convert_space_to_yaml(k.state_objects, wrap_items=True)
    # Second cluster: replay pre-change snapshot, then the created objects.
    k2 = KubernetesCluster()
    for y in yamlStateBeforeCreate:
        # print(y)
        k2.load(y)
    for y in yamlCreate:
        k2.load(y, mode=KubernetesCluster.CREATE_MODE)
    k2._build_state()
    # NOTE(review): assumes GlobalVar sits at index 1 of state_objects — confirm.
    globalVar = k2.state_objects[1]
    # print("--- RUN 2 ---")
    yamlState2 = convert_space_to_yaml(k2.state_objects, wrap_items=True)
    # for y in yamlState2:
    #     print(y)
    # Both renderings must be identical modulo object names.
    assert prepare_yamllist_for_diff(yamlState, ignore_names=True) == \
        prepare_yamllist_for_diff(yamlState2, ignore_names=True)
def test_single_node_dies_2pod_killed_deployment_outage_invload():
    """A YAML round-trip of the prepared cluster must still let the solver
    reach a state where both a node and a deployment are disrupted."""
    # Build the baseline cluster and serialize it.
    k, globalVar = prepare_test_single_node_dies_2pod_killed_deployment_outage()
    yamlState = convert_space_to_yaml(k.state_objects, wrap_items=True)
    k2 = KubernetesCluster()
    load_yaml(yamlState, k2)
    # NOTE(review): assumes GlobalVar sits at index 1 of state_objects — confirm.
    globalVar = k2.state_objects[1]

    class Task_check_deployments(Check_deployments):
        # Goal: both node and deployment end up disrupted. The lambda closes
        # over the rebound globalVar above (late binding).
        goal = lambda self: globalVar.is_node_disrupted == True and \
            globalVar.is_deployment_disrupted == True

    p = Task_check_deployments(k2.state_objects)
    p.run(timeout=200)
    # The plan must contain both the outage and the pod-kill actions.
    assert "NodeOutageFinished" in "\n".join([repr(x) for x in p.plan])
    assert "Initiate_killing_of_Pod_because_of_node_outage" in "\n".join(
        [repr(x) for x in p.plan])
    # TODO: test node outage exclusion
def reload_cluster_from_yaml(k, create_objects):
    """Serialize cluster `k` plus `create_objects` to YAML and load both
    into a fresh KubernetesCluster.

    Conversion failures are reported and the YAML reload is skipped
    (best-effort, matching the original tolerant behavior); the new
    cluster is still built and returned.

    Args:
        k: source cluster whose state_objects are serialized wrapped.
        create_objects: objects serialized unwrapped and loaded in
            CREATE_MODE.

    Returns:
        The freshly built KubernetesCluster.
    """
    perform_yaml_test = True
    # Pre-seed so the names are always bound even when conversion fails.
    yamlState = []
    yamlCreate = []
    try:
        yamlState = convert_space_to_yaml(k.state_objects, wrap_items=True)
    except Exception as e:
        # Typo fixed: was "conertion".
        print("yaml conversion error", e)
        perform_yaml_test = False
    try:
        yamlCreate = convert_space_to_yaml(create_objects, wrap_items=False,
                                           load_logic_support=False)
    except Exception as e:
        print("yaml 2 conversion error", e)
        perform_yaml_test = False
    k2 = KubernetesCluster()
    if perform_yaml_test:
        for y in yamlState:
            k2.load(y)
        for y in yamlCreate:
            k2.load(y, mode=KubernetesCluster.CREATE_MODE)
    k2._build_state()
    return k2
def test_load():
    """Daemonset fluentd-elasticsearch must load with converted 400m/400Mi
    request and limit values.

    Raises:
        ValueError: when the daemonset cannot be found.
    """
    cluster = KubernetesCluster()
    cluster.load_dir(TEST_CLUSTER_FOLDER)
    cluster._build_state()
    for ds in cluster.state_objects:
        if isinstance(ds, DaemonSet) and ds.metadata_name == "fluentd-elasticsearch":
            assert ds.cpuRequest._get_value() == \
                util.cpuConvertToAbstractProblem("400m")
            assert ds.memRequest._get_value() == \
                util.memConvertToAbstractProblem("400Mi")
            assert ds.memLimit._get_value() == \
                util.memConvertToAbstractProblem("400Mi")
            return
    raise ValueError("Could not find service loded")