def test_run_couchbase_node_drain(self, cb_setup, node_type='master'):
    """
    Test couchbase workload with node drain
    """
    # Check worker node utilization (adm_top)
    get_node_resource_utilization_from_adm_top(
        node_type='worker', print_table=True
    )

    # Node drain with specific node type
    typed_nodes = node.get_typed_nodes(
        node_type=node_type, num_of_nodes=1
    )
    typed_node_name = typed_nodes[0].name

    # Node maintenance - to gracefully terminate all pods on the node
    node.drain_nodes([typed_node_name])

    # Make the node schedulable again
    node.schedule_nodes([typed_node_name])

    # Perform cluster and Ceph health checks
    self.sanity_helpers.health_check()

    for sample in TimeoutSampler(300, 5, self.cb.result.done):
        if sample:
            break
        else:
            logging.info(
                "#### ....Waiting for couchbase threads to complete..."
            )
    utils.ceph_health_check()
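# A minimal, self-contained sketch of what a TimeoutSampler-style helper does
# (hedged: illustrative only, not the ocs_ci implementation). It repeatedly
# calls ``func`` every ``sleep`` seconds and yields each result until
# ``timeout`` seconds have elapsed; the caller breaks out of the loop on a
# truthy sample, as in the test above.
import time


class TimeoutSamplerSketch:
    def __init__(self, timeout, sleep, func, *args, **kwargs):
        self.timeout = timeout
        self.sleep = sleep
        self.func = func
        self.args = args
        self.kwargs = kwargs

    def __iter__(self):
        deadline = time.time() + self.timeout
        while time.time() < deadline:
            # Yield the latest result of the sampled callable
            yield self.func(*self.args, **self.kwargs)
            time.sleep(self.sleep)
        raise TimeoutError(f"Timed out after {self.timeout} seconds")


# Usage mirroring the loop in the test above (``workload_result.done`` is a
# hypothetical callable returning True once the workload has finished):
# for sample in TimeoutSamplerSketch(300, 5, workload_result.done):
#     if sample:
#         break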
def test_sql_workload_simple(self, pgsql):
    """
    This is a basic pgsql workload
    """
    # Deploy postgres
    pgsql.setup_postgresql(replicas=3)

    # Create pgbench benchmark
    pgsql.create_pgbench_benchmark(
        replicas=3, clients=3, transactions=600
    )

    # Start measuring time
    start_time = datetime.now()

    # Check worker node utilization (adm_top)
    get_node_resource_utilization_from_adm_top(
        node_type='worker', print_table=True
    )

    # Wait for pg_bench pod to be initialized and complete
    pgsql.wait_for_pgbench_status(status=constants.STATUS_COMPLETED)

    # Calculate the time from running state to completed state
    end_time = datetime.now()
    diff_time = end_time - start_time
    log.info(
        f"\npgbench pod reached completed state after {diff_time.seconds} seconds\n"
    )

    # Get pgbench pods
    pgbench_pods = pgsql.get_pgbench_pods()

    # Validate pgbench run and parse logs
    pgsql.validate_pgbench_run(pgbench_pods)
def test_run_couchbase_node_reboot(self, cb_setup, nodes, pod_name_of_node):
    """
    Test couchbase workload with node reboot
    """
    # Pick the node list based on the pod type the node hosts
    if pod_name_of_node == 'couchbase':
        node_list = self.cb.get_couchbase_nodes()
    elif pod_name_of_node == 'osd':
        node_list = get_osd_running_nodes()
    elif pod_name_of_node == 'master':
        node_list = get_master_nodes()

    # get_node_objs expects a list of node names, so wrap the chosen name
    node_1 = get_node_objs([node_list[random.randint(0, len(node_list) - 1)]])

    # Check worker and master node utilization (adm_top)
    get_node_resource_utilization_from_adm_top(
        node_type='worker', print_table=True
    )
    get_node_resource_utilization_from_adm_top(
        node_type='master', print_table=True
    )

    # Restart relevant node
    nodes.restart_nodes(node_1)

    for sample in TimeoutSampler(300, 5, self.cb.result.done):
        if sample:
            break
        else:
            logging.info(
                "#### ....Waiting for couchbase threads to complete..."
            )

    self.sanity_helpers.health_check()
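# ``pod_name_of_node`` has no default, so in the original suite it is expected
# to be supplied via pytest parametrization. The sketch below is a
# hypothetical, minimal illustration of that mechanism; the argvalues and the
# helper name ``select_nodes_for`` are assumptions, not part of the original
# tests.
import pytest


def select_nodes_for(pod_name_of_node):
    # Placeholder for the per-pod-type node selection done in the tests above
    return [f"{pod_name_of_node}-node-0"]


@pytest.mark.parametrize("pod_name_of_node", ["couchbase", "osd", "master"])
def test_node_selection_sketch(pod_name_of_node):
    selected = select_nodes_for(pod_name_of_node)
    assert len(selected) == 1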
def test_run_pgsql_node_drain(self, pgsql, transactions=5600, node_type="worker"):
    """
    Test pgsql workload
    """
    # Create pgbench benchmark
    pgsql.create_pgbench_benchmark(
        replicas=3, transactions=transactions, clients=3
    )

    # Start measuring time
    start_time = datetime.now()

    # Wait for pgbench pod to reach running state
    pgsql.wait_for_pgbench_status(status=constants.STATUS_RUNNING)

    # Check worker node utilization (adm_top)
    get_node_resource_utilization_from_adm_top(
        node_type="worker", print_table=True
    )

    # Select a node where pgbench is not running, for drain
    typed_nodes = [
        node1.name for node1 in node.get_nodes(node_type=node_type)
    ]
    filter_list = pgsql.filter_pgbench_nodes_from_nodeslist(typed_nodes)
    typed_node_name = filter_list[random.randint(0, len(filter_list) - 1)]
    log.info(f"Selected node {typed_node_name} for node drain operation")

    # Node maintenance - to gracefully terminate all pods on the node
    node.drain_nodes([typed_node_name])

    # Make the node schedulable again
    node.schedule_nodes([typed_node_name])

    # Perform cluster and Ceph health checks
    self.sanity_helpers.health_check(tries=40)

    # Wait for pg_bench pod to complete
    pgsql.wait_for_pgbench_status(status=constants.STATUS_COMPLETED)

    # Calculate the time from running state to completed state
    end_time = datetime.now()
    diff_time = end_time - start_time
    log.info(
        f"\npgbench pod reached completed state after {diff_time.seconds} seconds\n"
    )

    # Get pgbench pods
    pgbench_pods = pgsql.get_pgbench_pods()

    # Validate pgbench run and parse logs
    pgsql.validate_pgbench_run(pgbench_pods)
def test_run_couchbase_node_reboot(self, cb_setup, nodes, pod_name_of_node):
    """
    Test couchbase workload with node reboot
    """
    # Check worker and master node utilization (adm_top)
    get_node_resource_utilization_from_adm_top(
        node_type="worker", print_table=True
    )
    get_node_resource_utilization_from_adm_top(
        node_type="master", print_table=True
    )

    # Pick the node(s) to restart based on the pod type the node hosts
    if pod_name_of_node == "couchbase":
        node_list = self.cb.get_couchbase_nodes()
    elif pod_name_of_node == "osd":
        node_list = get_osd_running_nodes()
    elif pod_name_of_node == "master":
        master_node = get_nodes(pod_name_of_node, num_of_nodes=1)

    # Restart relevant node
    if pod_name_of_node == "master":
        nodes.restart_nodes(master_node, wait=False)
        waiting_time = 40
        log.info(f"Waiting {waiting_time} seconds...")
        time.sleep(waiting_time)
    else:
        # get_node_objs expects a list of node names, so wrap the chosen name
        restart_node = get_node_objs(
            [node_list[random.randint(0, len(node_list) - 1)]]
        )
        nodes.restart_nodes(restart_node)

    # Validate all nodes and services are in READY state and up.
    # Note: the function object is passed to retry() and the wrapped callable
    # is then invoked with its arguments, so failures are actually retried.
    retry(
        (CommandFailed, TimeoutError, AssertionError, ResourceWrongStatusException),
        tries=60,
        delay=15,
    )(ocp.wait_for_cluster_connectivity)(tries=400)
    retry(
        (CommandFailed, TimeoutError, AssertionError, ResourceWrongStatusException),
        tries=60,
        delay=15,
    )(wait_for_nodes_status)(timeout=1800)

    # Wait for the couchbase background workload to finish
    bg_handler = flowtest.BackgroundOps()
    bg_ops = [self.cb.result]
    retry(CommandFailed, tries=60, delay=15)(
        bg_handler.wait_for_bg_operations
    )(bg_ops, timeout=3600)

    self.sanity_helpers.health_check(tries=40)
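# A minimal, self-contained sketch of the retry decorator-factory pattern used
# above (hedged: illustrative only, not the ocs_ci implementation). The key
# point is that retry(...) returns a decorator; applying it to the function
# object produces a wrapped callable, which is only then called with its
# arguments. ``flaky_check`` is a hypothetical stand-in for a readiness check.
import time


def retry(exceptions, tries=3, delay=1):
    """Return a decorator that retries the wrapped callable on ``exceptions``."""

    def decorator(func):
        def wrapper(*args, **kwargs):
            for attempt in range(1, tries + 1):
                try:
                    return func(*args, **kwargs)
                except exceptions:
                    if attempt == tries:
                        raise
                    time.sleep(delay)

        return wrapper

    return decorator


def flaky_check(threshold):
    # Stand-in for a cluster readiness check that may raise transiently
    print(f"checking with threshold={threshold}")
    return True


# Correct usage: pass the function, then call the wrapped version.
retry((TimeoutError,), tries=5, delay=2)(flaky_check)(threshold=400)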
def test_run_pgsql_node_drain(self, pgsql, transactions=900, node_type="master"):
    """
    Test pgsql workload
    """
    # Create pgbench benchmark
    pgsql.create_pgbench_benchmark(
        replicas=3, transactions=transactions, clients=3
    )

    # Start measuring time
    start_time = datetime.now()

    # Wait for pgbench pod to reach running state
    pgsql.wait_for_pgbench_status(status=constants.STATUS_RUNNING)

    # Check worker node utilization (adm_top)
    get_node_resource_utilization_from_adm_top(
        node_type="worker", print_table=True
    )

    # Node drain with specific node type
    typed_nodes = node.get_nodes(node_type=node_type, num_of_nodes=1)
    typed_node_name = typed_nodes[0].name

    # Node maintenance - to gracefully terminate all pods on the node
    node.drain_nodes([typed_node_name])

    # Make the node schedulable again
    node.schedule_nodes([typed_node_name])

    # Perform cluster and Ceph health checks
    self.sanity_helpers.health_check()

    # Wait for pg_bench pod to complete
    pgsql.wait_for_pgbench_status(status=constants.STATUS_COMPLETED)

    # Calculate the time from running state to completed state
    end_time = datetime.now()
    diff_time = end_time - start_time
    log.info(
        f"\npgbench pod reached completed state after {diff_time.seconds} seconds\n"
    )

    # Get pgbench pods
    pgbench_pods = pgsql.get_pgbench_pods()

    # Validate pgbench run and parse logs
    pgsql.validate_pgbench_run(pgbench_pods)
def test_run_pgsql_reboot_node(self, pgsql, nodes, transactions, pod_name):
    """
    Test pgsql workload
    """
    # Create pgbench benchmark
    pgsql.create_pgbench_benchmark(
        replicas=3, transactions=transactions, clients=3
    )

    # Start measuring time
    start_time = datetime.now()

    # Wait for pgbench pod to reach running state
    pgsql.wait_for_pgbench_status(status=constants.STATUS_RUNNING)

    # Choose a node based on the pod it contains
    if pod_name == 'postgres':
        node_list = pgsql.get_pgsql_nodes()
    elif pod_name == 'osd':
        node_list = get_osd_running_nodes()

    # get_node_objs expects a list of node names, so wrap the chosen name
    node_1 = get_node_objs([node_list[random.randint(0, len(node_list) - 1)]])

    # Check worker node utilization (adm_top)
    get_node_resource_utilization_from_adm_top(
        node_type='worker', print_table=True
    )

    # Restart relevant node
    nodes.restart_nodes(node_1)

    # Wait for pg_bench pod to complete
    pgsql.wait_for_pgbench_status(status=constants.STATUS_COMPLETED)

    # Calculate the time from running state to completed state
    end_time = datetime.now()
    diff_time = end_time - start_time
    log.info(
        f"\npgbench pod reached completed state after {diff_time.seconds} seconds\n"
    )

    # Get pgbench pods
    pgbench_pods = pgsql.get_pgbench_pods()

    # Validate pgbench run and parse logs
    pgsql.validate_pgbench_run(pgbench_pods)

    # Perform cluster and Ceph health checks
    self.sanity_helpers.health_check()
def test_run_pgsql_reboot_node(self, pgsql, nodes, transactions, pod_name):
    """
    Test pgsql workload
    """
    # Create pgbench benchmark
    pgsql.create_pgbench_benchmark(replicas=1, transactions=transactions)

    # Start measuring time
    start_time = datetime.now()

    # Wait for pgbench pod to reach running state
    pgsql.wait_for_pgbench_status(status=constants.STATUS_RUNNING)

    # Select a node where pgbench is not running and reboot it
    osd_nodes_list = get_osd_running_nodes()
    node_list = pgsql.filter_pgbench_nodes_from_nodeslist(osd_nodes_list)
    # get_node_objs expects a list of node names, so wrap the chosen name
    node_1 = get_node_objs([node_list[random.randint(0, len(node_list) - 1)]])
    log.info(f"Selected node {node_1} for reboot operation")

    # Check worker node utilization (adm_top)
    get_node_resource_utilization_from_adm_top(
        node_type="worker", print_table=True
    )

    # Restart relevant node
    nodes.restart_nodes(node_1)

    # Wait for pg_bench pod to complete
    pgsql.wait_for_pgbench_status(status=constants.STATUS_COMPLETED)

    # Calculate the time from running state to completed state
    end_time = datetime.now()
    diff_time = end_time - start_time
    log.info(
        f"\npgbench pod reached completed state after {diff_time.seconds} seconds\n"
    )

    # Get pgbench pods
    pgbench_pods = pgsql.get_pgbench_pods()

    # Validate pgbench run and parse logs
    pgsql.validate_pgbench_run(pgbench_pods)

    # Perform cluster and Ceph health checks
    self.sanity_helpers.health_check(tries=40)
def test_run_pgsql_respin_pod(self, pgsql, transactions, pod_name):
    """
    Test pgsql workload
    """
    # Create pgbench benchmark
    pgsql.create_pgbench_benchmark(
        replicas=3, transactions=transactions, clients=3
    )

    # Start measuring time
    start_time = datetime.now()

    # Wait for pgbench pod to reach running state
    pgsql.wait_for_pgbench_status(status=constants.STATUS_RUNNING)

    # Check worker node utilization (adm_top)
    get_node_resource_utilization_from_adm_top(
        node_type="worker", print_table=True
    )

    # Respin relevant pod
    if pod_name == "postgres":
        pgsql.respin_pgsql_app_pod()
    else:
        log.info(f"Respin Ceph pod {pod_name}")
        disruption = disruption_helpers.Disruptions()
        disruption.set_resource(resource=f"{pod_name}")
        disruption.delete_resource()

    # Wait for pg_bench pod to complete
    pgsql.wait_for_pgbench_status(status=constants.STATUS_COMPLETED)

    # Calculate the time from running state to completed state
    end_time = datetime.now()
    diff_time = end_time - start_time
    log.info(
        f"\npgbench pod reached completed state after {diff_time.seconds} seconds\n"
    )

    # Get pgbench pods
    pgbench_pods = pgsql.get_pgbench_pods()

    # Validate pgbench run and parse logs
    pgsql.validate_pgbench_run(pgbench_pods)

    # Perform cluster and Ceph health checks
    self.sanity_helpers.health_check(tries=40)
def test_scale_pgsql(self, pgsql):
    """
    Test case to scale pgsql pods:
    * Add worker nodes to existing cluster
    * Label new worker node
    * Create pgsql to run 200 pods on newly added worker nodes
    """
    replicas = 200  # Number of postgres and pgbench pods to be deployed
    timeout = replicas * 100  # Time in seconds to wait for pgbench pods to be created

    # Add worker nodes to cluster
    scale_pgsql.add_worker_node()

    # Check ceph health status
    utils.ceph_health_check()

    # Deploy postgres
    pgsql.setup_postgresql(
        replicas=replicas, node_selector=constants.SCALE_NODE_SELECTOR
    )

    # Create pgbench benchmark
    pgsql.create_pgbench_benchmark(
        replicas=replicas, clients=5, transactions=60, timeout=timeout
    )

    # Check worker node utilization (adm_top)
    get_node_resource_utilization_from_adm_top(
        node_type="worker", print_table=True
    )

    # Wait for pg_bench pod to be initialized and complete
    pgsql.wait_for_pgbench_status(
        status=constants.STATUS_COMPLETED, timeout=timeout
    )

    # Get pgbench pods
    pgbench_pods = pgsql.get_pgbench_pods()

    # Validate pgbench run and parse logs
    pgsql.validate_pgbench_run(pgbench_pods)

    # Check ceph health status
    utils.ceph_health_check()
def test_run_couchbase_node_drain(self, cb_setup, node_type="master"):
    """
    Test couchbase workload with node drain
    """
    # Check worker node utilization (adm_top)
    get_node_resource_utilization_from_adm_top(
        node_type="worker", print_table=True
    )

    # Node drain with specific node type
    typed_nodes = node.get_nodes(node_type=node_type, num_of_nodes=1)
    typed_node_name = typed_nodes[0].name

    # Node maintenance - to gracefully terminate all pods on the node
    node.drain_nodes([typed_node_name])

    # Make the node schedulable again
    node.schedule_nodes([typed_node_name])

    # Wait for the couchbase background workload to finish
    bg_handler = flowtest.BackgroundOps()
    bg_ops = [self.cb.result]
    bg_handler.wait_for_bg_operations(bg_ops, timeout=3600)

    # Perform cluster and Ceph health checks
    self.sanity_helpers.health_check()
def test_pgsqlperf_workload(self, pgsql):
    """
    Test case to set up a postgres database pod and run the pgbench
    benchmark to measure performance metrics
    """
    # Deployment of postgres pod
    pgsql.setup_postgresql(replicas=1)

    # Create pgbench benchmark
    pgsql.create_pgbench_benchmark(replicas=1, transactions=100)

    # Check worker node utilization
    get_node_resource_utilization_from_adm_top(
        node_type="worker", print_table=True
    )

    # Wait for pg_bench pod to be initialized and completed
    pgsql.wait_for_pgbench_status(status=constants.STATUS_COMPLETED)

    # Get pgbench pods
    pgbench_pods = pgsql.get_pgbench_pods()

    # Validate pgbench run and measure the TPS and latency
    pgsql.validate_pgbench_perf(pgbench_pods)