def remove_node(self, otpnode=None, wait_for_rebalance=True):
    """Rebalance the given nodes out of the cluster, retrying once on failure.

    :param otpnode: list of node-status objects to eject; no-op when empty/None
                    (previously the default ``None`` raised ``TypeError``).
    :param wait_for_rebalance: when True, block until the rebalance completes
                               and assert that it succeeded.
    """
    nodes = self.rest.node_statuses()
    # Guard: nothing to eject (also avoids len(None) on the default argument).
    if not otpnode:
        return
    # This is the case when master node is running cbas service as well --
    # never eject so many nodes that the cluster would be emptied.
    if len(nodes) <= len(otpnode):
        return
    helper = RestHelper(self.rest)
    try:
        removed = helper.remove_nodes(
            knownNodes=[node.id for node in nodes],
            ejectedNodes=[node.id for node in otpnode],
            wait_for_rebalance=wait_for_rebalance)
    except Exception as e:
        # Surface the swallowed exception in the retry message so failures
        # are diagnosable, then retry once after a short pause.
        self.sleep(
            5, "First time rebalance failed on Removal (%s). Wait and try "
               "again. THIS IS A BUG." % e)
        removed = helper.remove_nodes(
            knownNodes=[node.id for node in nodes],
            ejectedNodes=[node.id for node in otpnode],
            wait_for_rebalance=wait_for_rebalance)
    if wait_for_rebalance:
        self.assertTrue(
            removed,
            "Rebalance operation failed while removing %s," % otpnode)
def test_eventing_rebalance_in_when_existing_eventing_node_is_processing_mutations(self):
    """Rebalance-in an eventing node while handlers are processing mutations,
    then rebalance every eventing node back out."""
    doc_count = self.docs_per_day * self.num_docs
    self.create_save_handlers()
    self.deploy_all_handlers()
    # Kick off asynchronous loads into both source collections.
    self.load_data_to_collection(doc_count, "src_bucket._default._default",
                                 wait_for_loading=False)
    self.load_data_to_collection(doc_count, "src_bucket.scope_1.coll_1",
                                 wait_for_loading=False)
    # Rebalance in an eventing node while mutations are in flight.
    rebalance_task = self.cluster.async_rebalance(
        self.servers[:self.nodes_init], [self.servers[self.nodes_init]], [],
        services=["eventing"])
    self.assertTrue(RestHelper(self.rest).rebalance_reached(retry_count=150),
                    "rebalance failed, stuck or did not complete")
    rebalance_task.result()
    # Verify results once the update mutations have settled.
    self.verify_all_handler(doc_count)
    self.verify_doc_count_collections("src_bucket.scope_1.coll_1",
                                      doc_count * 2)
    # Delete the documents again.
    self.load_data_to_collection(doc_count, "src_bucket._default._default",
                                 is_delete=True)
    self.load_data_to_collection(doc_count, "src_bucket.scope_1.coll_1",
                                 is_delete=True)
    # Verify results once the delete mutations have settled.
    self.verify_all_handler(0)
    self.verify_doc_count_collections("src_bucket.scope_1.coll_1", doc_count)
    self.undeploy_delete_all_functions()
    # Finally rebalance out every eventing node.
    eventing_nodes = self.get_nodes_from_services_map(
        service_type="eventing", get_all_nodes=True)
    rebalance_task = self.cluster.async_rebalance(
        self.servers[:self.nodes_init + 1], [], eventing_nodes)
    self.assertTrue(RestHelper(self.rest).rebalance_reached(retry_count=150),
                    "rebalance failed, stuck or did not complete")
    rebalance_task.result()
def test_eventing_rebalance_in_kill_eventing_consumer(self):
    """Kill the eventing consumer while a rebalance-in is in progress and
    verify the handler recovers and processes all mutations."""
    eventing_node = self.get_nodes_from_services_map(
        service_type="eventing", get_all_nodes=False)
    sock_batch_size = self.input.param('sock_batch_size', 1)
    worker_count = self.input.param('worker_count', 3)
    cpp_worker_thread_count = self.input.param('cpp_worker_thread_count', 1)
    body = self.create_save_function_body(
        self.function_name, self.handler_code,
        sock_batch_size=sock_batch_size, worker_count=worker_count,
        cpp_worker_thread_count=cpp_worker_thread_count)
    if self.is_curl:
        # Attach the curl endpoint definition when curl testing is enabled.
        body['depcfg']['curl'] = [{
            "hostname": self.hostname, "value": "server",
            "auth_type": self.auth_type, "username": self.curl_username,
            "password": self.curl_password, "cookies": self.cookies}]
    self.deploy_function(body)
    # Load the initial document set.
    self.load(self.gens_load, buckets=self.src_bucket, flag=self.item_flag,
              verify_data=False, batch_size=self.batch_size)
    if self.pause_resume:
        self.pause_function(body, wait_for_pause=False)
    # Rebalance in an eventing node while mutations are being processed.
    rebalance_task = self.cluster.async_rebalance(
        self.servers[:self.nodes_init], [self.servers[self.nodes_init]], [],
        services=["eventing"])
    self.sleep(5)
    reached = RestHelper(self.rest).rebalance_reached(percentage=60)
    self.assertTrue(reached, "rebalance failed, stuck or did not complete")
    # Kill the consumer mid-rebalance and wait for the handler to recover.
    self.kill_consumer(eventing_node)
    self.wait_for_handler_state(body['appname'], "deployed")
    rebalance_task.result()
    if self.pause_resume:
        self.resume_function(body)
    # Verify all update mutations were processed after the rebalance.
    expected = self.docs_per_day * 2016
    if self.is_sbm:
        self.verify_eventing_results(self.function_name, expected * 2,
                                     skip_stats_validation=True)
    else:
        self.verify_eventing_results(self.function_name, expected,
                                     skip_stats_validation=True)
    # Delete the documents.
    self.load(self.gens_load, buckets=self.src_bucket, flag=self.item_flag,
              verify_data=False, batch_size=self.batch_size,
              op_type='delete')
    if self.pause_resume:
        self.pause_function(body)
        self.sleep(30)
        self.resume_function(body)
    # Kill the consumer again while delete mutations are in flight.
    self.kill_consumer(eventing_node)
    self.wait_for_handler_state(body['appname'], "deployed")
    # This is required to ensure eventing works after the rebalance went
    # through successfully.
    if self.is_sbm:
        self.verify_eventing_results(self.function_name, expected,
                                     skip_stats_validation=True)
    else:
        self.verify_eventing_results(self.function_name, 0,
                                     skip_stats_validation=True)
    self.undeploy_and_delete_function(body)
    # Rebalance out all eventing nodes.
    eventing_nodes = self.get_nodes_from_services_map(
        service_type="eventing", get_all_nodes=True)
    rebalance_task = self.cluster.async_rebalance(
        self.servers[:self.nodes_init + 1], [], eventing_nodes)
    reached = RestHelper(self.rest).rebalance_reached()
    self.assertTrue(reached, "rebalance failed, stuck or did not complete")
    rebalance_task.result()
def test_create_scope_collection_rebalance_kv(self):
    """Create a scope and a collection while a KV node is being rebalanced
    out, then verify the rebalance itself completed cleanly.

    Fix: the async rebalance task was created but never awaited, so a
    rebalance failure was silently dropped; every other test in this suite
    calls ``.result()`` on the task.
    """
    bucket_name = "bucket1"
    scope_name = "scope1"
    collection_name = "collection1"
    self.cluster.create_standard_bucket(bucket_name, 11222,
                                        self.bucket_params)
    # Start rebalancing the second node out asynchronously.
    rebalance_result = self.cluster.async_rebalance(
        self.servers, [], [self.servers[1]])
    try:
        scope_created = self.cli_helper.create_scope(bucket=bucket_name,
                                                     scope=scope_name)
        self.assertTrue(scope_created, "Cannot create scope during rebalance")
        # NOTE(review): the collection is created under "_default", not the
        # scope created above -- confirm this is intended.
        collection_created = self.cli_helper.create_collection(
            bucket=bucket_name, scope="_default",
            collection=collection_name)
        self.assertTrue(collection_created,
                        "Cannot create collection during rebalance.")
    finally:
        # wait until rebalance is done
        RestHelper(self.rest).rebalance_reached(retry_count=150)
        time_limit = 100
        while time_limit > 0:
            if RestHelper(self.rest).is_cluster_rebalanced():
                break
            else:
                time_limit = time_limit - 1
                self.sleep(10, "Waiting for rebalance finish.")
    # Surface any rebalance failure from the async task (bug fix: the task
    # result was previously never checked).
    rebalance_result.result()
def test_multiple_handle_multiple_collections_swap_rebalance_kv(self):
    """Swap-rebalance a KV node while multiple handlers process mutations
    across multiple collections."""
    expected_docs = self.docs_per_day * self.num_docs
    self.load_data_to_all_source_collections()
    self.create_n_handler(self.num_handlers, self.num_src_buckets,
                          self.num_dst_buckets, self.handler_code)
    self.deploy_n_handler(self.deploy_handler, sequential=self.sequential)
    self.wait_for_handlers_to_deployed()
    # Swap the second KV node for a fresh one while handlers are busy.
    outgoing_kv = self.servers[1]
    swap_task = self.cluster.async_rebalance(
        self.servers[:self.nodes_init], [self.servers[self.nodes_init]],
        [outgoing_kv], services=["kv"])
    self.assertTrue(RestHelper(self.rest).rebalance_reached(retry_count=150),
                    "rebalance failed, stuck or did not complete")
    swap_task.result()
    self.verify_destination_buckets(expected_docs)
    # Delete everything and confirm the destinations drain to zero.
    self.load_data_to_all_source_collections(is_delete=True)
    self.verify_destination_buckets(0)
    self.undeploy_delete_all_handler()
    # Rebalance out every eventing node.
    eventing_nodes = self.get_nodes_from_services_map(
        service_type="eventing", get_all_nodes=True)
    out_task = self.cluster.async_rebalance(
        self.servers[:self.nodes_init + 1], [], eventing_nodes)
    self.assertTrue(RestHelper(self.rest).rebalance_reached(retry_count=150),
                    "rebalance failed, stuck or did not complete")
    out_task.result()
def setUp(self):
    """Prepare the FTS cluster connection and wipe leftover server groups."""
    super(FTSServerGroups, self).setUp()
    master = self._cb_cluster.get_master_node()
    self.rest = RestConnection(master)
    self.helper = RestHelper(self.rest)
    self.default_group_name = "Group 1"
    self.fts_query = {"match": "emp", "field": "type"}
    self._cleanup_server_groups()
def test_eventing_rebalance_with_multiple_eventing_nodes(self):
    """Rebalance two eventing nodes in, out, and back in while handlers are
    continuously processing mutations."""
    docs = self.docs_per_day * self.num_docs
    self.create_save_handlers()
    self.deploy_all_handlers()
    # Start asynchronous loads into both source collections.
    load_default = self.load_data_to_collection(
        docs, "src_bucket._default._default", wait_for_loading=False)
    load_coll1 = self.load_data_to_collection(
        docs, "src_bucket.scope_1.coll_1", wait_for_loading=False)
    # Rebalance in two eventing nodes while mutations are in flight.
    incoming = self.servers[self.nodes_init:self.nodes_init + 2]
    in_task = self.cluster.async_rebalance(
        self.servers[:self.nodes_init], incoming, [],
        services=["eventing", "eventing"])
    self.assertTrue(RestHelper(self.rest).rebalance_reached(retry_count=150),
                    "rebalance failed, stuck or did not complete")
    in_task.result()
    load_default.result()
    load_coll1.result()
    # Verify the update mutations landed.
    self.verify_all_handler(docs)
    self.verify_doc_count_collections("src_bucket.scope_1.coll_1", docs * 2)
    # Delete the documents while rebalancing two eventing nodes out.
    load_default = self.load_data_to_collection(
        docs, "src_bucket._default._default", is_delete=True,
        wait_for_loading=False)
    load_coll1 = self.load_data_to_collection(
        docs, "src_bucket.scope_1.coll_1", is_delete=True,
        wait_for_loading=False)
    eventing_nodes = self.get_nodes_from_services_map(
        service_type="eventing", get_all_nodes=True)
    to_remove_nodes = eventing_nodes[0:2]
    self.log.info("Rebalance out eventing nodes {}".format(to_remove_nodes))
    out_task = self.cluster.async_rebalance(
        self.servers[:self.nodes_init + 2], [], to_remove_nodes)
    self.assertTrue(RestHelper(self.rest).rebalance_reached(retry_count=150),
                    "rebalance failed, stuck or did not complete")
    out_task.result()
    load_default.result()
    load_coll1.result()
    # Verify the delete mutations landed.
    self.verify_all_handler(0)
    self.verify_doc_count_collections("src_bucket.scope_1.coll_1", docs)
    all_eventing_nodes = self.get_nodes_from_services_map(
        service_type="eventing", get_all_nodes=True)
    self.log.info(
        "Eventing Nodes after rebalance out {}".format(all_eventing_nodes))
    # Add the previously removed nodes back as part of a swap rebalance.
    for node in to_remove_nodes:
        self.rest.add_node(self.master.rest_username,
                           self.master.rest_password, node.ip, node.port,
                           services=["eventing"])
    self.load_data_to_collection(docs, "src_bucket._default._default",
                                 wait_for_loading=False)
    self.load_data_to_collection(docs, "src_bucket.scope_1.coll_1",
                                 wait_for_loading=False)
    swap_task = self.cluster.async_rebalance(
        self.servers[:self.nodes_init], [], all_eventing_nodes)
    self.assertTrue(RestHelper(self.rest).rebalance_reached(retry_count=150),
                    "rebalance failed, stuck or did not complete")
    swap_task.result()
    self.verify_all_handler(docs)
    self.verify_doc_count_collections("src_bucket.scope_1.coll_1", docs * 3)
    self.undeploy_delete_all_functions()
def test_eventing_rebalance_with_n2n_encryption_and_enforce_tls(self):
    """Exercise eventing rebalance under each node-to-node encryption level
    (control / all / strict)."""
    docs = self.docs_per_day * self.num_docs
    ntonencryptionBase().disable_nton_cluster([self.master])
    body = self.create_save_function_body(
        self.function_name, "handler_code/ABO/insert_rebalance.js")
    self.load(self.gens_load, buckets=self.src_bucket, flag=self.item_flag,
              verify_data=False, batch_size=self.batch_size)
    self.deploy_function(body)
    self.verify_doc_count_collections("dst_bucket._default._default", docs)
    for level in ["control", "all", "strict"]:
        # Quiesce the handler before changing the encryption level.
        if self.pause_resume:
            self.pause_function(body)
        else:
            self.undeploy_function(body)
        ntonencryptionBase().setup_nton_cluster(
            [self.master], clusterEncryptionLevel=level)
        if self.x509enable:
            self.upload_x509_certs(self.servers[self.nodes_init])
        # Rebalance an eventing node in under the new encryption level.
        in_task = self.cluster.async_rebalance(
            self.servers[:self.nodes_init],
            [self.servers[self.nodes_init]], [], services=["eventing"])
        self.assertTrue(
            RestHelper(self.rest).rebalance_reached(retry_count=150),
            "rebalance failed, stuck or did not complete")
        in_task.result()
        if self.pause_resume:
            self.resume_function(body)
        else:
            self.deploy_function(body)
        # Delete all documents and confirm the destination drains.
        self.load(self.gens_load, buckets=self.src_bucket,
                  flag=self.item_flag, verify_data=False,
                  batch_size=self.batch_size, op_type='delete')
        self.verify_doc_count_collections("dst_bucket._default._default", 0)
        # Rebalance the node back out.
        out_task = self.cluster.async_rebalance(
            self.servers[:self.nodes_init], [],
            [self.servers[self.nodes_init]])
        self.assertTrue(
            RestHelper(self.rest).rebalance_reached(retry_count=150),
            "rebalance failed, stuck or did not complete")
        out_task.result()
        # Reload and re-verify for the next encryption level.
        self.load(self.gens_load, buckets=self.src_bucket,
                  flag=self.item_flag, verify_data=False,
                  batch_size=self.batch_size)
        self.verify_doc_count_collections("dst_bucket._default._default",
                                          docs)
    self.undeploy_and_delete_function(body)
def test_ns_server_with_rebalance_failover_with_redaction_enabled(self):
    """Run a rebalance plus graceful failover with log redaction enabled,
    then collect logs and verify the redacted output."""
    kv_node = self.get_nodes_from_services_map(service_type="kv",
                                               get_all_nodes=False)
    rest = RestConnection(self.master)
    # Load the bucket and generate some additional workloads.
    gen_create = BlobGenerator('logredac', 'logredac-', self.value_size,
                               end=self.num_items)
    self._load_all_buckets(self.master, gen_create, "create", 0)
    gen_delete = BlobGenerator('logredac', 'logredac-', self.value_size,
                               start=self.num_items / 2, end=self.num_items)
    gen_update = BlobGenerator('logredac', 'logredac-', self.value_size,
                               start=self.num_items + 1,
                               end=self.num_items * 3 / 2)
    # NOTE(review): both generators below are loaded with op "create"
    # despite their names -- confirm this is intended.
    self._load_all_buckets(self.master, gen_delete, "create", 0)
    self._load_all_buckets(self.master, gen_update, "create", 0)
    # Enable redaction and kick off log collection.
    self.set_redaction_level()
    self.start_logs_collection()
    # Rebalance a KV node in.
    in_task = self.cluster.async_rebalance(
        self.servers[:self.nodes_init], [self.servers[self.nodes_init]], [],
        services=["kv"])
    self.assertTrue(RestHelper(rest).rebalance_reached(),
                    "rebalance failed, stuck or did not complete")
    in_task.result()
    # Gracefully fail over the node just added, then rebalance it out.
    server_failed_over = self.servers[self.nodes_init]
    fail_over_task = self.cluster.async_failover(
        [self.master], failover_nodes=[server_failed_over], graceful=True)
    fail_over_task.result()
    out_task = self.cluster.async_rebalance(
        self.servers[:self.nodes_init], [], [server_failed_over])
    self.assertTrue(RestHelper(rest).rebalance_reached(),
                    "rebalance failed, stuck or did not complete")
    out_task.result()
    # Wait for the collected logs and verify redaction of the output files.
    result = self.monitor_logs_collection()
    log.info(result)
    logs_path = result["perNode"]["[email protected]"]["path"]
    redactFileName = logs_path.split('/')[-1]
    nonredactFileName = redactFileName.replace('-redacted', '')
    remotepath = logs_path[0:logs_path.rfind('/') + 1]
    self.verify_log_files_exist(remotepath=remotepath,
                                redactFileName=redactFileName,
                                nonredactFileName=nonredactFileName)
    self.verify_log_redaction(remotepath=remotepath,
                              redactFileName=redactFileName,
                              nonredactFileName=nonredactFileName,
                              logFileName="ns_server.debug.log")
def test_swap_rebalance_with_different_topologies(self):
    """Swap-rebalance a parameter-chosen node against a new node running a
    parameter-chosen service while handlers process mutations."""
    docs = self.docs_per_day * self.num_docs
    self.server_out = self.input.param("server_out")
    self.services_in = self.input.param("services_in")
    self.create_save_handlers()
    self.deploy_all_handlers()
    self.load_data_to_collection(docs, "src_bucket._default._default",
                                 wait_for_loading=False)
    self.load_data_to_collection(docs, "src_bucket.scope_1.coll_1",
                                 wait_for_loading=False)
    outgoing_node = self.servers[self.server_out]
    # Swap rebalance: add the new node first, then rebalance the old one out.
    self.rest.add_node(self.master.rest_username, self.master.rest_password,
                       self.servers[self.nodes_init].ip,
                       self.servers[self.nodes_init].port,
                       services=[self.services_in])
    swap_task = self.cluster.async_rebalance(self.servers[:self.nodes_init],
                                             [], [outgoing_node])
    self.assertTrue(RestHelper(self.rest).rebalance_reached(retry_count=150),
                    "rebalance failed, stuck or did not complete")
    swap_task.result()
    # Verify the update mutations after the rebalance.
    self.verify_all_handler(docs)
    self.verify_doc_count_collections("src_bucket.scope_1.coll_1", docs * 2)
    # Delete the documents and verify the handlers drain.
    self.load_data_to_collection(docs, "src_bucket._default._default",
                                 is_delete=True, wait_for_loading=False)
    self.load_data_to_collection(docs, "src_bucket.scope_1.coll_1",
                                 is_delete=True, wait_for_loading=False)
    self.verify_all_handler(0)
    self.verify_doc_count_collections("src_bucket.scope_1.coll_1", docs)
    self.undeploy_delete_all_functions()
def remove_node(self, otpnode=None, wait_for_rebalance=True):
    """Rebalance the given nodes out of the cluster, retrying once on failure.

    :param otpnode: list of node-status objects to eject; no-op when empty/None
                    (previously the default ``None`` raised ``TypeError``).
    :param wait_for_rebalance: when True, block until the rebalance completes
                               and assert that it succeeded.
    """
    nodes = self.rest.node_statuses()
    # Guard: nothing to eject (also avoids len(None) on the default argument).
    if not otpnode:
        return
    # This is the case when master node is running cbas service as well --
    # never eject so many nodes that the cluster would be emptied.
    if len(nodes) <= len(otpnode):
        return
    helper = RestHelper(self.rest)
    try:
        removed = helper.remove_nodes(
            knownNodes=[node.id for node in nodes],
            ejectedNodes=[node.id for node in otpnode],
            wait_for_rebalance=wait_for_rebalance)
    except Exception as e:
        # Surface the swallowed exception in the retry message so failures
        # are diagnosable, then retry once after a short pause.
        self.sleep(
            5, "First time rebalance failed on Removal (%s). Wait and try "
               "again. THIS IS A BUG." % e)
        removed = helper.remove_nodes(
            knownNodes=[node.id for node in nodes],
            ejectedNodes=[node.id for node in otpnode],
            wait_for_rebalance=wait_for_rebalance)
    if wait_for_rebalance:
        self.assertTrue(
            removed,
            "Rebalance operation failed while removing %s," % otpnode)
def test_opposite_address_family_is_blocked(self):
    """Verify the cluster serves only the configured IP family during a
    rebalance, an ip-family flip, and a master-node reboot."""
    incoming_services = [svc.split(":")[0]
                         for svc in self.services_in.split("-")]
    # Validate before the test starts.
    self._validate_ip_addrress_family()
    incoming_nodes = self.servers[self.nodes_init:]
    rebalance_task = self.cluster.async_rebalance(
        self.servers[:self.nodes_init], incoming_nodes, [],
        services=incoming_services)
    self.sleep(2)
    rest = RestConnection(self.master)
    reached = RestHelper(rest).rebalance_reached(percentage=30)
    if self.change_addr_family:
        if self.ipv4_only:
            cli = CouchbaseCLI(self.master, self.master.rest_username,
                               self.master.rest_password)
            # Disable auto-failover while flipping the address family.
            cli.setting_autofailover(0, 60)
            _, _, success = cli.set_ip_family("ipv6only")
            if not success:
                self.fail("Unable to change ip-family to ipv6only")
            self.check_ip_family_enforcement(ip_family="ipv6_only")
            self.sleep(2)
            _, _, success = cli.set_ip_family("ipv4only")
            if not success:
                self.fail("Unable to change ip-family to ipv4only")
            cli.setting_autofailover(1, 60)
            self.check_ip_family_enforcement(ip_family="ipv4_only")
        if self.ipv6_only:
            cli = CouchbaseCLI(self.master, self.master.rest_username,
                               self.master.rest_password)
            cli.setting_autofailover(0, 60)
            _, _, success = cli.set_ip_family("ipv4only")
            if not success:
                self.fail("Unable to change ip-family to ipv4only")
            self.check_ip_family_enforcement(ip_family="ipv4_only")
            self.sleep(2)
            _, _, success = cli.set_ip_family("ipv6only")
            if not success:
                self.fail("Unable to change ip-family to ipv6only")
            cli.setting_autofailover(1, 60)
            self.check_ip_family_enforcement(ip_family="ipv6_only")
    self.assertTrue(reached, "rebalance failed, stuck or did not complete")
    # Validate during rebalance.
    self._validate_ip_addrress_family()
    rebalance_task.result()
    self.sleep(20)
    # Validate post rebalance.
    self._validate_ip_addrress_family()
    # Reboot the master node and validate again once it is back.
    shell = RemoteMachineShellConnection(self.master)
    shell.reboot_node()
    self.sleep(180)
    self._validate_ip_addrress_family()
def test_eventing_rebalance_in_delete_recreate_collections(self):
    """Drop and recreate destination collections while eventing nodes are
    being rebalanced in."""
    docs = self.docs_per_day * self.num_docs
    self.create_save_handlers()
    self.deploy_all_handlers()
    self.load_data_to_collection(docs, "src_bucket._default._default",
                                 wait_for_loading=False)
    self.load_data_to_collection(docs, "src_bucket.scope_1.coll_1",
                                 wait_for_loading=False)
    # Rebalance in an eventing node and drop three destination collections
    # while the rebalance is running.
    in_task = self.cluster.async_rebalance(
        self.servers[:self.nodes_init], [self.servers[self.nodes_init]], [],
        services=["eventing"])
    for coll in ("coll_0", "coll_1", "coll_2"):
        self.collection_rest.delete_collection("dst_bucket", "scope_1", coll)
    self.assertTrue(RestHelper(self.rest).rebalance_reached(retry_count=150),
                    "rebalance failed, stuck or did not complete")
    in_task.result()
    # Only the surviving destination collections can be verified here.
    self.verify_doc_count_collections("dst_bucket.scope_1.coll_3", docs)
    self.verify_doc_count_collections("dst_bucket.scope_1.coll_4", docs)
    self.verify_doc_count_collections("src_bucket.scope_1.coll_1", docs * 2)
    # Rebalance in a second eventing node and recreate the collections.
    in_task = self.cluster.async_rebalance(
        self.servers[:self.nodes_init + 1],
        [self.servers[self.nodes_init + 1]], [], services=["eventing"])
    for coll in ("coll_0", "coll_1", "coll_2"):
        self.collection_rest.create_collection("dst_bucket", "scope_1", coll)
    # Delete the source documents.
    self.load_data_to_collection(docs, "src_bucket._default._default",
                                 is_delete=True)
    self.load_data_to_collection(docs, "src_bucket.scope_1.coll_1",
                                 is_delete=True)
    self.assertTrue(RestHelper(self.rest).rebalance_reached(retry_count=150),
                    "rebalance failed, stuck or did not complete")
    in_task.result()
    self.verify_doc_count_collections("dst_bucket.scope_1.coll_3", 0)
    self.verify_doc_count_collections("dst_bucket.scope_1.coll_4", 0)
    self.verify_doc_count_collections("src_bucket.scope_1.coll_1", docs)
    self.undeploy_delete_all_functions()
def test_eventing_rebalance_swap_delete_recreate_collections(self):
    """Drop and recreate destination collections during swap rebalances of
    the eventing nodes."""
    docs = self.docs_per_day * self.num_docs
    self.create_save_handlers()
    self.deploy_all_handlers()
    self.load_data_to_collection(docs, "src_bucket._default._default",
                                 wait_for_loading=False)
    self.load_data_to_collection(docs, "src_bucket.scope_1.coll_1",
                                 wait_for_loading=False)
    # Swap rebalance the eventing node(s) and drop destination collections
    # while the rebalance is in flight.
    outgoing_eventing = self.get_nodes_from_services_map(
        service_type="eventing", get_all_nodes=True)
    swap_task = self.cluster.async_rebalance(
        self.servers[:self.nodes_init], [self.servers[self.nodes_init]],
        outgoing_eventing, services=["eventing"])
    for coll in ("coll_0", "coll_1", "coll_2"):
        self.collection_rest.delete_collection("dst_bucket", "scope_1", coll)
    self.assertTrue(RestHelper(self.rest).rebalance_reached(retry_count=150),
                    "rebalance failed, stuck or did not complete")
    swap_task.result()
    # Only the surviving destination collections can be verified here.
    self.verify_doc_count_collections("dst_bucket.scope_1.coll_3", docs)
    self.verify_doc_count_collections("dst_bucket.scope_1.coll_4", docs)
    self.verify_doc_count_collections("src_bucket.scope_1.coll_1", docs * 2)
    # Swap again with the next spare node and recreate the collections.
    outgoing_eventing = self.get_nodes_from_services_map(
        service_type="eventing", get_all_nodes=True)
    swap_task = self.cluster.async_rebalance(
        self.servers[:self.nodes_init + 1],
        [self.servers[self.nodes_init + 1]], outgoing_eventing,
        services=["eventing"])
    for coll in ("coll_0", "coll_1", "coll_2"):
        self.collection_rest.create_collection("dst_bucket", "scope_1", coll)
    # Delete the source documents.
    self.load_data_to_collection(docs, "src_bucket._default._default",
                                 is_delete=True)
    self.load_data_to_collection(docs, "src_bucket.scope_1.coll_1",
                                 is_delete=True)
    self.assertTrue(RestHelper(self.rest).rebalance_reached(retry_count=150),
                    "rebalance failed, stuck or did not complete")
    swap_task.result()
    self.verify_all_handler(0)
    self.verify_doc_count_collections("src_bucket.scope_1.coll_1", docs)
    self.undeploy_delete_all_functions()
def test_n1ql_gc_rebalance(self):
    """Run an N1QL-backed handler across an eventing rebalance-in and
    verify all mutations are processed."""
    self.n1ql_helper.create_primary_index(using_gsi=True,
                                          server=self.n1ql_node)
    self.load_sample_buckets(self.server, "travel-sample")
    worker_count = self.input.param('worker_count', 12)
    body = self.create_save_function_body(self.function_name,
                                          self.handler_code,
                                          worker_count=worker_count)
    self.deploy_function(body)
    # Load the initial document set.
    self.load(self.gens_load, buckets=self.src_bucket, flag=self.item_flag,
              verify_data=False, batch_size=self.batch_size)
    if self.pause_resume:
        self.pause_function(body)
    # Rebalance in an eventing node while mutations are being processed.
    in_task = self.cluster.async_rebalance(
        self.servers[:self.nodes_init], [self.servers[self.nodes_init]], [],
        services=["eventing"])
    self.assertTrue(RestHelper(self.rest).rebalance_reached(retry_count=150),
                    "rebalance failed, stuck or did not complete")
    in_task.result()
    if self.pause_resume:
        self.resume_function(body)
    # Verify the update mutations after the rebalance.
    self.verify_eventing_results(self.function_name,
                                 self.docs_per_day * 2016,
                                 skip_stats_validation=True)
    # Delete the documents.
    self.load(self.gens_load, buckets=self.src_bucket, flag=self.item_flag,
              verify_data=False, batch_size=self.batch_size,
              op_type='delete')
    if self.pause_resume:
        self.pause_function(body)
        self.sleep(30)
        self.resume_function(body)
    # Verify the delete mutations drained to zero.
    self.verify_eventing_results(self.function_name, 0,
                                 skip_stats_validation=True)
    self.undeploy_and_delete_function(body)
def test_volume(self):
    """Volume test: build a 3-node cluster, bulk-load documents through the
    Java Couchbase SDK (this suite runs under Jython -- note the Java
    Executors/TimeUnit usage and Python 2 print statements), run an
    update/delete cycle, then reload while rebalancing a fourth node in.

    NOTE(review): step numbering in the log messages (2, 3, 8) is sparse --
    presumably inherited from a longer scenario; confirm against the suite.
    """
    nodes_in_cluster = [self.servers[0]]
    print "Start Time: %s" % str(
        time.strftime("%H:%M:%S", time.gmtime(time.time())))
    ####################################################################
    # Step 1: add an N1QL/Index node (no rebalance yet) and a KV node
    # (rebalance immediately).
    self.log.info("Add a N1QL/Index nodes")
    self.query_node = self.servers[1]
    rest = RestConnection(self.query_node)
    rest.set_data_path(data_path=self.query_node.data_path,
                       index_path=self.query_node.index_path,
                       cbas_path=self.query_node.cbas_path)
    result = self.add_node(self.query_node, rebalance=False)
    self.assertTrue(result, msg="Failed to add N1QL/Index node.")
    self.log.info("Add a KV nodes")
    result = self.add_node(self.servers[2], services=["kv"],
                           rebalance=True)
    self.assertTrue(result, msg="Failed to add KV node.")
    nodes_in_cluster = nodes_in_cluster + [
        self.servers[1], self.servers[2]
    ]
    ####################################################################
    self.log.info("Step 2: Create Couchbase buckets.")
    self.create_required_buckets()
    # Warm up every node before the load starts.
    for node in nodes_in_cluster:
        NodeHelper.do_a_warm_up(node)
        NodeHelper.wait_service_started(node)
    ####################################################################
    self.log.info(
        "Step 3: Create 10M docs average of 1k docs for 8 couchbase buckets."
    )
    # Java SDK environment; generous timeouts for the bulk load.
    env = DefaultCouchbaseEnvironment.builder().mutationTokensEnabled(
        True).computationPoolSize(5).socketConnectTimeout(
        100000).connectTimeout(100000).maxRequestLifetime(
        TimeUnit.SECONDS.toMillis(300)).build()
    cluster = CouchbaseCluster.create(env, self.master.ip)
    cluster.authenticate("Administrator", "password")
    bucket = cluster.openBucket("GleambookUsers")
    # First load cycle: 5 parallel loaders, each owning a disjoint id range.
    pool = Executors.newFixedThreadPool(5)
    items_start_from = 0
    total_num_items = self.input.param("num_items", 5000)
    executors = []
    num_executors = 5
    doc_executors = 5
    num_items = total_num_items / num_executors
    for i in xrange(doc_executors):
        executors.append(
            GleambookUser_Docloader(bucket, num_items,
                                    items_start_from + i * num_items,
                                    batch_size=2000))
    futures = pool.invokeAll(executors)
    for future in futures:
        print future.get(num_executors, TimeUnit.SECONDS)
    print "Executors completed!!"
    shutdown_and_await_termination(pool, num_executors)
    # Remember id ranges for the update/delete cycle below.
    updates_from = items_start_from
    deletes_from = items_start_from + total_num_items / 10
    items_start_from += total_num_items
    ####################################################################
    self.sleep(120, "Sleeping after 1st cycle.")
    self.log.info("Step 8: Delete 1M docs. Update 1M docs.")
    # Second cycle: one updater and one deleter over 10% of the ids each.
    pool = Executors.newFixedThreadPool(5)
    num_items = self.input.param("num_items", 5000)
    executors = []
    num_executors = 5
    doc_executors = 4
    executors.append(
        GleambookUser_Docloader(bucket, num_items / 10, updates_from,
                                "update"))
    executors.append(
        GleambookUser_Docloader(bucket, num_items / 10, deletes_from,
                                "delete"))
    futures = pool.invokeAll(executors)
    for future in futures:
        print future.get(num_executors, TimeUnit.SECONDS)
    print "Executors completed!!"
    shutdown_and_await_termination(pool, num_executors)
    ####################################################################
    self.sleep(120, "Sleeping after 2nd cycle.")
    # Third cycle: reload fresh ranges while rebalancing in a fourth node.
    pool = Executors.newFixedThreadPool(5)
    num_items = self.input.param("num_items", 5000)
    executors = []
    num_executors = 5
    doc_executors = 5
    num_items = total_num_items / doc_executors
    for i in xrange(doc_executors):
        executors.append(
            GleambookUser_Docloader(bucket, num_items,
                                    items_start_from + i * num_items,
                                    batch_size=2000))
    rebalance = self.cluster.async_rebalance(nodes_in_cluster,
                                             [self.servers[3]], [])
    futures = pool.invokeAll(executors)
    for future in futures:
        print future.get(num_executors, TimeUnit.SECONDS)
    print "Executors completed!!"
    shutdown_and_await_termination(pool, num_executors)
    rebalance.get_result()
    reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
    self.assertTrue(reached, "rebalance failed, stuck or did not complete")
    bucket.close()
    cluster.disconnect()
    print "End Time: %s" % str(
        time.strftime("%H:%M:%S", time.gmtime(time.time())))
    def test_cbcollect_with_redaction_enabled_with_xdcr(self):
        """Collect cluster logs with redaction enabled while XDCR is active.

        Sets up a one-way replication ("C2") from ``self.master`` to
        ``self.servers[1]``, loads documents into all buckets, optionally
        firewalls the source node to interrupt replication, runs log
        collection, and verifies that both the redacted and non-redacted
        archives exist and that ``ns_server.goxdcr.log`` is redacted.
        XDCR/bucket/firewall cleanup always runs in the ``finally`` block.

        NOTE(review): a second, near-identical definition of this method
        appears later in this file and shadows this one at class-creation
        time, so this copy never executes — one of the two should be removed.
        """
        rest_src = RestConnection(self.master)
        # Start from a clean XDCR state on the source cluster.
        rest_src.remove_all_replications()
        rest_src.remove_all_remote_clusters()
        rest_dest = RestConnection(self.servers[1])
        rest_dest_helper = RestHelper(rest_dest)
        try:
            # Repeated cleanup inside the try — redundant with the lines
            # above but harmless; keeps the setup idempotent.
            rest_src.remove_all_replications()
            rest_src.remove_all_remote_clusters()
            # Turn on log redaction before any traffic is generated.
            self.set_redaction_level()
            rest_src.add_remote_cluster(self.servers[1].ip,
                                        self.servers[1].port,
                                        self.servers[1].rest_username,
                                        self.servers[1].rest_password, "C2")
            """ at dest cluster """
            self.add_built_in_server_user(node=self.servers[1])
            rest_dest.create_bucket(bucket='default', ramQuotaMB=512)
            bucket_ready = rest_dest_helper.vbucket_map_ready('default')
            if not bucket_ready:
                self.fail("Bucket default at dest not created after 120 seconds.")
            repl_id = rest_src.start_replication('continuous', 'default', "C2")
            if repl_id is not None:
                self.log.info("Replication created successfully")
            # Load docs so the collected logs contain user data that the
            # redaction pass should scrub.
            gen = BlobGenerator("ent-backup", "ent-backup-", self.value_size,
                                end=self.num_items)
            tasks = self._async_load_all_buckets(self.master, gen, "create", 0)
            for task in tasks:
                task.result()
            self.sleep(10)
            """ enable firewall """
            if self.interrupt_replication:
                # Firewall the source so XDCR errors land in goxdcr.log.
                RemoteUtilHelper.enable_firewall(self.master, xdcr=True)
            """ start collect logs """
            self.start_logs_collection()
            result = self.monitor_logs_collection()
            """ verify logs """
            try:
                logs_path = result["perNode"]["ns_1@" + str(self.master.ip)]["path"]
            except KeyError:
                # NOTE(review): "[email protected]" looks like a redacted/garbled
                # literal (originally a node name such as "ns_1@<ip>") —
                # confirm against version control.
                logs_path = result["perNode"]["[email protected]"]["path"]
            # The collected archive name; the non-redacted twin drops the
            # "-redacted" suffix.
            redactFileName = logs_path.split('/')[-1]
            nonredactFileName = logs_path.split('/')[-1].replace('-redacted', '')
            remotepath = logs_path[0:logs_path.rfind('/')+1]
            self.verify_log_files_exist(remotepath=remotepath,
                                        redactFileName=redactFileName,
                                        nonredactFileName=nonredactFileName)
            self.log.info("Verify on log ns_server.goxdcr.log")
            self.verify_log_redaction(remotepath=remotepath,
                                      redactFileName=redactFileName,
                                      nonredactFileName=nonredactFileName,
                                      logFileName="ns_server.goxdcr.log")
        finally:
            """ clean up xdcr """
            rest_dest.delete_bucket()
            rest_src.remove_all_replications()
            rest_src.remove_all_remote_clusters()
            if self.interrupt_replication:
                shell = RemoteMachineShellConnection(self.master)
                shell.disable_firewall()
                shell.disconnect()
    def test_cbcollect_with_redaction_enabled_with_xdcr(self):
        """Collect cluster logs with redaction enabled while XDCR is active.

        Sets up a one-way replication ("C2") from ``self.master`` to
        ``self.servers[1]``, loads documents into all buckets, optionally
        firewalls the source node to interrupt replication, runs log
        collection, and verifies that both the redacted and non-redacted
        archives exist and that ``ns_server.goxdcr.log`` is redacted.
        XDCR/bucket/firewall cleanup always runs in the ``finally`` block.

        NOTE(review): this is a duplicate of an earlier definition with the
        same name; this later copy is the one Python binds on the class —
        the two should be consolidated.
        """
        rest_src = RestConnection(self.master)
        # Start from a clean XDCR state on the source cluster.
        rest_src.remove_all_replications()
        rest_src.remove_all_remote_clusters()
        rest_dest = RestConnection(self.servers[1])
        rest_dest_helper = RestHelper(rest_dest)
        try:
            # Repeated cleanup inside the try — redundant but harmless.
            rest_src.remove_all_replications()
            rest_src.remove_all_remote_clusters()
            # Turn on log redaction before any traffic is generated.
            self.set_redaction_level()
            rest_src.add_remote_cluster(self.servers[1].ip,
                                        self.servers[1].port,
                                        self.servers[1].rest_username,
                                        self.servers[1].rest_password, "C2")
            """ at dest cluster """
            self.add_built_in_server_user(node=self.servers[1])
            rest_dest.create_bucket(bucket='default', ramQuotaMB=512)
            bucket_ready = rest_dest_helper.vbucket_map_ready('default')
            if not bucket_ready:
                self.fail(
                    "Bucket default at dest not created after 120 seconds.")
            repl_id = rest_src.start_replication('continuous', 'default', "C2")
            if repl_id is not None:
                self.log.info("Replication created successfully")
            # Load docs so the collected logs contain user data that the
            # redaction pass should scrub.
            gen = BlobGenerator("ent-backup", "ent-backup-", self.value_size,
                                end=self.num_items)
            tasks = self._async_load_all_buckets(self.master, gen, "create", 0)
            for task in tasks:
                task.result()
            self.sleep(10)
            """ enable firewall """
            if self.interrupt_replication:
                # Firewall the source so XDCR errors land in goxdcr.log.
                RemoteUtilHelper.enable_firewall(self.master, xdcr=True)
            """ start collect logs """
            self.start_logs_collection()
            result = self.monitor_logs_collection()
            """ verify logs """
            try:
                logs_path = result["perNode"]["ns_1@" + str(self.master.ip)]["path"]
            except KeyError:
                # NOTE(review): "[email protected]" looks like a redacted/garbled
                # literal (originally a node name such as "ns_1@<ip>") —
                # confirm against version control.
                logs_path = result["perNode"]["[email protected]"]["path"]
            # The collected archive name; the non-redacted twin drops the
            # "-redacted" suffix.
            redactFileName = logs_path.split('/')[-1]
            nonredactFileName = logs_path.split('/')[-1].replace(
                '-redacted', '')
            remotepath = logs_path[0:logs_path.rfind('/') + 1]
            self.verify_log_files_exist(remotepath=remotepath,
                                        redactFileName=redactFileName,
                                        nonredactFileName=nonredactFileName)
            self.log.info("Verify on log ns_server.goxdcr.log")
            self.verify_log_redaction(remotepath=remotepath,
                                      redactFileName=redactFileName,
                                      nonredactFileName=nonredactFileName,
                                      logFileName="ns_server.goxdcr.log")
        finally:
            """ clean up xdcr """
            rest_dest.delete_bucket()
            rest_src.remove_all_replications()
            rest_src.remove_all_remote_clusters()
            if self.interrupt_replication:
                shell = RemoteMachineShellConnection(self.master)
                shell.disable_firewall()
                shell.disconnect()
def test_ups_volume(self): nodes_in_cluster = [self.servers[0]] print "Start Time: %s" % str( time.strftime("%H:%M:%S", time.gmtime(time.time()))) ######################################################################################################################## self.log.info("Add a KV nodes - 2") self.query_node = self.servers[1] rest = RestConnection(self.servers[1]) rest.set_data_path(data_path=self.servers[1].data_path, index_path=self.servers[1].index_path, cbas_path=self.servers[1].cbas_path) result = self.add_node(self.servers[1], rebalance=False) self.assertTrue(result, msg="Failed to add N1QL/Index node.") self.log.info("Add a KV nodes - 3") rest = RestConnection(self.servers[2]) rest.set_data_path(data_path=self.kv_servers[1].data_path, index_path=self.kv_servers[1].index_path, cbas_path=self.kv_servers[1].cbas_path) result = self.add_node(self.kv_servers[1], services=["kv"], rebalance=False) self.assertTrue(result, msg="Failed to add KV node.") self.log.info("Add one more KV node") rest = RestConnection(self.servers[3]) rest.set_data_path(data_path=self.kv_servers[3].data_path, index_path=self.kv_servers[3].index_path, cbas_path=self.kv_servers[3].cbas_path) result = self.add_node(self.kv_servers[3], services=["kv"], rebalance=False) self.assertTrue(result, msg="Failed to add KV node.") self.log.info("Add one more KV node") rest = RestConnection(self.servers[4]) rest.set_data_path(data_path=self.kv_servers[4].data_path, index_path=self.kv_servers[4].index_path, cbas_path=self.kv_servers[4].cbas_path) result = self.add_node(self.kv_servers[4], services=["kv"], rebalance=False) self.assertTrue(result, msg="Failed to add KV node.") nodes_in_cluster = nodes_in_cluster + [ self.servers[1], self.servers[2], self.servers[3], self.servers[4] ] ######################################################################################################################## self.log.info("Step 2: Create Couchbase buckets.") self.create_required_buckets() 
######################################################################################################################## self.log.info( "Step 3: Create 10M docs average of 1k docs for 8 couchbase buckets." ) env = DefaultCouchbaseEnvironment.builder().mutationTokensEnabled( True).computationPoolSize(5).socketConnectTimeout( 100000).connectTimeout(100000).maxRequestLifetime( TimeUnit.SECONDS.toMillis(300)).build() cluster = CouchbaseCluster.create(env, self.master.ip) cluster.authenticate("Administrator", "password") bucket = cluster.openBucket("GleambookUsers") msg_bucket = cluster.openBucket("GleambookMessages") pool = Executors.newFixedThreadPool(5) items_start_from = 0 total_num_items = self.input.param("num_items", 5000) executors = [] num_executors = 5 doc_executors = 5 num_items = total_num_items / num_executors for i in xrange(doc_executors): executors.append( GleambookUser_Docloader(bucket, num_items, items_start_from + i * num_items)) executors.append( GleambookMessages_Docloader(msg_bucket, num_items, items_start_from + i * num_items)) futures = pool.invokeAll(executors) for future in futures: print future.get(num_executors, TimeUnit.SECONDS) print "Executors completed!!" shutdown_and_await_termination(pool, num_executors) updates_from = items_start_from deletes_from = items_start_from + total_num_items / 10 items_start_from += total_num_items ######################################################################################################################## self.log.info("Step 6: Verify the items count.") self.validate_items_count() ######################################################################################################################## self.log.info("Step 8: Delete 1M docs. 
Update 1M docs.") pool = Executors.newFixedThreadPool(5) num_items = self.input.param("num_items", 5000) executors = [] num_executors = 5 doc_executors = 4 executors.append( GleambookUser_Docloader(bucket, num_items / 10, updates_from, "update")) executors.append( GleambookUser_Docloader(bucket, num_items / 10, deletes_from, "delete")) executors.append( GleambookMessages_Docloader(msg_bucket, num_items / 10, updates_from, "update")) executors.append( GleambookMessages_Docloader(msg_bucket, num_items / 10, deletes_from, "delete")) for future in futures: print future.get(num_executors, TimeUnit.SECONDS) print "Executors completed!!" shutdown_and_await_termination(pool, num_executors) ######################################################################################################################## self.log.info("Step 9: Connect cbas buckets.") self.connect_cbas_buckets() self.sleep(10, "Wait for the ingestion to complete") ######################################################################################################################## self.log.info("Step 10: Verify the items count.") self.validate_items_count() ######################################################################################################################## self.log.info( "Step 12: When 11 is in progress do a KV Rebalance in of 1 nodes.") rest = RestConnection(self.servers[5]) rest.set_data_path(data_path=self.servers[5].data_path, index_path=self.servers[5].index_path, cbas_path=self.servers[5].cbas_path) rebalance = self.cluster.async_rebalance(nodes_in_cluster, [self.servers[5]], []) nodes_in_cluster += [self.servers[2]] ######################################################################################################################## self.log.info("Step 11: Create 10M docs.") pool = Executors.newFixedThreadPool(5) total_num_items = self.input.param("num_items", 5000) executors = [] num_executors = 5 doc_executors = 4 num_items = total_num_items / doc_executors for i in 
xrange(doc_executors): executors.append( GleambookUser_Docloader(bucket, num_items, items_start_from + i * num_items)) executors.append( GleambookMessages_Docloader(msg_bucket, num_items, items_start_from + i * num_items)) futures = pool.invokeAll(executors) for future in futures: print future.get(num_executors, TimeUnit.SECONDS) print "Executors completed!!" shutdown_and_await_termination(pool, num_executors) updates_from = items_start_from deletes_from = items_start_from + total_num_items / 10 items_start_from += total_num_items ######################################################################################################################## self.log.info("Step 13: Wait for rebalance to complete.") rebalance.get_result() reached = RestHelper(self.rest).rebalance_reached(wait_step=120) self.assertTrue(reached, "rebalance failed, stuck or did not complete") self.sleep(20) ######################################################################################################################## self.log.info("Step 14: Verify the items count.") self.validate_items_count() ######################################################################################################################## self.log.info("Step 15: Delete 1M docs. Update 1M docs.") pool = Executors.newFixedThreadPool(5) num_items = self.input.param("num_items", 5000) executors = [] num_executors = 5 doc_executors = 4 executors.append( GleambookUser_Docloader(bucket, num_items / 10, updates_from, "update")) executors.append( GleambookUser_Docloader(bucket, num_items / 10, deletes_from, "delete")) executors.append( GleambookMessages_Docloader(msg_bucket, num_items / 10, updates_from, "update")) executors.append( GleambookMessages_Docloader(msg_bucket, num_items / 10, deletes_from, "delete")) for future in futures: print future.get(num_executors, TimeUnit.SECONDS) print "Executors completed!!" 
shutdown_and_await_termination(pool, num_executors) ######################################################################################################################## self.log.info( "Step 16: Verify Results that 1M docs gets deleted from analytics datasets." ) self.validate_items_count() ######################################################################################################################## self.log.info("Step 17: Disconnect CBAS buckets.") self.disconnect_cbas_buckets() ######################################################################################################################## self.log.info("Step 18: Create 10M docs.") pool = Executors.newFixedThreadPool(5) total_num_items = self.input.param("num_items", 5000) executors = [] num_executors = 5 doc_executors = 4 num_items = total_num_items / doc_executors for i in xrange(doc_executors): executors.append( GleambookUser_Docloader(bucket, num_items, items_start_from + i * num_items)) executors.append( GleambookMessages_Docloader(msg_bucket, num_items, items_start_from + i * num_items)) futures = pool.invokeAll(executors) for future in futures: print future.get(num_executors, TimeUnit.SECONDS) print "Executors completed!!" shutdown_and_await_termination(pool, num_executors) updates_from = items_start_from deletes_from = items_start_from + total_num_items / 10 items_start_from += total_num_items ######################################################################################################################## self.log.info("Step 20: Verify the docs count.") self.validate_items_count() ######################################################################################################################## pool = Executors.newFixedThreadPool(5) executors = [] num_executors = 5 self.log.info( "Step 22: When 21 is in progress do a KV Rebalance out of 2 nodes." 
) rebalance = self.cluster.async_rebalance(nodes_in_cluster, [], self.servers[1:3]) nodes_in_cluster = [ node for node in nodes_in_cluster if node not in self.servers[1:3] ] futures = pool.invokeAll(executors) self.log.info("Step 23: Wait for rebalance.") rebalance.get_result() reached = RestHelper(self.rest).rebalance_reached(wait_step=120) self.assertTrue(reached, "rebalance failed, stuck or did not complete") self.sleep(20) for future in futures: print future.get(num_executors, TimeUnit.SECONDS) print "Executors completed!!" shutdown_and_await_termination(pool, num_executors) ######################################################################################################################## self.log.info("Step 24: Create 10M docs.") pool = Executors.newFixedThreadPool(5) total_num_items = self.input.param("num_items", 5000) executors = [] num_executors = 6 doc_executors = 4 num_items = total_num_items / doc_executors for i in xrange(doc_executors): executors.append( GleambookUser_Docloader(bucket, num_items, items_start_from + i * num_items)) executors.append( GleambookMessages_Docloader(msg_bucket, num_items, items_start_from + i * num_items)) ##################################################### NEED TO BE UPDATED ################################################################## self.log.info( "Step 25: When 24 is in progress do a KV Rebalance in of 2 nodes.") for node in self.servers[1:3]: rest = RestConnection(node) rest.set_data_path(data_path=node.data_path, index_path=node.index_path, cbas_path=node.cbas_path) rebalance = self.cluster.async_rebalance(nodes_in_cluster, self.servers[1:3], []) nodes_in_cluster = nodes_in_cluster + self.servers[1:3] futures = pool.invokeAll(executors) for future in futures: print future.get(num_executors, TimeUnit.SECONDS) print "Executors completed!!" 
shutdown_and_await_termination(pool, num_executors) updates_from = items_start_from deletes_from = items_start_from + total_num_items / 10 items_start_from += total_num_items self.log.info("Step 27: Wait for rebalance to complete.") rebalance.get_result() reached = RestHelper(self.rest).rebalance_reached(wait_step=120) self.assertTrue(reached, "rebalance failed, stuck or did not complete") ######################################################################################################################## self.log.info("Step 28: Verify the docs count.") self.validate_items_count() ######################################################################################################################## self.log.info("Step 29: Delete 1M docs. Update 1M docs.") pool = Executors.newFixedThreadPool(5) num_items = self.input.param("num_items", 5000) executors = [] num_executors = 5 doc_executors = 4 executors.append( GleambookUser_Docloader(bucket, num_items / 10, updates_from, "update")) executors.append( GleambookUser_Docloader(bucket, num_items / 10, deletes_from, "delete")) executors.append( GleambookMessages_Docloader(msg_bucket, num_items / 10, updates_from, "update")) executors.append( GleambookMessages_Docloader(msg_bucket, num_items / 10, deletes_from, "delete")) for future in futures: print future.get(num_executors, TimeUnit.SECONDS) print "Executors completed!!" 
shutdown_and_await_termination(pool, num_executors) ######################################################################################################################## self.log.info("Step 30: Verify the docs count.") self.validate_items_count() ######################################################################################################################## self.log.info("Step 31: Create 10M docs.") pool = Executors.newFixedThreadPool(5) total_num_items = self.input.param("num_items", 5000) executors = [] num_executors = 5 doc_executors = 4 num_items = total_num_items / doc_executors for i in xrange(doc_executors): executors.append( GleambookUser_Docloader(bucket, num_items, items_start_from + i * num_items)) executors.append( GleambookMessages_Docloader(msg_bucket, num_items, items_start_from + i * num_items)) ###################################################### NEED TO BE UPDATED ################################################################## self.log.info( "Step 32: When 31 is in progress do a KV Rebalance out of 2 nodes." ) rebalance = self.cluster.async_rebalance(nodes_in_cluster, [], self.servers[1:3]) nodes_in_cluster = [ node for node in nodes_in_cluster if node not in self.servers[1:3] ] futures = pool.invokeAll(executors) for future in futures: print future.get(num_executors, TimeUnit.SECONDS) print "Executors completed!!" 
shutdown_and_await_termination(pool, num_executors) updates_from = items_start_from deletes_from = items_start_from + total_num_items / 10 items_start_from += total_num_items ######################################################################################################################## self.log.info("Step 33: Wait for rebalance to complete.") rebalance.get_result() reached = RestHelper(self.rest).rebalance_reached(wait_step=120) self.assertTrue(reached, "rebalance failed, stuck or did not complete") self.sleep(20) ######################################################################################################################## self.log.info("Step 34: Verify the docs count.") self.validate_items_count() ######################################################################################################################## self.log.info("Step 35: Delete 1M docs. Update 1M docs.") pool = Executors.newFixedThreadPool(5) num_items = self.input.param("num_items", 5000) executors = [] num_executors = 5 doc_executors = 4 executors.append( GleambookUser_Docloader(bucket, num_items / 10, updates_from, "update")) executors.append( GleambookUser_Docloader(bucket, num_items / 10, deletes_from, "delete")) executors.append( GleambookMessages_Docloader(msg_bucket, num_items / 10, updates_from, "update")) executors.append( GleambookMessages_Docloader(msg_bucket, num_items / 10, deletes_from, "delete")) futures = pool.invokeAll(executors) for future in futures: print future.get(num_executors, TimeUnit.SECONDS) print "Executors completed!!" 
shutdown_and_await_termination(pool, num_executors) ######################################################################################################################## self.log.info("Step 36: Verify the docs count.") self.validate_items_count() ######################################################################################################################## self.log.info("Step 37: Create 10M docs.") pool = Executors.newFixedThreadPool(5) total_num_items = self.input.param("num_items", 5000) executors = [] num_executors = 5 doc_executors = 4 num_items = total_num_items / doc_executors for i in xrange(doc_executors): executors.append( GleambookUser_Docloader(bucket, num_items, items_start_from + i * num_items)) executors.append( GleambookMessages_Docloader(msg_bucket, num_items, items_start_from + i * num_items)) ###################################################### NEED TO BE UPDATED ################################################################## self.log.info( "Step 38: When 37 is in progress do a CBAS SWAP Rebalance of 2 nodes." ) for node in self.cbas_servers[-1:]: rest = RestConnection(node) rest.set_data_path(data_path=node.data_path, index_path=node.index_path, cbas_path=node.cbas_path) rebalance = self.cluster.async_rebalance(nodes_in_cluster, self.servers[6], [self.servers[5]], services=["kv"], check_vbucket_shuffling=False) nodes_in_cluster += self.servers[6] nodes_in_cluster.remove(self.servers[5]) futures = pool.invokeAll(executors) for future in futures: print future.get(num_executors, TimeUnit.SECONDS) print "Executors completed!!" 
shutdown_and_await_termination(pool, num_executors) ######################################################################################################################## self.log.info("Step 39: Wait for rebalance to complete.") rebalance.get_result() reached = RestHelper(self.rest).rebalance_reached(wait_step=120) self.assertTrue(reached, "rebalance failed, stuck or did not complete") self.sleep(20) updates_from = items_start_from deletes_from = items_start_from + total_num_items / 10 items_start_from += total_num_items ######################################################################################################################## self.log.info("Step 40: Verify the docs count.") self.validate_items_count() ######################################################################################################################## self.log.info("Step 41: Delete 1M docs. Update 1M docs.") pool = Executors.newFixedThreadPool(5) num_items = self.input.param("num_items", 5000) executors = [] num_executors = 5 doc_executors = 4 executors.append( GleambookUser_Docloader(bucket, num_items / 10, updates_from, "update")) executors.append( GleambookUser_Docloader(bucket, num_items / 10, deletes_from, "delete")) executors.append( GleambookMessages_Docloader(msg_bucket, num_items / 10, updates_from, "update")) executors.append( GleambookMessages_Docloader(msg_bucket, num_items / 10, deletes_from, "delete")) futures = pool.invokeAll(executors) for future in futures: print future.get(num_executors, TimeUnit.SECONDS) print "Executors completed!!" 
shutdown_and_await_termination(pool, num_executors) ######################################################################################################################## self.log.info("Step 42: Verify the docs count.") self.validate_items_count() ######################################################################################################################## self.log.info("Step 43: Create 10M docs.") pool = Executors.newFixedThreadPool(5) total_num_items = self.input.param("num_items", 5000) executors = [] num_executors = 5 doc_executors = 4 num_items = total_num_items / doc_executors for i in xrange(doc_executors): executors.append( GleambookUser_Docloader(bucket, num_items, items_start_from + i * num_items)) executors.append( GleambookMessages_Docloader(msg_bucket, num_items, items_start_from + i * num_items)) ###################################################### NEED TO BE UPDATED ################################################################## self.log.info("Step 44: When 43 is in progress do a KV Rebalance IN.") rest = RestConnection(self.servers[5]) rest.set_data_path(data_path=self.servers[5].data_path, index_path=self.servers[5].index_path, cbas_path=self.servers[5].cbas_path) rebalance = self.cluster.async_rebalance(nodes_in_cluster, [self.servers[5]], [], services=["kv"]) nodes_in_cluster += [self.servers[5]] self.assertTrue(reached, "rebalance failed, stuck or did not complete") futures = pool.invokeAll(executors) for future in futures: print future.get(num_executors, TimeUnit.SECONDS) print "Executors completed!!" 
shutdown_and_await_termination(pool, num_executors) ######################################################################################################################## self.log.info("Step 45: Wait for rebalance to complete.") rebalance.get_result() reached = RestHelper(self.rest).rebalance_reached(wait_step=120) self.assertTrue(reached, "rebalance failed, stuck or did not complete") self.sleep(20) updates_from = items_start_from deletes_from = items_start_from + total_num_items / 10 items_start_from += total_num_items ######################################################################################################################## self.log.info("Step 46: Verify the docs count.") self.validate_items_count() ######################################################################################################################## self.log.info("Step 47: Delete 1M docs. Update 1M docs.") pool = Executors.newFixedThreadPool(5) num_items = self.input.param("num_items", 5000) executors = [] num_executors = 5 doc_executors = 4 executors.append( GleambookUser_Docloader(bucket, num_items / 10, updates_from, "update")) executors.append( GleambookUser_Docloader(bucket, num_items / 10, deletes_from, "delete")) executors.append( GleambookMessages_Docloader(msg_bucket, num_items / 10, updates_from, "update")) executors.append( GleambookMessages_Docloader(msg_bucket, num_items / 10, deletes_from, "delete")) futures = pool.invokeAll(executors) for future in futures: print future.get(num_executors, TimeUnit.SECONDS) print "Executors completed!!" 
shutdown_and_await_termination(pool, num_executors) ######################################################################################################################## self.log.info("Step 48: Verify the docs count.") self.validate_items_count() ######################################################################################################################## self.log.info("Step 49: Create 10M docs.") pool = Executors.newFixedThreadPool(5) total_num_items = self.input.param("num_items", 5000) executors = [] num_executors = 5 doc_executors = 4 num_items = total_num_items / doc_executors for i in xrange(doc_executors): executors.append( GleambookUser_Docloader(bucket, num_items, items_start_from + i * num_items)) executors.append( GleambookMessages_Docloader(msg_bucket, num_items, items_start_from + i * num_items)) ######################################################################################################################## self.log.info( "Step 50: When 49 is in progress do a KV+CBAS Rebalance OUT.") rest = RestConnection(self.servers[6]) rest.set_data_path(data_path=self.servers[6].data_path, index_path=self.servers[6].index_path, cbas_path=self.kv_servers[6].cbas_path) rebalance = self.cluster.async_rebalance(nodes_in_cluster, [], [self.servers[6]]) nodes_in_cluster.remove(self.servers[6]) futures = pool.invokeAll(executors) for future in futures: print future.get(num_executors, TimeUnit.SECONDS) print "Executors completed!!" 
shutdown_and_await_termination(pool, num_executors) ######################################################################################################################## self.log.info("Step 51: Wait for rebalance to complete.") rebalance.get_result() reached = RestHelper(self.rest).rebalance_reached(wait_step=120) self.assertTrue(reached, "rebalance failed, stuck or did not complete") self.sleep(20) updates_from = items_start_from deletes_from = items_start_from + total_num_items / 10 items_start_from += total_num_items ######################################################################################################################## self.log.info("Step 52: Verify the docs count.") self.validate_items_count() ######################################################################################################################## self.log.info("Step 53: Delete 1M docs. Update 1M docs.") pool = Executors.newFixedThreadPool(5) num_items = self.input.param("num_items", 5000) executors = [] num_executors = 5 doc_executors = 4 executors.append( GleambookUser_Docloader(bucket, num_items / 10, updates_from, "update")) executors.append( GleambookUser_Docloader(bucket, num_items / 10, deletes_from, "delete")) executors.append( GleambookMessages_Docloader(msg_bucket, num_items / 10, updates_from, "update")) executors.append( GleambookMessages_Docloader(msg_bucket, num_items / 10, deletes_from, "delete")) futures = pool.invokeAll(executors) for future in futures: print future.get(num_executors, TimeUnit.SECONDS) print "Executors completed!!" 
shutdown_and_await_termination(pool, num_executors) ######################################################################################################################## self.log.info("Step 54: Verify the docs count.") self.validate_items_count() ######################################################################################################################## self.log.info("Step 55: Create 10M docs.") pool = Executors.newFixedThreadPool(5) total_num_items = self.input.param("num_items", 5000) executors = [] num_executors = 5 doc_executors = 4 num_items = total_num_items / doc_executors for i in xrange(doc_executors): executors.append( GleambookUser_Docloader(bucket, num_items, items_start_from + i * num_items)) executors.append( GleambookMessages_Docloader(msg_bucket, num_items, items_start_from + i * num_items)) ######################################################################################################################## self.log.info( "Step 56: When 55 is in progress do a KV+CBAS SWAP Rebalance .") rest = RestConnection(self.servers[7]) rest.set_data_path(data_path=self.servers[7].data_path, index_path=self.servers[7].index_path, cbas_path=self.servers[7].cbas_path) rebalance = self.cluster.async_rebalance(nodes_in_cluster, [self.servers[7]], [self.servers[6]]) # rebalance.get_result() nodes_in_cluster.remove(self.servers[6]) nodes_in_cluster += [self.servers[7]] futures = pool.invokeAll(executors) for future in futures: print future.get(num_executors, TimeUnit.SECONDS) print "Executors completed!!" 
shutdown_and_await_termination(pool, num_executors) ######################################################################################################################## self.log.info("Step 57: Wait for rebalance to complete.") rebalance.get_result() reached = RestHelper(self.rest).rebalance_reached(wait_step=240) self.assertTrue(reached, "rebalance failed, stuck or did not complete") self.sleep(20) updates_from = items_start_from deletes_from = items_start_from + total_num_items / 10 items_start_from += total_num_items ######################################################################################################################## self.log.info("Step 58: Verify the docs count.") self.validate_items_count() ######################################################################################################################## self.log.info("Step 59: Delete 1M docs. Update 1M docs.") pool = Executors.newFixedThreadPool(5) num_items = self.input.param("num_items", 5000) executors = [] num_executors = 5 doc_executors = 4 executors.append( GleambookUser_Docloader(bucket, num_items / 10, updates_from, "update")) executors.append( GleambookUser_Docloader(bucket, num_items / 10, deletes_from, "delete")) executors.append( GleambookMessages_Docloader(msg_bucket, num_items / 10, updates_from, "update")) executors.append( GleambookMessages_Docloader(msg_bucket, num_items / 10, deletes_from, "delete")) futures = pool.invokeAll(executors) for future in futures: print future.get(num_executors, TimeUnit.SECONDS) print "Executors completed!!" shutdown_and_await_termination(pool, num_executors) ######################################################################################################################## self.log.info("Step 60: Verify the docs count.") self.validate_items_count() bucket.close() msg_bucket.close() cluster.disconnect() print "End Time: %s" % str( time.strftime("%H:%M:%S", time.gmtime(time.time())))
def test_volume(self): nodes_in_cluster = [self.servers[0]] print "Start Time: %s" % str( time.strftime("%H:%M:%S", time.gmtime(time.time()))) ####################################################################### self.log.info("Step 1: Add a N1QL/Index nodes") self.query_node = self.servers[1] rest = RestConnection(self.query_node) rest.set_data_path(data_path=self.query_node.data_path, index_path=self.query_node.index_path, cbas_path=self.query_node.cbas_path) result = self.add_node(self.query_node, rebalance=False) self.assertTrue(result, msg="Failed to add N1QL/Index node.") self.log.info("Step 2: Add a KV nodes") result = self.add_node(self.servers[2], services=["kv"], rebalance=True) self.assertTrue(result, msg="Failed to add KV node.") nodes_in_cluster = nodes_in_cluster + [ self.servers[1], self.servers[2] ] ####################################################################### self.log.info("Step 3: Create Couchbase buckets.") self.create_required_buckets() ####################################################################### env = DefaultCouchbaseEnvironment.builder().mutationTokensEnabled( True).computationPoolSize(5).socketConnectTimeout( 10000000).connectTimeout(10000000).maxRequestLifetime( TimeUnit.SECONDS.toMillis(1200)).build() try: System.setProperty("com.couchbase.forceIPv4", "false") logger = Logger.getLogger("com.couchbase.client") logger.setLevel(Level.SEVERE) for h in logger.getParent().getHandlers(): if isinstance(h, ConsoleHandler): h.setLevel(Level.SEVERE) cluster = CouchbaseCluster.create(env, self.master.ip) cluster.authenticate("Administrator", "password") self.bucket = cluster.openBucket("GleambookUsers") self.msg_bucket = cluster.openBucket("GleambookMessages") except CouchbaseException: print "cannot login from user: %s/%s" % (self.username, self.password) raise self.c = cluster self.items_start_from = 0 self.total_num_items = self.input.param("num_items", 5000) self.load_data() self.sleep(20, "Sleeping after 4th step.") 
self.validate_items_count() self.log.info("Step 4: Add node") result = self.add_node(self.servers[3], rebalance=False) self.assertTrue(result, msg="Failed to add node.") self.log.info("Step 5: Loading %s items" % self.total_num_items) self.load_data() self.log.info("Step 6: Rebalance Cluster") rebalance = self.rebalance() reached = RestHelper(self.rest).rebalance_reached(wait_step=120) self.assertTrue(reached, "rebalance failed, stuck or did not complete") nodes_in_cluster = nodes_in_cluster + [self.servers[3]] self.log.info("Step 7: Start Verification") self.validate_items_count() self.check_snap_start_corruption() ####################################################################### self.sleep(20) self.log.info("Step 8: Delete/Update docs.") self.update_data() self.log.info("Step 9: Verifying Data") self.validate_items_count() self.check_snap_start_corruption() ####################################################################### self.log.info("Step 10: Removing node and Rebalance cluster") rebalance = self.cluster.async_rebalance(nodes_in_cluster, [], [self.servers[3]]) nodes_in_cluster.remove(self.servers[3]) self.log.info("Step 11: Loading %s items" % self.total_num_items) self.load_data() rebalance.get_result() reached = RestHelper(self.rest).rebalance_reached(wait_step=120) self.assertTrue(reached, "rebalance failed, stuck or did not complete") self.log.info("Step 12: Verifying Data") self.validate_items_count() self.check_snap_start_corruption() ####################################################################### self.sleep(20) self.log.info("Step 13: Delete/Update docs.") self.update_data() self.log.info("Step 14: Verifying Data") self.validate_items_count() self.check_snap_start_corruption() ####################################################################### self.sleep(20) self.log.info("Step 15: Add node") result = self.add_node(self.servers[3], rebalance=False) nodes_in_cluster = nodes_in_cluster + [self.servers[3]] self.log.info("Step 16: 
Loading %s items" % self.total_num_items) self.load_data() self.log.info("Step 17: Rebalancing Cluster") rebalance = self.cluster.async_rebalance(nodes_in_cluster, [], [self.servers[2]]) rebalance.get_result() reached = RestHelper(self.rest).rebalance_reached(wait_step=120) self.assertTrue(reached, "rebalance failed, stuck or did not complete") nodes_in_cluster.remove(self.servers[2]) self.log.info("Step 18: Verifying Data") self.validate_items_count() self.check_snap_start_corruption() ####################################################################### self.sleep(20) self.log.info("Step 19: Delete/Update docs.") self.update_data() self.log.info("Step 20: Verifying Data") self.validate_items_count() self.check_snap_start_corruption() ####################################################################### self.sleep(20) self.log.info("Step 21: Add node") result = self.add_node(self.servers[2], rebalance=False) self.log.info("Step 22: Loading %s items" % self.total_num_items) self.load_data() self.log.info("Step 23: Rebalancing Cluster") rebalance = self.rebalance() reached = RestHelper(self.rest).rebalance_reached(wait_step=120) self.assertTrue(reached, "rebalance failed, stuck or did not complete") nodes_in_cluster = nodes_in_cluster + [self.servers[2]] self.log.info("Step 24: Verifying Data") self.validate_items_count() self.check_snap_start_corruption() ####################################################################### self.sleep(20) self.log.info("Step 25: Delete/Update docs.") self.update_data() self.log.info("Step 26: Verifying Data") self.validate_items_count() self.check_snap_start_corruption() ####################################################################### self.sleep(20) self.log.info("Step 27: Add node") result = self.add_node(self.servers[4], rebalance=False) self.log.info("Step 28: Loading %s items" % self.total_num_items) self.load_data() self.log.info("Step 29: Rebalancing Cluster") rebalance = self.rebalance() nodes_in_cluster = 
nodes_in_cluster + [self.servers[4]] reached = RestHelper(self.rest).rebalance_reached(wait_step=120) self.assertTrue(reached, "rebalance failed, stuck or did not complete") self.log.info("Step 30: Verifying Data") self.validate_items_count() self.check_snap_start_corruption() ####################################################################### self.sleep(20) self.log.info("Step 31: Delete/Update docs.") self.update_data() self.log.info("Step 32: Verifying Data") self.validate_items_count() self.check_snap_start_corruption() ####################################################################### self.sleep(20) self.log.info("Step 33: Removing node, Rebalancing Cluster") rebalance = self.cluster.async_rebalance(nodes_in_cluster, [], [self.servers[3]]) nodes_in_cluster.remove(self.servers[3]) self.log.info("Step 34: Loading %s items" % self.total_num_items) self.load_data() rebalance.get_result() reached = RestHelper(self.rest).rebalance_reached(wait_step=120) self.assertTrue(reached, "rebalance failed, stuck or did not complete") self.log.info("Step 35: Verifying Data") self.validate_items_count() self.check_snap_start_corruption() ####################################################################### self.sleep(20) self.log.info("Step 36: Adding 3 nodes") otp1 = self.add_node(self.servers[5], rebalance=False) otp2 = self.add_node(self.servers[6], rebalance=False) otp3 = self.add_node(self.servers[7], rebalance=False) self.log.info("Step 37: Loading %s items" % self.total_num_items) self.load_data() self.log.info("Step 38: Rebalancing Cluster") rebalance = self.rebalance() nodes_in_cluster = nodes_in_cluster + [ self.servers[5], self.servers[6], self.servers[7] ] reached = RestHelper(self.rest).rebalance_reached(wait_step=120) self.assertTrue(reached, "rebalance failed, stuck or did not complete") self.log.info("Step 39: Verifying Data") self.validate_items_count() self.check_snap_start_corruption() 
####################################################################### self.log.info("Step 40: Graceful failover node") self.rest.fail_over(otp3.id, graceful=True) self.log.info("Step 41: Loading %s items" % self.total_num_items) self.load_data() self.sleep(10) reached = RestHelper(self.rest).rebalance_reached(wait_step=120) self.assertTrue(reached, "rebalance failed, stuck or did not complete") self.log.info("Step 42: Rebalancing Cluster") rebalance = self.rebalance() nodes_in_cluster.remove(self.servers[7]) reached = RestHelper(self.rest).rebalance_reached(wait_step=120) self.assertTrue(reached, "rebalance failed, stuck or did not complete") ####################################################################### self.log.info("Step 43: Adding node and rebalancing") otp3 = self.add_node(self.servers[7], rebalance=True) nodes_in_cluster = nodes_in_cluster + [self.servers[7]] ####################################################################### self.log.info("Step 44: Graceful failover node") self.rest.fail_over(otp3.id, graceful=True) self.log.info("Step 41: Loading %s items" % self.total_num_items) self.load_data() self.sleep(10) reached = RestHelper(self.rest).rebalance_reached(wait_step=120) self.assertTrue(reached, "rebalance failed, stuck or did not complete") self.log.info("Step 45: Delta recover node") self.rest.set_recovery_type(otp3.id, "delta") self.log.info("Step 46: Add node back to cluster") self.rest.add_back_node(otp3.id) rebalance = self.rebalance() reached = RestHelper(self.rest).rebalance_reached(wait_step=120) self.assertTrue(reached, "rebalance failed, stuck or did not complete") self.log.info("Step 47: Graceful failover node") self.rest.fail_over(otp2.id, graceful=True) self.log.info("Step 48: Loading %s items" % self.total_num_items) self.load_data() self.sleep(10) reached = RestHelper(self.rest).rebalance_reached(wait_step=120) self.assertTrue(reached, "rebalance failed, stuck or did not complete") self.log.info("Step 49: Delta recover 
node") self.rest.set_recovery_type(otp2.id, "full") self.log.info("Step 50: Add node back to cluster") self.rest.add_back_node(otp2.id) rebalance = self.rebalance() reached = RestHelper(self.rest).rebalance_reached(wait_step=120) self.assertTrue(reached, "rebalance failed, stuck or did not complete") self.bucket.close() self.msg_bucket.close() cluster.disconnect()
class PerfBase(unittest.TestCase): """ specURL = http://hub.internal.couchbase.org/confluence/display/cbit/Black+Box+Performance+Test+Matrix """ # The setUpBaseX() methods allow subclasses to resequence the setUp() and # skip cluster configuration. def setUpBase0(self): self.log = logger.Logger.get_logger() self.input = TestInputSingleton.input if self.input.param("log_level", None): self.log.setLevel(level=0) for hd in self.log.handlers: if str(hd.__class__).find('FileHandler') != -1: hd.setLevel(level=logging.DEBUG) else: hd.setLevel(level=getattr( logging, self.input.param("log_level", None))) self.vbucket_count = PerfDefaults.vbuckets self.sc = None if self.parami("tear_down_on_setup", PerfDefaults.tear_down_on_setup) == 1: self.tearDown() # Tear down in case previous run had unclean death master = self.input.servers[0] self.set_up_rest(master) def setUpBase1(self): if max(self.parami('num_buckets', 1), self.parami( 'xdcr_num_buckets', 1)) > 1: bucket = 'bucket-0' else: bucket = self.param('bucket', 'default') vBuckets = self.rest.get_vbuckets(bucket) self.vbucket_count = len(vBuckets) if vBuckets else 0 def setUp(self): self.setUpBase0() mc_threads = self.parami("mc_threads", PerfDefaults.mc_threads) if mc_threads != PerfDefaults.mc_threads: for node in self.input.servers: self.set_mc_threads(node, mc_threads) erlang_schedulers = self.param("erlang_schedulers", PerfDefaults.erlang_schedulers) if erlang_schedulers: ClusterOperationHelper.set_erlang_schedulers( self.input.servers, erlang_schedulers) master = self.input.servers[0] self.is_multi_node = False self.data_path = master.data_path # Number of items loaded by load() method. # Does not include or count any items that came from set_up_dgm(). 
# self.num_items_loaded = 0 if self.input.clusters: for cluster in self.input.clusters.values(): master = cluster[0] self.set_up_rest(master) self.set_up_cluster(master) else: master = self.input.servers[0] self.set_up_cluster(master) # Rebalance if self.input.clusters: for cluster in self.input.clusters.values(): num_nodes = self.parami("num_nodes_before", len(cluster)) self.rebalance_nodes(num_nodes, cluster) else: num_nodes = self.parami("num_nodes", 10) self.rebalance_nodes(num_nodes) if self.input.clusters: for cluster in self.input.clusters.values(): master = cluster[0] self.set_up_rest(master) self.set_up_buckets() else: self.set_up_buckets() self.set_up_proxy() if self.input.clusters: for cluster in self.input.clusters.values(): master = cluster[0] self.set_up_rest(master) self.reconfigure() else: self.reconfigure() if self.parami("dgm", getattr(self, "dgm", 1)): self.set_up_dgm() time.sleep(10) self.setUpBase1() if self.input.clusters: for cluster in self.input.clusters.values(): self.wait_until_warmed_up(cluster[0]) else: self.wait_until_warmed_up() ClusterOperationHelper.flush_os_caches(self.input.servers) def set_up_rest(self, master): self.rest = RestConnection(master) self.rest_helper = RestHelper(self.rest) def set_up_cluster(self, master): """Initialize cluster""" self.log.info("setting up cluster") self.rest.init_cluster(master.rest_username, master.rest_password) memory_quota = self.parami('mem_quota', PerfDefaults.mem_quota) self.rest.init_cluster_memoryQuota(master.rest_username, master.rest_password, memoryQuota=memory_quota) def _get_bucket_names(self, num_buckets): """ Get a list of bucket names """ if num_buckets > 1: buckets = ['bucket-{0}'.format(i) for i in range(num_buckets)] else: buckets = [self.param('bucket', 'default')] return buckets def get_bucket_conf(self): """ retrieve bucket configurations""" num_buckets = max(self.parami('num_buckets', 1), self.parami('xdcr_num_buckets', 1)) self.buckets = self._get_bucket_names(num_buckets) 
def set_up_buckets(self):
    """Set up data bucket(s).

    Splits the memory quota evenly across buckets, creates each bucket
    via REST, and blocks until its vbucket map is ready.
    """
    self.log.info("setting up buckets")
    self.get_bucket_conf()
    for bucket in self.buckets:
        bucket_ram_quota = self.parami('mem_quota',
                                       PerfDefaults.mem_quota)
        bucket_threads_num = self.parami('threads_number',
                                         PerfDefaults.threads_number)
        # Divide the total quota evenly among all buckets.
        bucket_ram_quota /= max(self.parami('num_buckets', 1),
                                self.parami('xdcr_num_buckets', 1))
        replicas = self.parami('replicas', getattr(self, 'replicas', 1))
        index_replicas = self.parami('index_replicas', 0)

        self.rest.create_bucket(bucket=bucket,
                                ramQuotaMB=bucket_ram_quota,
                                replicaNumber=replicas,
                                authType='sasl',
                                threadsNumber=bucket_threads_num,
                                replica_index=index_replicas)

        status = self.rest_helper.vbucket_map_ready(bucket, 60)
        self.assertTrue(status, msg='vbucket_map not ready .. timed out')
        status = self.rest_helper.bucket_exists(bucket)
        self.assertTrue(status,
                        msg='unable to create {0} bucket'.format(bucket))

def reconfigure(self):
    """Customize basic Couchbase setup"""
    self.log.info("customizing setup")
    self.set_loglevel()
    self.customize_xdcr_settings()
    self.set_autocompaction()
    self.set_exp_pager_stime()
    self.set_rebalance_options()

def set_rebalance_options(self):
    """Apply rebalance tuning options via ns_server diag/eval."""
    # rebalanceMovesBeforeCompaction
    rmbc = self.parami('rebalance_moves_before_compaction', 0)
    if rmbc:
        cmd = 'ns_config:set(rebalance_moves_before_compaction, {0}).'\
            .format(rmbc)
        self.rest.diag_eval(cmd)

def set_exp_pager_stime(self):
    """Override the expiry pager sleep time if a non-default was given."""
    exp_pager_stime = self.param('exp_pager_stime',
                                 PerfDefaults.exp_pager_stime)
    if exp_pager_stime != PerfDefaults.exp_pager_stime:
        self.set_ep_param('flush_param', 'exp_pager_stime',
                          exp_pager_stime)

def set_loglevel(self):
    """Set custom loglevel"""
    loglevel = self.param('loglevel', None)
    if loglevel:
        self.rest.set_global_loglevel(loglevel)

def set_mc_threads(self, node, mc_threads):
    """Change number of memcached threads"""
    rest = RestConnection(node)
    rest.set_mc_threads(mc_threads)
    self.log.info("num of memcached threads = {0}".format(mc_threads))

def customize_xdcr_settings(self):
    """Set custom XDCR environment variables.

    Only the first matching param (in priority order) is applied to every
    replication of every cluster.
    """
    max_concurrent_reps_per_doc = self.param('max_concurrent_reps_per_doc',
                                             None)
    xdcr_doc_batch_size_kb = self.param('xdcr_doc_batch_size_kb', None)
    xdcr_checkpoint_interval = self.param('xdcr_checkpoint_interval', None)
    # NOTE(review): read but never applied below — dead parameter?
    xdcr_latency_optimization = self.param('xdcr_latency_optimization',
                                           None)

    if max_concurrent_reps_per_doc:
        param = 'xdcrMaxConcurrentReps'
        value = max_concurrent_reps_per_doc
    elif xdcr_doc_batch_size_kb:
        param = 'xdcrDocBatchSizeKb'
        value = xdcr_doc_batch_size_kb
    elif xdcr_checkpoint_interval:
        param = 'xdcrCheckpointInterval'
        value = xdcr_checkpoint_interval
    else:
        return

    self.log.info("changing {0} to {1}".format(param, value))

    for servers in self.input.clusters.values():
        rest_conn = RestConnection(servers[0])
        replications = rest_conn.get_replications()
        for repl in replications:
            src_bucket = repl.get_src_bucket()
            dst_bucket = repl.get_dest_bucket()
            rest_conn.set_xdcr_param(src_bucket.name, dst_bucket.name,
                                     param, value)

def set_ep_compaction(self, comp_ratio):
    """Set up ep_engine side compaction ratio"""
    for server in self.input.servers:
        shell = RemoteMachineShellConnection(server)
        cmd = "/opt/couchbase/bin/cbepctl localhost:11210 "\
              "set flush_param db_frag_threshold {0}".format(comp_ratio)
        self._exec_and_log(shell, cmd)
        shell.disconnect()

def set_autocompaction(self, disable_view_compaction=False):
    """Set custom auto-compaction settings"""
    try:
        # Parallel database and view compaction
        parallel_compaction = self.param("parallel_compaction",
                                         PerfDefaults.parallel_compaction)
        # Database fragmentation threshold
        db_compaction = self.parami("db_compaction",
                                    PerfDefaults.db_compaction)
        self.log.info("database compaction = {0}".format(db_compaction))

        # ep_engine fragementation threshold
        ep_compaction = self.parami("ep_compaction",
                                    PerfDefaults.ep_compaction)
        if ep_compaction != PerfDefaults.ep_compaction:
            self.set_ep_compaction(ep_compaction)
            self.log.info(
                "ep_engine compaction = {0}".format(ep_compaction))

        # View fragmentation threshold
        if disable_view_compaction:
            view_compaction = 100
        else:
            view_compaction = self.parami("view_compaction",
                                          PerfDefaults.view_compaction)
        # Set custom auto-compaction settings
        self.rest.set_auto_compaction(
            parallelDBAndVC=parallel_compaction,
            dbFragmentThresholdPercentage=db_compaction,
            viewFragmntThresholdPercentage=view_compaction)
    except Exception as e:
        # It's very hard to determine what exception it can raise.
        # Therefore we have to use general handler.
        self.log.error(
            "Error while changing compaction settings: {0}".format(e))

def set_ep_param(self, type, param, value):
    """ Set ep-engine specific param, using cbepctl
    type: paramter type, e.g: flush_param, tap_param, etc
    """
    # NOTE: 'type' shadows the builtin; kept for interface compatibility.
    bucket = Bucket(name=self.buckets[0], authType="sasl", saslPassword="")
    for server in self.input.servers:
        shell = RemoteMachineShellConnection(server)
        shell.execute_cbepctl(bucket, "", "set %s" % type, param, value)
        shell.disconnect()

def tearDown(self):
    """Tear down proxy, stats collector, buckets and cluster as configured."""
    # NOTE(review): tear_down == 1 *skips* the routine, which reads
    # inverted relative to the param name — confirm intended flag sense.
    if self.parami("tear_down", 0) == 1:
        self.log.info("routine skipped")
        return

    self.log.info("routine starts")

    if self.parami("tear_down_proxy", 1) == 1:
        self.tear_down_proxy()
    else:
        self.log.info("proxy tearDown skipped")

    if self.sc is not None:
        self.sc.stop()
        self.sc = None

    if self.parami("tear_down_bucket", 0) == 1:
        self.tear_down_buckets()
    else:
        self.log.info("bucket tearDown skipped")

    if self.parami("tear_down_cluster", 1) == 1:
        self.tear_down_cluster()
    else:
        self.log.info("cluster tearDown skipped")

    self.log.info("routine finished")

def tear_down_buckets(self):
    """Delete every bucket on every server (asserts on failure)."""
    self.log.info("tearing down bucket")
    BucketOperationHelper.delete_all_buckets_or_assert(
        self.input.servers, self)
    self.log.info("bucket teared down")

def tear_down_cluster(self):
    """Eject all nodes and wait for ns_server to settle."""
    self.log.info("tearing down cluster")
    ClusterOperationHelper.cleanup_cluster(self.input.servers)
    ClusterOperationHelper.wait_for_ns_servers_or_assert(
        self.input.servers, self)
    self.log.info("Cluster teared down")

def set_up_proxy(self, bucket=None):
    """Set up and start Moxi"""
    if self.input.moxis:
        self.log.info("setting up proxy")
        bucket = bucket or self.param('bucket', 'default')
        shell = RemoteMachineShellConnection(self.input.moxis[0])
        shell.start_moxi(self.input.servers[0].ip, bucket,
                         self.input.moxis[0].port)
        shell.disconnect()

def tear_down_proxy(self):
    """Stop Moxi on the first proxy host, if any proxies were configured."""
    if len(self.input.moxis) > 0:
        shell = RemoteMachineShellConnection(self.input.moxis[0])
        shell.stop_moxi()
        shell.disconnect()

# Returns "host:port" of moxi to hit.
def target_host_port(self, bucket='default', use_direct=False):
    """Resolve the memcached endpoint to target.

    Priority: direct node port 11210 (if use_direct), then the 'moxi'
    test param, then a configured moxi host, then the bucket's own
    server-side moxi port.
    """
    rv = self.param('moxi', None)
    if use_direct:
        # NOTE(review): use_direct wins over an explicit 'moxi' param.
        return "%s:%s" % (self.input.servers[0].ip, '11210')
    if rv:
        return rv
    if len(self.input.moxis) > 0:
        return "%s:%s" % (self.input.moxis[0].ip, self.input.moxis[0].port)
    return "%s:%s" % (self.input.servers[0].ip,
                      self.rest.get_bucket(bucket).nodes[0].moxi)

def protocol_parse(self, protocol_in, use_direct=False):
    """Split a protocol spec into (protocol, host_port, user, pswd).

    Accepts either a URL form like 'membase-binary://user:pswd@host:port'
    (or 'couchbase://...') or a bare protocol name, in which case the
    target endpoint and REST credentials come from the test params.
    """
    if protocol_in.find('://') >= 0:
        if protocol_in.find("couchbase:") >= 0:
            protocol = "couchbase"
        else:
            # Normalize to '<scheme>-binary' keeping only two dash-parts.
            protocol = \
                '-'.join(((["membase"] +
                           protocol_in.split("://"))[-2] +
                          "-binary").split('-')[0:2])
        # Everything after the last '@' is host:port ...
        host_port = ('@' + protocol_in.split("://")[-1]).split('@')[-1]
        # ... everything before it is user:pswd (either may be empty).
        user, pswd = (('@' + protocol_in.split("://")[-1]).split('@')[-2] +
                      ":").split(':')[0:2]
    else:
        protocol = 'memcached-' + protocol_in
        host_port = self.target_host_port(use_direct=use_direct)
        user = self.param("rest_username", "Administrator")
        pswd = self.param("rest_password", "password")
    return protocol, host_port, user, pswd

def mk_protocol(self, host, port='8091', prefix='membase-binary'):
    """Build a protocol URL, honoring a 'protocol' test-param override."""
    return self.param('protocol', prefix + '://' + host + ':' + port)

def get_backups(self, protocol):
    """ Get backup server lists for memcached-binary """
    port = protocol.split(":")[-1]
    return map(lambda server: "%s:%s" % (server.ip, port),
               self.input.servers[1:])

def restartProxy(self, bucket=None):
    """Bounce Moxi (stop then start against the given bucket)."""
    self.tear_down_proxy()
    self.set_up_proxy(bucket)

def set_up_dgm(self):
    """Download fragmented, DGM dataset onto each cluster node, if not
    already locally available.

    The number of vbuckets and database schema must match the target
    cluster.

    Shutdown all cluster nodes.

    Do a cluster-restore.

    Restart all cluster nodes."""
    bucket = self.param("bucket", "default")
    ClusterOperationHelper.stop_cluster(self.input.servers)
    for server in self.input.servers:
        remote = RemoteMachineShellConnection(server)
        #TODO: Better way to pass num_nodes and db_size?
        self.get_data_files(remote, bucket, 1, 10)
        remote.disconnect()
    ClusterOperationHelper.start_cluster(self.input.servers)

def get_data_files(self, remote, bucket, num_nodes, db_size):
    """Fetch (if missing) and untar a prebuilt dataset on a remote node.

    Chooses a couchbase or membase S3 path depending on what is
    installed, then extracts it into the node's data directory.
    """
    base = 'https://s3.amazonaws.com/database-analysis'
    dir = '/tmp/'
    if remote.is_couchbase_installed():
        dir = dir + '/couchbase/{0}-{1}-{2}/'.format(num_nodes, 256,
                                                     db_size)
        output, error = remote.execute_command('mkdir -p {0}'.format(dir))
        remote.log_command_output(output, error)
        file = '{0}_cb.tar.gz'.format(bucket)
        base_url = base + '/couchbase/{0}-{1}-{2}/{3}'.format(
            num_nodes, 256, db_size, file)
    else:
        dir = dir + '/membase/{0}-{1}-{2}/'.format(num_nodes, 1024,
                                                   db_size)
        output, error = remote.execute_command('mkdir -p {0}'.format(dir))
        remote.log_command_output(output, error)
        file = '{0}_mb.tar.gz'.format(bucket)
        base_url = base + '/membase/{0}-{1}-{2}/{3}'.format(
            num_nodes, 1024, db_size, file)

    info = remote.extract_remote_info()
    wget_command = 'wget'
    if info.type.lower() == 'windows':
        wget_command = \
            "cd {0} ;cmd /c 'c:\\automation\\wget.exe --no-check-certificate"\
            .format(dir)

    # Check if the file exists on the remote server else download the gzipped version
    # Extract if necessary
    exist = remote.file_exists(dir, file)
    if not exist:
        additional_quote = ""
        if info.type.lower() == 'windows':
            additional_quote = "'"
        command = "{0} -v -O {1}{2} {3} {4} ".format(wget_command, dir,
                                                     file, base_url,
                                                     additional_quote)
        output, error = remote.execute_command(command)
        remote.log_command_output(output, error)

    if remote.is_couchbase_installed():
        if info.type.lower() == 'windows':
            destination_folder = testconstants.WIN_COUCHBASE_DATA_PATH
        else:
            destination_folder = testconstants.COUCHBASE_DATA_PATH
    else:
        if info.type.lower() == 'windows':
            destination_folder = testconstants.WIN_MEMBASE_DATA_PATH
        else:
            destination_folder = testconstants.MEMBASE_DATA_PATH
    if self.data_path:
        # Explicit data_path from the test input wins over defaults.
        destination_folder = self.data_path
    untar_command = 'cd {1}; tar -xzf {0}'.format(dir + file,
                                                  destination_folder)
    output, error = remote.execute_command(untar_command)
    remote.log_command_output(output, error)

def _exec_and_log(self, shell, cmd):
    """helper method to execute a command and log output"""
    if not cmd or not shell:
        return
    output, error = shell.execute_command(cmd)
    shell.log_command_output(output, error)

def _build_tar_name(self, bucket, version="unknown_version",
                    file_base=None):
    """build tar file name. {file_base}-{version}-{bucket}.tar.gz """
    if not file_base:
        # Default to the conf file's base name (without extension).
        file_base = os.path.splitext(
            os.path.basename(
                self.param("conf_file", PerfDefaults.conf_file)))[0]
    return "{0}-{1}-{2}.tar.gz".format(file_base, version, bucket)

def _save_snapshot(self, server, bucket, file_base=None):
    """Save data files to a snapshot"""
    src_data_path = os.path.dirname(server.data_path or
                                    testconstants.COUCHBASE_DATA_PATH)
    dest_data_path = "{0}-snapshots".format(src_data_path)

    self.log.info(
        "server={0}, src_data_path={1}, dest_data_path={2}".format(
            server.ip, src_data_path, dest_data_path))

    shell = RemoteMachineShellConnection(server)

    build_name, short_version, full_version = \
        shell.find_build_version("/opt/couchbase/", "VERSION.txt", "cb")

    dest_file = self._build_tar_name(bucket, full_version, file_base)

    self._exec_and_log(shell, "mkdir -p {0}".format(dest_data_path))

    # save as gzip file, if file exsits, overwrite
    # TODO: multiple buckets
    zip_cmd = "cd {0}; tar -cvzf {1}/{2} {3} {3}-data _*"\
        .format(src_data_path, dest_data_path, dest_file, bucket)
    self._exec_and_log(shell, zip_cmd)

    shell.disconnect()
    return True

def _load_snapshot(self, server, bucket, file_base=None, overwrite=True):
    """Load data files from a snapshot"""
    dest_data_path = os.path.dirname(server.data_path or
                                     testconstants.COUCHBASE_DATA_PATH)
    src_data_path = "{0}-snapshots".format(dest_data_path)

    self.log.info(
        "server={0}, src_data_path={1}, dest_data_path={2}".format(
            server.ip, src_data_path, dest_data_path))

    shell = RemoteMachineShellConnection(server)

    build_name, short_version, full_version = \
        shell.find_build_version("/opt/couchbase/", "VERSION.txt", "cb")

    src_file = self._build_tar_name(bucket, full_version, file_base)

    if not shell.file_exists(src_data_path, src_file):
        self.log.error("file '{0}/{1}' does not exist".format(
            src_data_path, src_file))
        shell.disconnect()
        return False

    if not overwrite:
        # Preserve the current data as a timestamped snapshot first.
        self._save_snapshot(server, bucket, "{0}.tar.gz".format(
            time.strftime(PerfDefaults.strftime)))  # TODO: filename

    rm_cmd = "rm -rf {0}/{1} {0}/{1}-data {0}/_*".format(
        dest_data_path, bucket)
    self._exec_and_log(shell, rm_cmd)

    unzip_cmd = "cd {0}; tar -xvzf {1}/{2}".format(dest_data_path,
                                                   src_data_path, src_file)
    self._exec_and_log(shell, unzip_cmd)

    shell.disconnect()
    return True

def save_snapshots(self, file_base, bucket):
    """Save snapshots on all servers"""
    if not self.input.servers or not bucket:
        self.log.error("invalid server list or bucket name")
        return False

    # Nodes must be down while their data files are archived.
    ClusterOperationHelper.stop_cluster(self.input.servers)

    for server in self.input.servers:
        self._save_snapshot(server, bucket, file_base)

    ClusterOperationHelper.start_cluster(self.input.servers)
    return True

def load_snapshots(self, file_base, bucket):
    """Load snapshots on all servers"""
    if not self.input.servers or not bucket:
        self.log.error("invalid server list or bucket name")
        return False

    # Nodes must be down while their data files are replaced.
    ClusterOperationHelper.stop_cluster(self.input.servers)

    for server in self.input.servers:
        if not self._load_snapshot(server, bucket, file_base):
            # Restart the cluster even on failure before bailing out.
            ClusterOperationHelper.start_cluster(self.input.servers)
            return False

    ClusterOperationHelper.start_cluster(self.input.servers)
    return True

def spec(self, reference):
    """Record the stats spec name (overridable via the 'spec' param)."""
    self.spec_reference = self.param("spec", reference)

def mk_stats(self, verbosity):
    """Factory for the stats collector; subclasses may override."""
    return StatsCollector(verbosity)

def _get_src_version(self):
    """get testrunner version"""
    try:
        result = subprocess.Popen(['git', 'rev-parse', 'HEAD'],
                                  stdout=subprocess.PIPE).communicate()[0]
    except subprocess.CalledProcessError as e:
        self.log.error("unable to get src code version : {0}".format(e))
        return "unknown version"
    # Short 7-char commit hash.
    return result.rstrip()[:7]

def start_stats(self, stats_spec, servers=None,
                process_names=('memcached', 'beam.smp'), test_params=None,
                client_id='', collect_server_stats=True, ddoc=None):
    """Start the stats collector for this run; returns it (or None).

    Disabled entirely when the 'stats' param is 0.
    """
    if self.parami('stats', 1) == 0:
        return None

    servers = servers or self.input.servers
    clusters = None
    if hasattr(self, "get_region"):
        if self.parami("access_phase", 0):
            clusters = self.input.clusters
            # Swap cluster order so "west" region reports consistently.
            if self.get_region() == "west":
                clusters[0], clusters[1] = clusters[1], clusters[0]
    sc = self.mk_stats(False)
    bucket = self.param("bucket", "default")
    sc.start(servers, bucket, process_names, stats_spec, client_id,
             collect_server_stats=collect_server_stats, ddoc=ddoc,
             clusters=clusters)
    # NOTE(review): test_params must not be None here — confirm callers.
    test_params['testrunner'] = self._get_src_version()
    self.test_params = test_params
    self.sc = sc
    return self.sc

def end_stats(self, sc, total_stats=None, stats_spec=None):
    """Stop the collector and export its data under stats_spec."""
    if sc is None:
        return
    if stats_spec is None:
        stats_spec = self.spec_reference
    if total_stats:
        sc.total_stats(total_stats)
    self.log.info("stopping stats collector")
    sc.stop()
    self.log.info("stats collector is stopped")
    sc.export(stats_spec, self.test_params)

def load(self, num_items, min_value_size=None,
         kind='binary',
         protocol='binary',
         ratio_sets=1.0,
         ratio_hot_sets=0.0,
         ratio_hot_gets=0.0,
         ratio_expirations=0.0,
         expiration=None,
         prefix="",
         doc_cache=1,
         use_direct=True,
         report=0,
         start_at=-1,
         collect_server_stats=True,
         is_eperf=False,
         hot_shift=0):
    """Run the mcsoda load (or hot-reload) phase.

    Builds the mcsoda cfg dict from params/arguments, resolves the target
    protocol/endpoint/credentials, runs mcsoda, and returns
    (ops, start_time, end_time) where ops is a dict of totals.
    """
    cfg = {
        'max-items': num_items,
        'max-creates': num_items,
        'max-ops-per-sec': self.parami("load_mcsoda_max_ops_sec",
                                       PerfDefaults.mcsoda_max_ops_sec),
        'min-value-size': min_value_size or self.parami("min_value_size",
                                                        1024),
        'ratio-sets': self.paramf("load_ratio_sets", ratio_sets),
        'ratio-misses': self.paramf("load_ratio_misses", 0.0),
        'ratio-creates': self.paramf("load_ratio_creates", 1.0),
        'ratio-deletes': self.paramf("load_ratio_deletes", 0.0),
        'ratio-hot': 0.0,
        'ratio-hot-sets': ratio_hot_sets,
        'ratio-hot-gets': ratio_hot_gets,
        'ratio-expirations': ratio_expirations,
        'expiration': expiration or 0,
        'exit-after-creates': 1,
        'json': int(kind == 'json'),
        'batch': self.parami("batch", PerfDefaults.batch),
        'vbuckets': self.vbucket_count,
        'doc-cache': doc_cache,
        'prefix': prefix,
        'report': report,
        'hot-shift': hot_shift,
        'cluster_name': self.param("cluster_name", "")
    }
    cur = {}
    if start_at >= 0:
        # Resume counters so keys continue from a previous load.
        cur['cur-items'] = start_at
        cur['cur-gets'] = start_at
        cur['cur-sets'] = start_at
        cur['cur-ops'] = cur['cur-gets'] + cur['cur-sets']
        cur['cur-creates'] = start_at
        cfg['max-creates'] = start_at + num_items
        cfg['max-items'] = cfg['max-creates']

    cfg_params = cfg.copy()
    cfg_params['test_time'] = time.time()
    cfg_params['test_name'] = self.id()

    # phase: 'load' or 'reload'
    phase = "load"
    if self.parami("hot_load_phase", 0) == 1:
        # all gets
        if self.parami("hot_load_get", PerfDefaults.hot_load_get) == 1:
            cfg['ratio-sets'] = 0
            cfg['exit-after-creates'] = 0
            cfg['exit-after-gets'] = 1
            cfg['max-gets'] = start_at + num_items
        phase = "reload"

    if is_eperf:
        # Only the prefix-0 client collects server-side stats.
        collect_server_stats = self.parami("prefix", 0) == 0
        client_id = self.parami("prefix", 0)
        sc = self.start_stats(
            "{0}.{1}".format(self.spec_reference,
                             phase),  # stats spec e.x: testname.load
            test_params=cfg_params,
            client_id=client_id,
            collect_server_stats=collect_server_stats)
    # NOTE(review): 'sc' is only bound when is_eperf is True; the
    # stats_collector=sc below would raise NameError otherwise — confirm
    # all callers pass is_eperf=True or that this path is unreachable.

    # For Black box, multi node tests
    # always use membase-binary
    if self.is_multi_node:
        protocol = self.mk_protocol(host=self.input.servers[0].ip,
                                    port=self.input.servers[0].port)

    protocol, host_port, user, pswd = \
        self.protocol_parse(protocol, use_direct=use_direct)

    if not user.strip():
        if "11211" in host_port:
            user = self.param("bucket", "default")
        else:
            user = self.input.servers[0].rest_username
    if not pswd.strip():
        if not "11211" in host_port:
            pswd = self.input.servers[0].rest_password

    self.log.info("mcsoda %s %s %s %s" % (protocol, host_port, user, pswd))
    self.log.info("mcsoda cfg:\n" + pprint.pformat(cfg))
    # NOTE(review): this logs cfg again; 'cur' was likely intended.
    self.log.info("mcsoda cur:\n" + pprint.pformat(cfg))

    cur, start_time, end_time = \
        self.mcsoda_run(cfg, cur, protocol, host_port, user, pswd,
                        stats_collector=sc,
                        heartbeat=self.parami("mcsoda_heartbeat", 0),
                        why="load",
                        bucket=self.param("bucket", "default"))
    self.num_items_loaded = num_items
    ops = {
        'tot-sets': cur.get('cur-sets', 0),
        'tot-gets': cur.get('cur-gets', 0),
        'tot-items': cur.get('cur-items', 0),
        'tot-creates': cur.get('cur-creates', 0),
        'tot-misses': cur.get('cur-misses', 0),
        "start-time": start_time,
        "end-time": end_time
    }

    if is_eperf:
        if self.parami("load_wait_until_drained", 1) == 1:
            self.wait_until_drained()
        if self.parami("load_wait_until_repl",
                       PerfDefaults.load_wait_until_repl) == 1:
            self.wait_until_repl()
        self.end_stats(sc, ops, "{0}.{1}".format(self.spec_reference,
                                                 phase))

    return ops, start_time, end_time

def mcsoda_run(self, cfg, cur, protocol, host_port, user, pswd,
               stats_collector=None, stores=None, ctl=None, heartbeat=0,
               why="", bucket="default", backups=None):
    """Thin pass-through to mcsoda.run (overridable seam for subclasses)."""
    return mcsoda.run(cfg, cur, protocol, host_port, user, pswd,
                      stats_collector=stats_collector, stores=stores,
                      ctl=ctl, heartbeat=heartbeat, why=why, bucket=bucket,
                      backups=backups)

def rebalance_nodes(self, num_nodes, cluster=None):
    """Rebalance cluster(s) if more than 1 node provided"""
    if len(self.input.servers) == 1 or num_nodes == 1:
        self.log.warn("running on single node cluster")
        return
    else:
        self.log.info(
            "rebalancing nodes - num_nodes = {0}".format(num_nodes))
    if not cluster:
        cluster = self.input.servers
    # num_nodes includes the master, so rebalance in num_nodes - 1.
    status, _ = RebalanceHelper.rebalance_in(cluster, num_nodes - 1,
                                             do_shuffle=False)
    self.assertTrue(status)

def delayed_rebalance_worker(self, servers, num_nodes, delay_seconds, sc,
                             max_retries=PerfDefaults.reb_max_retries,
                             reb_mode=PerfDefaults.REB_MODE.IN):
    """Sleep, then run a rebalance (in/out/swap), retrying on failure and
    recording the duration into the stats collector."""
    time.sleep(delay_seconds)
    # NOTE(review): gmt_now is computed but never used.
    gmt_now = time.strftime(PerfDefaults.strftime, time.gmtime())
    self.log.info("rebalance started")

    if not sc:
        self.log.error("invalid stats collector")
        return
    status = False
    retries = 0
    while not status and retries <= max_retries:
        start_time = time.time()
        if reb_mode == PerfDefaults.REB_MODE.OUT:
            status, nodes = RebalanceHelper.rebalance_out(servers,
                                                          num_nodes)
        elif reb_mode == PerfDefaults.REB_MODE.SWAP:
            status, nodes = RebalanceHelper.rebalance_swap(servers,
                                                           num_nodes)
        else:
            # Only the first attempt performs the pre-rebalance check.
            status, nodes = RebalanceHelper.rebalance_in(
                servers, num_nodes - 1, do_check=(not retries))
        end_time = time.time()
        self.log.info("status: {0}, nodes: {1}, retries: {2}".format(
            status, nodes, retries))
        if not status:
            retries += 1
            time.sleep(delay_seconds)
    # Record duration of the last (successful or final) attempt.
    sc.reb_stats(start_time, end_time - start_time)

    if self.parami("master_events", PerfDefaults.master_events):
        filename = "master_events.log"
        with open(filename, "w") as f:
            f.write(self.rest.diag_master_events()[1])

def delayed_rebalance(self, num_nodes, delay_seconds=10,
                      max_retries=PerfDefaults.reb_max_retries, reb_mode=0,
                      sync=False):
    """Kick off delayed_rebalance_worker either inline (sync=True) or on a
    daemon thread."""
    self.log.info("delayed_rebalance")
    if sync:
        PerfBase.delayed_rebalance_worker(self, self.input.servers,
                                          num_nodes, delay_seconds,
                                          self.sc, max_retries, reb_mode)
    else:
        t = threading.Thread(target=PerfBase.delayed_rebalance_worker,
                             args=(self, self.input.servers, num_nodes,
                                   delay_seconds, self.sc, max_retries,
                                   reb_mode))
        t.daemon = True
        t.start()

@staticmethod
def set_auto_compaction(server, parallel_compaction, percent_threshold):
    """Apply the same compaction threshold to both DB and views."""
    rest = RestConnection(server)
    rest.set_auto_compaction(
        parallel_compaction,
        dbFragmentThresholdPercentage=percent_threshold,
        viewFragmntThresholdPercentage=percent_threshold)

@staticmethod
def delayed_compaction_worker(servers, parallel_compaction,
                              percent_threshold, delay_seconds):
    """Sleep, then set auto-compaction on the master node."""
    time.sleep(delay_seconds)
    PerfBase.set_auto_compaction(servers[0], parallel_compaction,
                                 percent_threshold)

def delayed_compaction(self, parallel_compaction="false",
                       percent_threshold=0.01, delay_seconds=10):
    """Schedule delayed_compaction_worker on a daemon thread."""
    t = threading.Thread(target=PerfBase.delayed_compaction_worker,
                         args=(self.input.servers, parallel_compaction,
                               percent_threshold, delay_seconds))
    t.daemon = True
    t.start()

def loop(self, num_ops=None,
         num_items=None,
         max_items=None,
         max_creates=None,
         min_value_size=None,
         exit_after_creates=0,
         kind='binary',
         protocol='binary',
         clients=1,
         ratio_misses=0.0,
         ratio_sets=0.0, ratio_creates=0.0, ratio_deletes=0.0,
         ratio_hot=0.2, ratio_hot_sets=0.95, ratio_hot_gets=0.95,
         ratio_expirations=0.0,
         expiration=None,
         test_name=None,
         prefix="",
         doc_cache=1,
         use_direct=True,
         collect_server_stats=True,
         start_at=-1,
         report=0,
         ctl=None,
         hot_shift=0,
         is_eperf=False,
         ratio_queries=0,
         queries=0,
         ddoc=None):
    num_items = num_items or self.num_items_loaded

    hot_stack_size = \
        self.parami('hot_stack_size', PerfDefaults.hot_stack_size) or \
        (num_items * ratio_hot)

    cfg = {
        'max-items': max_items or num_items,
        'max-creates': max_creates or 0,
        'max-ops-per-sec': self.parami("mcsoda_max_ops_sec",
                                       PerfDefaults.mcsoda_max_ops_sec),
        'min-value-size': min_value_size or self.parami("min_value_size",
                                                        1024),
        'exit-after-creates': exit_after_creates,
        'ratio-sets': ratio_sets,
        'ratio-misses': ratio_misses,
        'ratio-creates': ratio_creates,
        'ratio-deletes': ratio_deletes,
        'ratio-hot': ratio_hot,
        'ratio-hot-sets': ratio_hot_sets,
        'ratio-hot-gets': ratio_hot_gets,
        'ratio-expirations': ratio_expirations,
        'ratio-queries': ratio_queries,
        'expiration': expiration or 0,
        'threads': clients,
        'json': int(kind == 'json'),
        'batch': self.parami("batch", PerfDefaults.batch),
        'vbuckets': self.vbucket_count,
        'doc-cache': doc_cache,
        'prefix': prefix,
        'queries': queries,
        'report': report,
        'hot-shift': hot_shift,
        'hot-stack': self.parami("hot_stack", PerfDefaults.hot_stack),
        'hot-stack-size': hot_stack_size,
        'hot-stack-rotate': self.parami("hot_stack_rotate",
                                        PerfDefaults.hot_stack_rotate),
        'cluster_name':
self.param("cluster_name", ""), 'observe': self.param("observe", PerfDefaults.observe), 'obs-backoff': self.paramf('obs_backoff', PerfDefaults.obs_backoff), 'obs-max-backoff': self.paramf('obs_max_backoff', PerfDefaults.obs_max_backoff), 'obs-persist-count': self.parami('obs_persist_count', PerfDefaults.obs_persist_count), 'obs-repl-count': self.parami('obs_repl_count', PerfDefaults.obs_repl_count), 'woq-pattern': self.parami('woq_pattern', PerfDefaults.woq_pattern), 'woq-verbose': self.parami('woq_verbose', PerfDefaults.woq_verbose), 'cor-pattern': self.parami('cor_pattern', PerfDefaults.cor_pattern), 'cor-persist': self.parami('cor_persist', PerfDefaults.cor_persist), 'time': self.parami('time', 0), 'cbm': self.parami('cbm', PerfDefaults.cbm), 'cbm-host': self.param('cbm_host', PerfDefaults.cbm_host), 'cbm-port': self.parami('cbm_port', PerfDefaults.cbm_port) } cfg_params = cfg.copy() cfg_params['test_time'] = time.time() cfg_params['test_name'] = test_name client_id = '' stores = None if is_eperf: client_id = self.parami("prefix", 0) sc = None if self.parami("collect_stats", 1): sc = self.start_stats(self.spec_reference + ".loop", test_params=cfg_params, client_id=client_id, collect_server_stats=collect_server_stats, ddoc=ddoc) self.cur = {'cur-items': num_items} if start_at >= 0: self.cur['cur-gets'] = start_at if num_ops is None: num_ops = num_items if isinstance(num_ops, int): cfg['max-ops'] = num_ops else: # Here, we num_ops looks like "time to run" tuple of... 
# ('seconds', integer_num_of_seconds_to_run) cfg['time'] = num_ops[1] # For Black box, multi node tests # always use membase-binary if self.is_multi_node: protocol = self.mk_protocol(host=self.input.servers[0].ip, port=self.input.servers[0].port) backups = self.get_backups(protocol) self.log.info("mcsoda protocol %s" % protocol) protocol, host_port, user, pswd = \ self.protocol_parse(protocol, use_direct=use_direct) if not user.strip(): if "11211" in host_port: user = self.param("bucket", "default") else: user = self.input.servers[0].rest_username if not pswd.strip(): if not "11211" in host_port: pswd = self.input.servers[0].rest_password self.log.info("mcsoda %s %s %s %s" % (protocol, host_port, user, pswd)) self.log.info("mcsoda cfg:\n" + pprint.pformat(cfg)) self.log.info("mcsoda cur:\n" + pprint.pformat(cfg)) self.log.info("mcsoda backups: %s" % backups) # For query tests always use StoreCouchbase if protocol == "couchbase": stores = [StoreCouchbase()] self.cur, start_time, end_time = \ self.mcsoda_run(cfg, self.cur, protocol, host_port, user, pswd, stats_collector=sc, ctl=ctl, stores=stores, heartbeat=self.parami("mcsoda_heartbeat", 0), why="loop", bucket=self.param("bucket", "default"), backups=backups) ops = { 'tot-sets': self.cur.get('cur-sets', 0), 'tot-gets': self.cur.get('cur-gets', 0), 'tot-items': self.cur.get('cur-items', 0), 'tot-creates': self.cur.get('cur-creates', 0), 'tot-misses': self.cur.get('cur-misses', 0), "start-time": start_time, "end-time": end_time } if self.parami("loop_wait_until_drained", PerfDefaults.loop_wait_until_drained): self.wait_until_drained() if self.parami("loop_wait_until_repl", PerfDefaults.loop_wait_until_repl): self.wait_until_repl() if self.parami("collect_stats", 1) and \ not self.parami("reb_no_fg", PerfDefaults.reb_no_fg): self.end_stats(sc, ops, self.spec_reference + ".loop") why = self.params("why", "main") prefix = self.parami("prefix", 0) self.log.info("finished") return ops, start_time, end_time def 
wait_until_drained(self): self.log.info("draining disk write queue") master = self.input.servers[0] bucket = self.param("bucket", "default") ready = RebalanceHelper.wait_for_persistence(master, bucket) self.assertTrue(ready, "not all items persisted. see logs") self.log.info("disk write queue has been drained") return time.time() def wait_until_repl(self): self.log.info("waiting for replication") master = self.input.servers[0] bucket = self.param("bucket", "default") RebalanceHelper.wait_for_stats_on_all( master, bucket, 'vb_replica_queue_size', 0, fn=RebalanceHelper.wait_for_stats_no_timeout) RebalanceHelper.wait_for_stats_on_all( master, bucket, 'ep_tap_replica_queue_itemondisk', 0, fn=RebalanceHelper.wait_for_stats_no_timeout) RebalanceHelper.wait_for_stats_on_all( master, bucket, 'ep_tap_rebalance_queue_backfillremaining', 0, fn=RebalanceHelper.wait_for_stats_no_timeout) RebalanceHelper.wait_for_stats_on_all( master, bucket, 'ep_tap_replica_qlen', 0, fn=RebalanceHelper.wait_for_stats_no_timeout) self.log.info("replication is done") def warmup(self, collect_stats=True, flush_os_cache=False): """ Restart cluster and wait for it to warm up. In current version, affect the master node only. """ if not self.input.servers: self.log.error("empty server list") return if collect_stats: client_id = self.parami("prefix", 0) test_params = { 'test_time': time.time(), 'test_name': self.id(), 'json': 0 } sc = self.start_stats(self.spec_reference + ".warmup", test_params=test_params, client_id=client_id) self.log.info("preparing to warmup cluster ...") server = self.input.servers[0] shell = RemoteMachineShellConnection(server) start_time = time.time() self.log.info("stopping couchbase ... 
({0})".format(server.ip)) shell.stop_couchbase() self.log.info("couchbase stopped ({0})".format(server.ip)) if flush_os_cache: self.log.info("flushing os cache ...") shell.flush_os_caches() shell.start_couchbase() self.log.info("couchbase restarted ({0})".format(server.ip)) self.wait_until_warmed_up() self.log.info("warmup finished") end_time = time.time() ops = { 'tot-sets': 0, 'tot-gets': 0, 'tot-items': 0, 'tot-creates': 0, 'tot-misses': 0, "start-time": start_time, "end-time": end_time } if collect_stats: self.end_stats(sc, ops, self.spec_reference + ".warmup") def wait_until_warmed_up(self, master=None): if not master: master = self.input.servers[0] bucket = self.param("bucket", "default") fn = RebalanceHelper.wait_for_mc_stats_no_timeout for bucket in self.buckets: RebalanceHelper.wait_for_stats_on_all(master, bucket, 'ep_warmup_thread', 'complete', fn=fn) def set_param(self, name, val): input = getattr(self, "input", TestInputSingleton.input) input.test_params[name] = str(val) return True def wait_for_task_completion(self, task='indexer'): """Wait for ns_server task to finish""" t0 = time.time() self.log.info("Waiting 30 seconds before {0} monitoring".format(task)) time.sleep(30) while True: tasks = self.rest.ns_server_tasks() if tasks: try: progress = [ t['progress'] for t in tasks if t['type'] == task ] except TypeError: self.log.error(tasks) else: if progress: self.log.info("{0} progress: {1}".format( task, progress)) time.sleep(10) else: break t1 = time.time() self.log.info("Time taken to perform task: {0} sec".format(t1 - t0)) def param(self, name, default_value): input = getattr(self, "input", TestInputSingleton.input) return input.test_params.get(name, default_value) def parami(self, name, default_int): return int(self.param(name, default_int)) def paramf(self, name, default_float): return float(self.param(name, default_float)) def params(self, name, default_str): return str(self.param(name, default_str))
def set_up_rest(self, master): self.rest = RestConnection(master) self.rest_helper = RestHelper(self.rest)
class FTSServerGroups(FTSBaseTest, NewUpgradeBaseTest):
    """FTS tests around server groups (rack-zone awareness): partition and
    replica placement across zones, node ejection, and group auto-failover.

    Test topology is driven by test-input params such as "server_groups"
    (e.g. "sg1-D:F|sg2-F", where D = kv node and F = fts node),
    "eject_nodes" and "eject_type".
    """

    def setUp(self):
        super(FTSServerGroups, self).setUp()
        self.rest = RestConnection(self._cb_cluster.get_master_node())
        self.helper = RestHelper(self.rest)
        self.default_group_name = "Group 1"
        # Query used by all tests; hits count is the comparison currency.
        self.fts_query = {"match": "emp", "field": "type"}
        # Start from a single default server group.
        self._cleanup_server_groups()

    def tearDown(self):
        super(FTSServerGroups, self).tearDown()

    def test_mixed_cluster(self):
        """Install an older build on kv nodes and a newer one on fts nodes,
        then re-run the node-ejection scenario on the mixed cluster."""
        self.initial_version = self._input.param('kv_build', '6.6.3-9700')
        self.product = self._input.param('product', 'couchbase-server')
        self.initial_vbuckets = 1024
        self.debug_logs = False
        self.init_nodes = True
        self.initial_build_type = None
        self.use_hostnames = False
        kv_nodes = self._cb_cluster.get_kv_nodes()
        fts_nodes = self._cb_cluster.get_fts_nodes()
        self._install(servers=kv_nodes)
        self.initial_version = self._input.param('upgrade_version',
                                                 '7.1.0-1092')
        self._install(servers=fts_nodes)
        super(FTSServerGroups, self).setUp()
        self.test_nodes_ejection()

    def test_nodes_ejection(self):
        """Query results must be identical across zones before and after
        ejecting the configured nodes."""
        eject_nodes_structure = self._input.param("eject_nodes", None)
        eject_type = self._input.param("eject_type", None)
        initial_query_zones = self._input.param("query_zone_before_eject",
                                                None).split("|")
        post_eject_query_zones = self._input.param("query_zone_after_eject",
                                                   None).split("|")

        self.build_cluster()
        self.load_data()
        idx = self.build_index()

        fts_nodes = []
        for initial_query_zone in initial_query_zones:
            fts_nodes.extend(
                self.get_zone_healthy_fts_nodes(zone=initial_query_zone))

        initial_hits = self.query_node(index=idx, node=fts_nodes[0])
        for node in fts_nodes[1:]:
            hits = self.query_node(index=idx, node=node)
            self.assertEqual(
                initial_hits, hits,
                "Difference in search results before node eject detected.")

        ejected_nodes = self.eject_nodes(
            eject_nodes_structure=eject_nodes_structure,
            eject_type=eject_type)

        post_eject_query_nodes = []
        for post_eject_query_zone in post_eject_query_zones:
            fts_nodes = self.get_zone_healthy_fts_nodes(
                zone=post_eject_query_zone)
            post_eject_query_nodes.extend(fts_nodes)

        self._maybe_rebalance()

        try:
            for healthy_fts_node in post_eject_query_nodes:
                post_eject_hits, _, _, _ = idx.execute_query(
                    self.fts_query, node=healthy_fts_node)
                self.assertEqual(
                    initial_hits, post_eject_hits,
                    "Hits are different after server groups modification!")
        finally:
            # Bring shut-down nodes back so later tests see a full cluster.
            if eject_type == "shutdown":
                for ejected_node in ejected_nodes:
                    remote = RemoteMachineShellConnection(ejected_node)
                    remote.start_couchbase()

    def test_index_modification(self):
        """All fts nodes must agree on results after the index is modified."""
        mod_type = self._input.param("mod_type", None)
        self.build_cluster()
        self.load_data()
        idx = self.build_index()
        self.update_index(index=idx, mod_type=mod_type)
        self._maybe_rebalance()
        fts_nodes = self._cb_cluster.get_fts_nodes()
        etalon_hits = self.query_node(index=idx, node=fts_nodes[0])
        for node in fts_nodes[1:]:
            hits = self.query_node(index=idx, node=node)
            self.assertEqual(
                etalon_hits, hits,
                "Found differences in fts request results between nodes after index modification")

    def test_replicas_distribution(self):
        """Each replica (plus the active) must land in a distinct zone."""
        final_replicas = self._input.param("final_replicas", 0)
        self.build_cluster()
        self.load_data()
        idx = self.build_index()
        idx.update_num_replicas(final_replicas)
        self.wait_for_indexing_complete(item_count=1000)
        self._maybe_rebalance()
        index_replica = idx.get_num_replicas()
        zones_with_replica = self.calculate_zones_with_replica(index=idx)
        self.assertEqual(
            index_replica + 1, zones_with_replica,
            f"Found incorrect replicas distribution: index replicas: {index_replica}"
            f", zones with replica count: {zones_with_replica}")

    def test_partitions_distribution(self):
        """Every zone with fts nodes must hold the full partition count."""
        index_partitions = int(self._input.param("partitions", 1))
        self.build_cluster()
        self.load_data()
        idx = self.build_index()
        self.wait_for_indexing_complete(item_count=1000)
        self._maybe_rebalance()
        for zone in self.rest.get_zone_names():
            zone_fts_nodes = self.get_zone_healthy_fts_nodes(zone=zone)
            if len(zone_fts_nodes) > 0:
                zone_partitions_count = 0
                for node in zone_fts_nodes:
                    rest_client = RestConnection(node)
                    _, num_pindexes = rest_client.get_fts_stats(
                        index_name=idx.name,
                        bucket_name=idx.source_bucket.name,
                        stat_name="num_pindexes_actual")
                    zone_partitions_count = zone_partitions_count + num_pindexes
                self.assertEqual(
                    zone_partitions_count, index_partitions,
                    "Actual initial partitions distribution differs from expected.")

    def test_server_groups_modification(self):
        """Partition distribution and query results must survive server
        group changes (add/remove/rename/swap per 'operation' param)."""
        index_partitions = int(self._input.param("partitions", 1))
        final_replicas = self._input.param("final_replicas", None)
        self.build_cluster()
        available_nodes = self.rebuild_cluster_to_initial_state()
        self.load_data()
        idx = self.build_index()
        self.wait_for_indexing_complete(item_count=1000)
        self.modify_server_groups(available_nodes=available_nodes)
        if final_replicas:
            idx.update_num_replicas(final_replicas)
        self.wait_for_indexing_complete(item_count=1000)
        self._maybe_rebalance()
        for zone in self.rest.get_zone_names():
            zone_fts_nodes = self.get_zone_healthy_fts_nodes(zone=zone)
            if len(zone_fts_nodes) > 0:
                zone_partitions_count = 0
                for node in zone_fts_nodes:
                    rest_client = RestConnection(node)
                    _, num_pindexes = rest_client.get_fts_stats(
                        index_name=idx.name,
                        bucket_name=idx.source_bucket.name,
                        stat_name="num_pindexes_actual")
                    zone_partitions_count = zone_partitions_count + num_pindexes
                self.assertEqual(
                    zone_partitions_count, index_partitions,
                    "Actual post server groups update partitions distribution differs from expected.")
        fts_nodes = self._cb_cluster.get_fts_nodes()
        self._maybe_rebalance()
        initial_hits = self.query_node(index=idx, node=fts_nodes[0])
        for node in fts_nodes[1:]:
            hits = self.query_node(index=idx, node=node)
            self.assertEqual(
                initial_hits, hits,
                "Difference in search results after server groups modification is detected.")

    def test_creation_order(self):
        """Placement must be correct regardless of whether groups are
        created before, between, or after index creation."""
        index_partitions = int(self._input.param("partitions", 1))
        self.load_data()
        ordering = self._input.param("creation_order", None)
        if 'groups_first' == ordering:
            self.build_cluster()
            idx = self.build_index()
            self.wait_for_indexing_complete(item_count=1000)
        elif 'index_in_between' == ordering:
            self.build_cluster()
            available_nodes = self.rebuild_cluster_to_initial_state()
            idx = self.build_index()
            self.wait_for_indexing_complete(item_count=1000)
            self.modify_server_groups(available_nodes=available_nodes)
        else:
            idx = self.build_index()
            self.wait_for_indexing_complete(item_count=1000)
            self.build_cluster()
        self.wait_for_indexing_complete(item_count=1000)
        self._maybe_rebalance()
        for zone in self.rest.get_zone_names():
            zone_fts_nodes = self.get_zone_healthy_fts_nodes(zone=zone)
            if len(zone_fts_nodes) > 0:
                zone_partitions_count = 0
                for node in zone_fts_nodes:
                    rest_client = RestConnection(node)
                    _, num_pindexes = rest_client.get_fts_stats(
                        index_name=idx.name,
                        bucket_name=idx.source_bucket.name,
                        stat_name="num_pindexes_actual")
                    zone_partitions_count = zone_partitions_count + num_pindexes
                self.assertEqual(
                    zone_partitions_count, index_partitions,
                    "Actual initial partitions distribution differs from expected.")
        fts_nodes = self._cb_cluster.get_fts_nodes()
        initial_hits = self.query_node(index=idx, node=fts_nodes[0])
        for node in fts_nodes[1:]:
            hits = self.query_node(index=idx, node=node)
            self.assertEqual(
                initial_hits, hits,
                "Difference in search results after server groups modification is detected.")

    def test_best_effort_distribution(self):
        """After ejection, remaining fts nodes must still serve full results."""
        eject_nodes_structure = self._input.param("eject_nodes", None)
        eject_type = self._input.param("eject_type", None)
        self.build_cluster()
        self.load_data()
        idx = self.build_index()
        self.wait_for_indexing_complete(item_count=1000)
        fts_nodes = self._cb_cluster.get_fts_nodes()
        initial_hits = self.query_node(index=idx, node=fts_nodes[0])
        self.eject_nodes(eject_nodes_structure=eject_nodes_structure,
                         eject_type=eject_type)
        self.wait_for_indexing_complete(item_count=1000)
        self._maybe_rebalance()
        fts_nodes = self._cb_cluster.get_fts_nodes()
        for node in fts_nodes:
            hits = self.query_node(index=idx, node=node)
            self.assertEqual(
                initial_hits, hits,
                "Difference in search results after server group failover is detected.")

    def test_best_effort_distribution_max_group(self):
        """Strip the cluster down to the single least-loaded fts node of the
        most-loaded group; it must still return the full result set."""
        eject_nodes_structure = self._input.param("eject_nodes", None)
        eject_type = self._input.param("eject_type", None)
        initial_server_groups = {}
        sg_structure = self._input.param("server_groups", None)
        server_groups = sg_structure.split("|")
        for server_group in server_groups:
            group_name = server_group.split("-")[0]
            group_structure = server_group.split("-")[1]
            initial_server_groups[group_name] = group_structure
        self.build_cluster()
        self.load_data()
        idx = self.build_index()
        self.wait_for_indexing_complete(item_count=1000)
        fts_nodes = self._cb_cluster.get_fts_nodes()
        initial_hits = self.query_node(index=idx, node=fts_nodes[0])
        self.eject_nodes(eject_nodes_structure=eject_nodes_structure,
                         eject_type=eject_type)
        self.wait_for_indexing_complete(item_count=1000)
        self._maybe_rebalance()

        # server group having maximum number of partitions
        max_server_group = self.find_max_server_group(idx=idx)

        # Let only maximal server group to stay alive
        for zone in initial_server_groups:
            if zone != max_server_group:
                self.eject_nodes(
                    eject_nodes_structure=f"{zone}-{initial_server_groups[zone].replace('D:','')}",
                    eject_type="failover")

        max_group_fts_nodes = self.get_zone_healthy_fts_nodes(
            max_server_group)

        # find fts node in maximal server group holding min index partitions
        min_fts_node = None
        min_num_partitions = 1000
        for i in range(len(max_group_fts_nodes)):
            curr_partitions = self.get_num_partitions_distribution(
                index=idx, node=max_group_fts_nodes[i])
            if curr_partitions < min_num_partitions:
                min_fts_node = max_group_fts_nodes[i]
                min_num_partitions = curr_partitions

        # testing min fts node from maximal server group individually,
        # failing over all the rest fts nodes from maximal server group
        for i in range(len(max_group_fts_nodes)):
            if max_group_fts_nodes[i].ip != min_fts_node.ip:
                self._cb_cluster.failover(graceful=False,
                                          node=max_group_fts_nodes[i])
        self._maybe_rebalance()
        min_fts_node_hints = self.query_node(index=idx, node=min_fts_node)
        self.assertEqual(initial_hits, min_fts_node_hints,
                         "Best effort distribution test is failed.")

    def test_best_effort_distribution_negative(self):
        """Surviving fts nodes must return at least partial (non-zero) results."""
        eject_nodes_structure = self._input.param("eject_nodes", None)
        eject_type = self._input.param("eject_type", None)
        self.build_cluster()
        self.load_data()
        idx = self.build_index()
        self.wait_for_indexing_complete(item_count=1000)
        self.eject_nodes(eject_nodes_structure=eject_nodes_structure,
                         eject_type=eject_type)
        self.wait_for_indexing_complete(item_count=1000)
        self._maybe_rebalance()
        fts_nodes = self._cb_cluster.get_fts_nodes()
        for node in fts_nodes:
            hits = self.query_node(index=idx, node=node)
            self.assertGreater(
                hits, 0,
                "Partial search results were not returned by alive fts node.")

    def test_replicas_distribution_negative(self):
        """Index creation must be rejected when the cluster has fewer fts
        nodes than the requested replica count needs."""
        self.build_cluster()
        self.load_data()
        self._maybe_rebalance()
        try:
            idx = self.build_index()
            self.wait_for_indexing_complete(item_count=1000)
            self.fail(
                "Was able to create index having 2 replicas for a cluster containing just 2 fts nodes but 3 server groups.")
        except Exception as e:
            self.assertTrue(
                "cluster needs 3 search nodes to support the requested replica count of 2" in str(e),
                "Unexpected error message while trying to create index with incorrect number of replicas.")

    def test_group_autofailover(self):
        """With server-group auto-failover enabled, results must stay
        consistent after a whole group goes down."""
        eject_nodes_structure = self._input.param("eject_nodes", None)
        eject_type = self._input.param("eject_type", None)
        self.build_cluster()
        self.load_data()
        idx = self.build_index()
        self.wait_for_indexing_complete(item_count=1000)
        # 60 second auto-failover timeout, server-group failover enabled.
        self.rest.update_autofailover_settings(True, 60,
                                               enableServerGroup=True)
        ejected_nodes = self.eject_nodes(
            eject_nodes_structure=eject_nodes_structure,
            eject_type=eject_type)
        try:
            self.sleep(
                120, "Waiting for server group auto failover to be started.")
            initial_hits = self.query_node(
                index=idx, node=self._cb_cluster.get_fts_nodes()[0])
            for zone in self.rest.get_zone_names():
                fts_nodes = self.get_zone_healthy_fts_nodes(zone=zone)
                for node in fts_nodes:
                    hits = self.query_node(index=idx, node=node)
                    self.assertEqual(
                        initial_hits, hits,
                        "Difference in search results after server group auto-failover is detected.")
        finally:
            # Restart whatever was shut down/failed over.
            for ejected_node in ejected_nodes:
                remote = RemoteMachineShellConnection(ejected_node)
                remote.start_couchbase()

    def find_max_server_group(self, idx=None):
        """Return the zone name holding the most partitions of idx."""
        max_partitions_count = 0
        max_group = None
        for zone in self.rest.get_zone_names():
            zone_fts_nodes = self.get_zone_healthy_fts_nodes(zone=zone)
            if len(zone_fts_nodes) > 0:
                zone_partitions_count = 0
                for node in zone_fts_nodes:
                    rest_client = RestConnection(node)
                    _, num_pindexes = rest_client.get_fts_stats(
                        index_name=idx.name,
                        bucket_name=idx.source_bucket.name,
                        stat_name="num_pindexes_actual")
                    zone_partitions_count = zone_partitions_count + num_pindexes
                if zone_partitions_count > max_partitions_count:
                    max_partitions_count = zone_partitions_count
                    max_group = zone
        return max_group

    def modify_server_groups(self, available_nodes=None):
        """Apply the 'operation' param to the topology: add_group,
        remove_group, add_nodes, swap_nodes, or rename.

        available_nodes: spare servers (from rebuild_cluster_to_initial_state)
        consumed when nodes are added.
        """
        eject_nodes_structure = self._input.param("eject_nodes", None)
        eject_type = self._input.param("eject_type", None)
        operation = self._input.param("operation", None)
        if 'add_group' == operation:
            add_groups = self._input.param("add_server_group",
                                           None).split("|")
            for add_group in add_groups:
                group_name = add_group.split("-")[0]
                group_nodes = add_group.split("-")[1].split(":")
                self.rest.add_zone(group_name)
                nodes_to_move = []
                for node in group_nodes:
                    node_to_shuffle = available_nodes.pop(0)
                    nodes_to_move.append(node_to_shuffle.ip)
                    if 'D' == node:
                        self._cb_cluster.rebalance_in_node(
                            nodes_in=[node_to_shuffle], services=['kv'],
                            sleep_before_rebalance=0)
                    elif 'F' == node:
                        self._cb_cluster.rebalance_in_node(
                            nodes_in=[node_to_shuffle], services=['fts'],
                            sleep_before_rebalance=0)
                    else:
                        self.fail(f"Unsupported node type found {node}!")
                # New nodes join the default group; move them to the target.
                self.rest.shuffle_nodes_in_zones(
                    moved_nodes=nodes_to_move,
                    source_zone=self.default_group_name,
                    target_zone=group_name)
        elif 'remove_group' == operation:
            self.eject_nodes(eject_nodes_structure=eject_nodes_structure,
                             eject_type=eject_type)
        elif 'add_nodes' == operation:
            extend_groups = self._input.param("groups_additions",
                                              None).split("|")
            for extended_group in extend_groups:
                group_name = extended_group.split("-")[0]
                nodes = extended_group.split("-")[1]
                nodes_to_move = []
                for node in nodes:
                    node_to_shuffle = available_nodes.pop(0)
                    if 'D' == node:
                        self._cb_cluster.rebalance_in_node(
                            nodes_in=[node_to_shuffle], services=['kv'],
                            sleep_before_rebalance=0)
                    elif 'F' == node:
                        self._cb_cluster.rebalance_in_node(
                            nodes_in=[node_to_shuffle], services=['fts'],
                            sleep_before_rebalance=0)
                    nodes_to_move.append(node_to_shuffle.ip)
                self.rest.shuffle_nodes_in_zones(
                    moved_nodes=nodes_to_move,
                    source_zone=self.default_group_name,
                    target_zone=group_name)
        elif 'swap_nodes' == operation:
            # Exchange one fts node between the hard-coded sg1 and sg2.
            server_group1_fts_node = None
            server_group2_fts_node = None
            server_group1_nodes = self.rest.get_nodes_in_zone('sg1')
            server_group2_nodes = self.rest.get_nodes_in_zone('sg2')
            for key in server_group1_nodes:
                for fts_node in self._cb_cluster.get_fts_nodes():
                    if fts_node.ip == key:
                        server_group1_fts_node = fts_node
                        break
            for key in server_group2_nodes:
                for fts_node in self._cb_cluster.get_fts_nodes():
                    if fts_node.ip == key:
                        server_group2_fts_node = fts_node
                        break
            self.rest.shuffle_nodes_in_zones(
                moved_nodes=[server_group1_fts_node.ip],
                source_zone='sg1', target_zone='sg2')
            self.rest.shuffle_nodes_in_zones(
                moved_nodes=[server_group2_fts_node.ip],
                source_zone='sg2', target_zone='sg1')
        elif 'rename' == operation:
            self.rest.rename_zone('sg1', 'sg1_1')
            self.rest.rename_zone('sg2', 'sg1_2')

    def get_num_partitions_distribution(self, index=None, node=None):
        """Return the actual pindex count for index on the given node."""
        rest_client = RestConnection(node)
        _, num_pindexes = rest_client.get_fts_stats(
            index_name=index.name,
            bucket_name=index.source_bucket.name,
            stat_name="num_pindexes_actual")
        return num_pindexes

    def calculate_zones_with_replica(self, index=None):
        """Count zones hosting at least one partition of the index."""
        zones_list = self.rest.get_all_zones_info()
        zones_with_replica = 0
        for zone in zones_list['groups']:
            replica_found = False
            nodes = zone['nodes']
            for node in nodes:
                if replica_found:
                    break
                if 'fts' in node['services']:
                    # strip ":port" from the hostname
                    hostname = node['hostname'][0:node['hostname'].find(":")]
                    for fts_node in self._cb_cluster.get_fts_nodes():
                        if fts_node.ip == hostname:
                            rest_client = RestConnection(fts_node)
                            _, num_pindexes = rest_client.get_fts_stats(
                                index_name=index.name,
                                bucket_name=index.source_bucket.name,
                                stat_name="num_pindexes_actual")
                            if num_pindexes > 0:
                                replica_found = True
                                zones_with_replica += 1
                                break
        return zones_with_replica

    def update_index(self, index=None, mod_type=None):
        """Mutate the index per mod_type: 'custom_mapping' rewrites
        doc_config + mapping and updates; 'delete' drops the index."""
        if mod_type == 'custom_mapping':
            index.index_definition['params']['doc_config'] = {}
            doc_config = {}
            doc_config['mode'] = 'type_field'
            doc_config['type_field'] = 'dept'
            index.index_definition['params']['doc_config'] = doc_config
            index.add_type_mapping_to_index_definition(type="filler",
                                                       analyzer="standard")
            index.index_definition['params']['mapping'] = {
                "default_analyzer": "standard",
                "default_datetime_parser": "dateTimeOptional",
                "default_field": "_all",
                "default_mapping": {
                    "dynamic": False,
                    "enabled": False
                },
                "default_type": "_default",
                "docvalues_dynamic": True,
                "index_dynamic": True,
                "store_dynamic": False,
                "type_field": "_type",
                "types": {
                    "Sales": {
                        "default_analyzer": "standard",
                        "dynamic": True,
                        "enabled": True,
                    }
                }
            }
            index.index_definition['uuid'] = index.get_uuid()
            index.update()
        elif "delete" == mod_type:
            self._cb_cluster.delete_fts_index(index.name)
            self.wait_for_indexing_complete(item_count=1000)

    def rebuild_cluster_to_initial_state(self):
        """Rebalance out every node in the default group; return the removed
        nodes so they can be re-added by modify_server_groups()."""
        cleanup_nodes = self.rest.get_nodes_in_zone(self.default_group_name)
        nodes_to_remove = []
        for key in cleanup_nodes.keys():
            node = self._cb_cluster.get_node(key, str(8091))
            nodes_to_remove.append(node)
            self._cb_cluster.rebalance_out_node(node=node,
                                                sleep_before_rebalance=0)
        return nodes_to_remove

    def build_cluster(self):
        """Create zones from the "server_groups" param and move kv/fts
        nodes into them (D = kv node, F = fts node)."""
        sg_structure = self._input.param("server_groups", None)
        server_groups = sg_structure.split("|")
        available_kv_nodes = self._cb_cluster.get_kv_nodes()
        available_fts_nodes = self._cb_cluster.get_fts_nodes()
        for server_group in server_groups:
            group_name = server_group.split("-")[0]
            group_nodes = server_group.split("-")[1].split(":")
            self.rest.add_zone(group_name)
            self.rest.get_all_zones_info()
            nodes_to_move = []
            for node in group_nodes:
                if 'D' == node:
                    if len(available_kv_nodes) == 0:
                        self.fail("Cannot find any available kv node!")
                    nodes_to_move.append(available_kv_nodes.pop(0).ip)
                elif 'F' == node:
                    if len(available_fts_nodes) == 0:
                        self.fail("Cannot find any available fts node!")
                    nodes_to_move.append(available_fts_nodes.pop(0).ip)
                else:
                    self.fail(f"Unsupported node type found {node}!")
            self.rest.shuffle_nodes_in_zones(
                moved_nodes=nodes_to_move,
                source_zone=self.default_group_name,
                target_zone=group_name)

    def build_index(self):
        """Create "fts_idx" on the default bucket with partitions/replicas
        from test input; waits for indexing and returns the index object."""
        replicas = self._input.param("replicas", 0)
        partitions = self._input.param("partitions", 1)
        collection_index, _type, index_scope, index_collections = \
            self.define_index_parameters_collection_related()
        idx = self.create_index(
            bucket=self._cb_cluster.get_bucket_by_name('default'),
            index_name="fts_idx",
            collection_index=collection_index,
            _type=_type,
            scope=index_scope,
            collections=index_collections)
        idx.update_index_partitions(partitions)
        idx.update_num_replicas(replicas)
        self.wait_for_indexing_complete(item_count=1000)
        return idx

    def get_zone_healthy_fts_nodes(self, zone=None):
        """Return cluster fts node objects in zone whose status is healthy."""
        zone_nodes = self.rest.get_nodes_in_zone(zone)
        healthy_fts_nodes = []
        for key in zone_nodes.keys():
            node = zone_nodes[key]
            if node["status"] == 'healthy' and 'fts' in node['services']:
                for fts_node in self._cb_cluster.get_fts_nodes():
                    if key == fts_node.ip:
                        healthy_fts_nodes.append(fts_node)
        return healthy_fts_nodes

    def query_node(self, index=None, node=None):
        """Run the canonical fts query on one node; return the hit count."""
        hits, _, _, _ = index.execute_query(self.fts_query, node=node)
        return hits

    def eject_nodes(self, eject_nodes_structure=None, eject_type=None):
        """Remove nodes described by eject_nodes_structure
        ("group-D:F|group2-F") via eject_type: remove (rebalance out),
        failover, shutdown (+failover+rebalance), or shutdown_no_rebalance.
        Returns the list of affected node objects."""
        eject_server_groups = eject_nodes_structure.split("|")
        eject_nodes = []
        for eject_server_group in eject_server_groups:
            group_name = eject_server_group.split("-")[0]
            node_types = eject_server_group.split("-")[1]
            target_zone_nodes = self.rest.get_nodes_in_zone(group_name)
            node_type_arr = node_types.split(":")
            for node_type in node_type_arr:
                if 'D' == node_type:
                    # pick the first not-yet-selected kv node in the zone
                    for kv_node in self._cb_cluster.get_kv_nodes():
                        if kv_node.ip in target_zone_nodes.keys():
                            if kv_node not in eject_nodes:
                                eject_nodes.append(kv_node)
                                break
                elif 'F' == node_type:
                    for fts_node in self._cb_cluster.get_fts_nodes():
                        if fts_node.ip in target_zone_nodes.keys():
                            if fts_node not in eject_nodes:
                                eject_nodes.append(fts_node)
                                break
                else:
                    self.fail("Unsupported node type found in nodes to eject.")
        for node in eject_nodes:
            if "remove" == eject_type:
                self._cb_cluster.rebalance_out_node(node=node)
            elif "failover" == eject_type:
                self._cb_cluster.failover(graceful=False, node=node)
                #self._cb_cluster.rebalance_failover_nodes()
            elif "shutdown" == eject_type:
                remote = RemoteMachineShellConnection(node)
                remote.stop_couchbase()
                self._cb_cluster.failover(graceful=False, node=node)
                self._cb_cluster.rebalance_failover_nodes()
            elif "shutdown_no_rebalance" == eject_type:
                remote = RemoteMachineShellConnection(node)
                remote.stop_couchbase()
        return eject_nodes

    def create_server_group(self, group_name=None):
        """Create a server group (zone) with the given name."""
        self.rest.add_zone(group_name)

    def _cleanup_server_groups(self):
        """Move every node back into the default group and delete all
        other groups, restoring a single-group cluster."""
        curr_server_groups = self.rest.get_zone_names()
        for g in curr_server_groups.keys():
            if g != self.default_group_name:
                nodes = self.rest.get_nodes_in_zone(g)
                if nodes:
                    nodes_to_move = []
                    for key in nodes.keys():
                        nodes_to_move.append(key)
                    self.rest.shuffle_nodes_in_zones(
                        moved_nodes=nodes_to_move, source_zone=g,
                        target_zone=self.default_group_name)
                self.rest.delete_zone(g)

    def _maybe_rebalance(self):
        """Rebalance out failed-over nodes if the cluster isn't balanced."""
        if not self.helper.is_cluster_rebalanced():
            self._cb_cluster.rebalance_failover_nodes()
class PerfBase(unittest.TestCase): """ specURL = http://hub.internal.couchbase.org/confluence/display/cbit/Black+Box+Performance+Test+Matrix """ # The setUpBaseX() methods allow subclasses to resequence the setUp() and # skip cluster configuration. def setUpBase0(self): self.log = logger.Logger.get_logger() self.input = TestInputSingleton.input self.vbucket_count = PerfDefaults.vbuckets self.sc = None if self.parami("tear_down_on_setup", PerfDefaults.tear_down_on_setup) == 1: self.tearDown() # Tear down in case previous run had unclean death master = self.input.servers[0] self.set_up_rest(master) def setUpBase1(self): if max(self.parami('num_buckets', 1), self.parami('xdcr_num_buckets', 1)) > 1: bucket = 'bucket-0' else: bucket = self.param('bucket', 'default') vBuckets = self.rest.get_vbuckets(bucket) self.vbucket_count = len(vBuckets) if vBuckets else 0 def setUp(self): self.setUpBase0() mc_threads = self.parami("mc_threads", PerfDefaults.mc_threads) if mc_threads != PerfDefaults.mc_threads: for node in self.input.servers: self.set_mc_threads(node, mc_threads) erlang_schedulers = self.param("erlang_schedulers", PerfDefaults.erlang_schedulers) if erlang_schedulers: ClusterOperationHelper.set_erlang_schedulers(self.input.servers, erlang_schedulers) master = self.input.servers[0] self.is_multi_node = False self.data_path = master.data_path # Number of items loaded by load() method. # Does not include or count any items that came from set_up_dgm(). 
        # self.num_items_loaded = 0
        # NOTE(review): the initializer above is commented out; loop() falls
        # back to self.num_items_loaded, which only load() assigns — confirm
        # that calling loop() without a prior load() is never expected.

        # Cluster initialization — once per cluster for XDCR-style runs,
        # otherwise just on the single master.
        if self.input.clusters:
            for cluster in self.input.clusters.values():
                master = cluster[0]
                self.set_up_rest(master)
                self.set_up_cluster(master)
        else:
            master = self.input.servers[0]
            self.set_up_cluster(master)

        # Rebalance
        num_nodes = self.parami("num_nodes", 10)
        self.rebalance_nodes(num_nodes)

        if self.input.clusters:
            for cluster in self.input.clusters.values():
                master = cluster[0]
                self.set_up_rest(master)
                self.set_up_buckets()
        else:
            self.set_up_buckets()

        self.set_up_proxy()

        if self.input.clusters:
            for cluster in self.input.clusters.values():
                master = cluster[0]
                self.set_up_rest(master)
                self.reconfigure()
        else:
            self.reconfigure()

        # Optionally restore a pre-built disk-greater-than-memory data set.
        if self.parami("dgm", getattr(self, "dgm", 1)):
            self.set_up_dgm()

        time.sleep(10)
        self.setUpBase1()

        if self.input.clusters:
            for cluster in self.input.clusters.values():
                self.wait_until_warmed_up(cluster[0])
        else:
            self.wait_until_warmed_up()
        ClusterOperationHelper.flush_os_caches(self.input.servers)

    def set_up_rest(self, master):
        # (Re)point self.rest / self.rest_helper at `master`.
        self.rest = RestConnection(master)
        self.rest_helper = RestHelper(self.rest)

    def set_up_cluster(self, master):
        """Initialize cluster"""
        self.log.info("setting up cluster")
        self.rest.init_cluster(master.rest_username, master.rest_password)
        memory_quota = self.parami('mem_quota', PerfDefaults.mem_quota)
        self.rest.init_cluster_memoryQuota(master.rest_username,
                                           master.rest_password,
                                           memoryQuota=memory_quota)

    def _get_bucket_names(self, num_buckets):
        """
        Get a list of bucket names
        """
        if num_buckets > 1:
            buckets = ['bucket-{0}'.format(i) for i in range(num_buckets)]
        else:
            buckets = [self.param('bucket', 'default')]
        return buckets

    def get_bucket_conf(self):
        """ retrieve bucket configurations"""
        num_buckets = max(self.parami('num_buckets', 1),
                          self.parami('xdcr_num_buckets', 1))
        self.buckets = self._get_bucket_names(num_buckets)

    def set_up_buckets(self):
        """Set up data bucket(s)"""
        self.log.info("setting up buckets")
        self.get_bucket_conf()
        for bucket in self.buckets:
            # Split the total memory quota evenly across all buckets.
            # NOTE(review): '/=' yields a float on Python 3 — confirm the
            # REST API accepts a non-integer ramQuotaMB.
            bucket_ram_quota = \
                self.parami('mem_quota', PerfDefaults.mem_quota)
            bucket_ram_quota /= max(self.parami('num_buckets', 1),
                                    self.parami('xdcr_num_buckets', 1))
            replicas = self.parami('replicas', getattr(self, 'replicas', 1))
            index_replicas = self.parami('index_replicas', 1)
            self.rest.create_bucket(bucket=bucket,
                                    ramQuotaMB=bucket_ram_quota,
                                    replicaNumber=replicas,
                                    authType='sasl',
                                    replica_index=index_replicas)
            status = self.rest_helper.vbucket_map_ready(bucket, 60)
            self.assertTrue(status, msg='vbucket_map not ready .. timed out')
            status = self.rest_helper.bucket_exists(bucket)
            self.assertTrue(status,
                            msg='unable to create {0} bucket'.format(bucket))

    def reconfigure(self):
        """Customize basic Couchbase setup"""
        self.log.info("customizing setup")
        self.set_loglevel()
        self.customize_xdcr_settings()
        self.set_autocompaction()
        self.set_exp_pager_stime()
        self.set_rebalance_options()

    def set_rebalance_options(self):
        # rebalanceMovesBeforeCompaction
        rmbc = self.parami('rebalance_moves_before_compaction', 0)
        if rmbc:
            cmd = 'ns_config:set(rebalance_moves_before_compaction, {0}).'\
                .format(rmbc)
            self.rest.diag_eval(cmd)

    def set_exp_pager_stime(self):
        # Expiry pager sleep time; only pushed when it differs from default.
        exp_pager_stime = self.param('exp_pager_stime',
                                     PerfDefaults.exp_pager_stime)
        if exp_pager_stime != PerfDefaults.exp_pager_stime:
            self.set_ep_param('flush_param', 'exp_pager_stime',
                              exp_pager_stime)

    def set_loglevel(self):
        """Set custom loglevel"""
        loglevel = self.param('loglevel', None)
        if loglevel:
            self.rest.set_global_loglevel(loglevel)

    def set_mc_threads(self, node, mc_threads):
        """Change number of memcached threads"""
        rest = RestConnection(node)
        rest.set_mc_threads(mc_threads)
        self.log.info("num of memcached threads = {0}".format(mc_threads))

    def customize_xdcr_settings(self):
        """Set custom XDCR environment variables"""
        # Only the first configured knob (in priority order) is applied.
        max_concurrent_reps_per_doc = self.param('max_concurrent_reps_per_doc',
                                                 None)
        xdcr_doc_batch_size_kb = self.param('xdcr_doc_batch_size_kb', None)
        xdcr_checkpoint_interval = self.param('xdcr_checkpoint_interval',
                                              None)
        if max_concurrent_reps_per_doc:
            env_var = 'MAX_CONCURRENT_REPS_PER_DOC'
            value = max_concurrent_reps_per_doc
        elif xdcr_doc_batch_size_kb:
            env_var = 'XDCR_DOC_BATCH_SIZE_KB'
            value = xdcr_doc_batch_size_kb
        elif xdcr_checkpoint_interval:
            env_var = 'XDCR_CHECKPOINT_INTERVAL'
            value = xdcr_checkpoint_interval
        else:
            return
        self.log.info("changing {0} to {1}".format(env_var, value))
        for server in self.input.servers:
            rc = RemoteMachineShellConnection(server)
            rc.set_environment_variable(env_var, value)

    def set_ep_compaction(self, comp_ratio):
        """Set up ep_engine side compaction ratio"""
        for server in self.input.servers:
            shell = RemoteMachineShellConnection(server)
            cmd = "/opt/couchbase/bin/cbepctl localhost:11210 "\
                  "set flush_param db_frag_threshold {0}".format(comp_ratio)
            self._exec_and_log(shell, cmd)
            shell.disconnect()

    def set_autocompaction(self, disable_view_compaction=False):
        """Set custom auto-compaction settings"""
        try:
            # Parallel database and view compaction
            parallel_compaction = self.param("parallel_compaction",
                                             PerfDefaults.parallel_compaction)
            # Database fragmentation threshold
            db_compaction = self.parami("db_compaction",
                                        PerfDefaults.db_compaction)
            self.log.info("database compaction = {0}".format(db_compaction))
            # ep_engine fragementation threshold
            ep_compaction = self.parami("ep_compaction",
                                        PerfDefaults.ep_compaction)
            if ep_compaction != PerfDefaults.ep_compaction:
                self.set_ep_compaction(ep_compaction)
            self.log.info("ep_engine compaction = {0}".format(ep_compaction))
            # View fragmentation threshold
            if disable_view_compaction:
                view_compaction = 100
            else:
                view_compaction = self.parami("view_compaction",
                                              PerfDefaults.view_compaction)
            # Set custom auto-compaction settings
            self.rest.set_auto_compaction(
                parallelDBAndVC=parallel_compaction,
                dbFragmentThresholdPercentage=db_compaction,
                viewFragmntThresholdPercentage=view_compaction)
        except Exception as e:
            # It's very hard to determine what exception it can raise.
            # Therefore we have to use general handler.
            self.log.error("Error while changing compaction settings: {0}"
                           .format(e))

    def set_ep_param(self, type, param, value):
        """
        Set ep-engine specific param, using cbepctl

        type: paramter type, e.g: flush_param, tap_param, etc
        """
        bucket = Bucket(name=self.buckets[0], authType="sasl", saslPassword="")
        for server in self.input.servers:
            shell = RemoteMachineShellConnection(server)
            shell.execute_cbepctl(bucket, "", "set %s" % type, param, value)
            shell.disconnect()

    def tearDown(self):
        # NOTE(review): tear_down=1 SKIPS the teardown routine — confirm this
        # inversion is intended rather than `== 0`.
        if self.parami("tear_down", 0) == 1:
            self.log.info("routine skipped")
            return

        self.log.info("routine starts")

        if self.parami("tear_down_proxy", 1) == 1:
            self.tear_down_proxy()
        else:
            self.log.info("proxy tearDown skipped")

        # Stop any stats collector started by start_stats().
        if self.sc is not None:
            self.sc.stop()
            self.sc = None

        if self.parami("tear_down_bucket", 0) == 1:
            self.tear_down_buckets()
        else:
            self.log.info("bucket tearDown skipped")

        if self.parami("tear_down_cluster", 1) == 1:
            self.tear_down_cluster()
        else:
            self.log.info("cluster tearDown skipped")

        self.log.info("routine finished")

    def tear_down_buckets(self):
        self.log.info("tearing down bucket")
        BucketOperationHelper.delete_all_buckets_or_assert(self.input.servers,
                                                           self)
        self.log.info("bucket teared down")

    def tear_down_cluster(self):
        self.log.info("tearing down cluster")
        ClusterOperationHelper.cleanup_cluster(self.input.servers)
        ClusterOperationHelper.wait_for_ns_servers_or_assert(self.input.servers,
                                                             self)
        self.log.info("Cluster teared down")

    def set_up_proxy(self, bucket=None):
        """Set up and start Moxi"""
        if self.input.moxis:
            self.log.info("setting up proxy")
            bucket = bucket or self.param('bucket', 'default')
            shell = RemoteMachineShellConnection(self.input.moxis[0])
            shell.start_moxi(self.input.servers[0].ip, bucket,
                             self.input.moxis[0].port)
            shell.disconnect()

    def tear_down_proxy(self):
        if len(self.input.moxis) > 0:
            shell = RemoteMachineShellConnection(self.input.moxis[0])
            shell.stop_moxi()
            shell.disconnect()

    # Returns "host:port" of moxi to hit.
    def target_host_port(self, bucket='default', use_direct=False):
        # Priority: direct memcached port > explicit 'moxi' param >
        # standalone moxi host > per-bucket moxi port from REST.
        rv = self.param('moxi', None)
        if use_direct:
            return "%s:%s" % (self.input.servers[0].ip, '11210')
        if rv:
            return rv
        if len(self.input.moxis) > 0:
            return "%s:%s" % (self.input.moxis[0].ip,
                              self.input.moxis[0].port)
        return "%s:%s" % (self.input.servers[0].ip,
                          self.rest.get_bucket(bucket).nodes[0].moxi)

    def protocol_parse(self, protocol_in, use_direct=False):
        """Split a protocol spec into (protocol, host_port, user, pswd).

        Accepts either a URL form like "membase://user:pswd@host:port"
        (credentials optional) or a bare protocol name, in which case the
        target and credentials come from test params.
        """
        if protocol_in.find('://') >= 0:
            if protocol_in.find("couchbase:") >= 0:
                protocol = "couchbase"
            else:
                protocol = \
                    '-'.join(((["membase"] +
                               protocol_in.split("://"))[-2] +
                              "-binary").split('-')[0:2])
            host_port = ('@' + protocol_in.split("://")[-1]).split('@')[-1]
            user, pswd = (('@' +
                           protocol_in.split("://")[-1]).split('@')[-2] +
                          ":").split(':')[0:2]
        else:
            protocol = 'memcached-' + protocol_in
            host_port = self.target_host_port(use_direct=use_direct)
            user = self.param("rest_username", "Administrator")
            pswd = self.param("rest_password", "password")
        return protocol, host_port, user, pswd

    def mk_protocol(self, host, port='8091', prefix='membase-binary'):
        return self.param('protocol', prefix + '://' + host + ':' + port)

    def get_backups(self, protocol):
        """ Get backup server lists for memcached-binary """
        port = protocol.split(":")[-1]
        return map(lambda server: "%s:%s" % (server.ip, port),
                   self.input.servers[1:])

    def restartProxy(self, bucket=None):
        self.tear_down_proxy()
        self.set_up_proxy(bucket)

    def set_up_dgm(self):
        """Download fragmented, DGM dataset onto each cluster node, if not
        already locally available.

        The number of vbuckets and database schema must match the target
        cluster.

        Shutdown all cluster nodes.

        Do a cluster-restore.

        Restart all cluster nodes."""
        bucket = self.param("bucket", "default")
        ClusterOperationHelper.stop_cluster(self.input.servers)
        for server in self.input.servers:
            remote = RemoteMachineShellConnection(server)
            #TODO: Better way to pass num_nodes and db_size?
self.get_data_files(remote, bucket, 1, 10) remote.disconnect() ClusterOperationHelper.start_cluster(self.input.servers) def get_data_files(self, remote, bucket, num_nodes, db_size): base = 'https://s3.amazonaws.com/database-analysis' dir = '/tmp/' if remote.is_couchbase_installed(): dir = dir + '/couchbase/{0}-{1}-{2}/'.format(num_nodes, 256, db_size) output, error = remote.execute_command('mkdir -p {0}'.format(dir)) remote.log_command_output(output, error) file = '{0}_cb.tar.gz'.format(bucket) base_url = base + '/couchbase/{0}-{1}-{2}/{3}'.format(num_nodes, 256, db_size, file) else: dir = dir + '/membase/{0}-{1}-{2}/'.format(num_nodes, 1024, db_size) output, error = remote.execute_command('mkdir -p {0}'.format(dir)) remote.log_command_output(output, error) file = '{0}_mb.tar.gz'.format(bucket) base_url = base + '/membase/{0}-{1}-{2}/{3}'.format(num_nodes, 1024, db_size, file) info = remote.extract_remote_info() wget_command = 'wget' if info.type.lower() == 'windows': wget_command = \ "cd {0} ;cmd /c 'c:\\automation\\wget.exe --no-check-certificate"\ .format(dir) # Check if the file exists on the remote server else download the gzipped version # Extract if necessary exist = remote.file_exists(dir, file) if not exist: additional_quote = "" if info.type.lower() == 'windows': additional_quote = "'" command = "{0} -v -O {1}{2} {3} {4} ".format(wget_command, dir, file, base_url, additional_quote) output, error = remote.execute_command(command) remote.log_command_output(output, error) if remote.is_couchbase_installed(): if info.type.lower() == 'windows': destination_folder = testconstants.WIN_COUCHBASE_DATA_PATH else: destination_folder = testconstants.COUCHBASE_DATA_PATH else: if info.type.lower() == 'windows': destination_folder = testconstants.WIN_MEMBASE_DATA_PATH else: destination_folder = testconstants.MEMBASE_DATA_PATH if self.data_path: destination_folder = self.data_path untar_command = 'cd {1}; tar -xzf {0}'.format(dir + file, destination_folder) output, error 
= remote.execute_command(untar_command) remote.log_command_output(output, error) def _exec_and_log(self, shell, cmd): """helper method to execute a command and log output""" if not cmd or not shell: return output, error = shell.execute_command(cmd) shell.log_command_output(output, error) def _build_tar_name(self, bucket, version="unknown_version", file_base=None): """build tar file name. {file_base}-{version}-{bucket}.tar.gz """ if not file_base: file_base = os.path.splitext( os.path.basename(self.param("conf_file", PerfDefaults.conf_file)))[0] return "{0}-{1}-{2}.tar.gz".format(file_base, version, bucket) def _save_snapshot(self, server, bucket, file_base=None): """Save data files to a snapshot""" src_data_path = os.path.dirname(server.data_path or testconstants.COUCHBASE_DATA_PATH) dest_data_path = "{0}-snapshots".format(src_data_path) self.log.info("server={0}, src_data_path={1}, dest_data_path={2}" .format(server.ip, src_data_path, dest_data_path)) shell = RemoteMachineShellConnection(server) build_name, short_version, full_version = \ shell.find_build_version("/opt/couchbase/", "VERSION.txt", "cb") dest_file = self._build_tar_name(bucket, full_version, file_base) self._exec_and_log(shell, "mkdir -p {0}".format(dest_data_path)) # save as gzip file, if file exsits, overwrite # TODO: multiple buckets zip_cmd = "cd {0}; tar -cvzf {1}/{2} {3} {3}-data _*"\ .format(src_data_path, dest_data_path, dest_file, bucket) self._exec_and_log(shell, zip_cmd) shell.disconnect() return True def _load_snapshot(self, server, bucket, file_base=None, overwrite=True): """Load data files from a snapshot""" dest_data_path = os.path.dirname(server.data_path or testconstants.COUCHBASE_DATA_PATH) src_data_path = "{0}-snapshots".format(dest_data_path) self.log.info("server={0}, src_data_path={1}, dest_data_path={2}" .format(server.ip, src_data_path, dest_data_path)) shell = RemoteMachineShellConnection(server) build_name, short_version, full_version = \ 
shell.find_build_version("/opt/couchbase/", "VERSION.txt", "cb") src_file = self._build_tar_name(bucket, full_version, file_base) if not shell.file_exists(src_data_path, src_file): self.log.error("file '{0}/{1}' does not exist" .format(src_data_path, src_file)) shell.disconnect() return False if not overwrite: self._save_snapshot(server, bucket, "{0}.tar.gz".format( time.strftime(PerfDefaults.strftime))) # TODO: filename rm_cmd = "rm -rf {0}/{1} {0}/{1}-data {0}/_*".format(dest_data_path, bucket) self._exec_and_log(shell, rm_cmd) unzip_cmd = "cd {0}; tar -xvzf {1}/{2}".format(dest_data_path, src_data_path, src_file) self._exec_and_log(shell, unzip_cmd) shell.disconnect() return True def save_snapshots(self, file_base, bucket): """Save snapshots on all servers""" if not self.input.servers or not bucket: self.log.error("invalid server list or bucket name") return False ClusterOperationHelper.stop_cluster(self.input.servers) for server in self.input.servers: self._save_snapshot(server, bucket, file_base) ClusterOperationHelper.start_cluster(self.input.servers) return True def load_snapshots(self, file_base, bucket): """Load snapshots on all servers""" if not self.input.servers or not bucket: self.log.error("invalid server list or bucket name") return False ClusterOperationHelper.stop_cluster(self.input.servers) for server in self.input.servers: if not self._load_snapshot(server, bucket, file_base): ClusterOperationHelper.start_cluster(self.input.servers) return False ClusterOperationHelper.start_cluster(self.input.servers) return True def spec(self, reference): self.spec_reference = self.param("spec", reference) def mk_stats(self, verbosity): return StatsCollector(verbosity) def _get_src_version(self): """get testrunner version""" try: result = subprocess.Popen(['git', 'rev-parse', 'HEAD'], stdout=subprocess.PIPE).communicate()[0] except subprocess.CalledProcessError as e: self.log.error("unable to get src code version : {0}".format(e)) return "unknown version" return 
result.rstrip()[:7] def start_stats(self, stats_spec, servers=None, process_names=('memcached', 'beam.smp'), test_params=None, client_id='', collect_server_stats=True, ddoc=None): if self.parami('stats', 1) == 0: return None servers = servers or self.input.servers clusters = None if hasattr(self, "get_region"): if self.parami("access_phase", 0): clusters = self.input.clusters if self.get_region() == "west": clusters[0], clusters[1] = clusters[1], clusters[0] sc = self.mk_stats(False) bucket = self.param("bucket", "default") sc.start(servers, bucket, process_names, stats_spec, client_id, collect_server_stats=collect_server_stats, ddoc=ddoc, clusters=clusters) test_params['testrunner'] = self._get_src_version() self.test_params = test_params self.sc = sc return self.sc def end_stats(self, sc, total_stats=None, stats_spec=None): if sc is None: return if stats_spec is None: stats_spec = self.spec_reference if total_stats: sc.total_stats(total_stats) self.log.info("stopping stats collector") sc.stop() self.log.info("stats collector is stopped") sc.export(stats_spec, self.test_params) def load(self, num_items, min_value_size=None, kind='binary', protocol='binary', ratio_sets=1.0, ratio_hot_sets=0.0, ratio_hot_gets=0.0, ratio_expirations=0.0, expiration=None, prefix="", doc_cache=1, use_direct=True, report=0, start_at= -1, collect_server_stats=True, is_eperf=False, hot_shift=0): cfg = {'max-items': num_items, 'max-creates': num_items, 'max-ops-per-sec': self.parami("load_mcsoda_max_ops_sec", PerfDefaults.mcsoda_max_ops_sec), 'min-value-size': min_value_size or self.parami("min_value_size", 1024), 'ratio-sets': self.paramf("load_ratio_sets", ratio_sets), 'ratio-misses': self.paramf("load_ratio_misses", 0.0), 'ratio-creates': self.paramf("load_ratio_creates", 1.0), 'ratio-deletes': self.paramf("load_ratio_deletes", 0.0), 'ratio-hot': 0.0, 'ratio-hot-sets': ratio_hot_sets, 'ratio-hot-gets': ratio_hot_gets, 'ratio-expirations': ratio_expirations, 'expiration': expiration or 
0, 'exit-after-creates': 1, 'json': int(kind == 'json'), 'batch': self.parami("batch", PerfDefaults.batch), 'vbuckets': self.vbucket_count, 'doc-cache': doc_cache, 'prefix': prefix, 'report': report, 'hot-shift': hot_shift, 'cluster_name': self.param("cluster_name", "")} cur = {} if start_at >= 0: cur['cur-items'] = start_at cur['cur-gets'] = start_at cur['cur-sets'] = start_at cur['cur-ops'] = cur['cur-gets'] + cur['cur-sets'] cur['cur-creates'] = start_at cfg['max-creates'] = start_at + num_items cfg['max-items'] = cfg['max-creates'] cfg_params = cfg.copy() cfg_params['test_time'] = time.time() cfg_params['test_name'] = self.id() # phase: 'load' or 'reload' phase = "load" if self.parami("hot_load_phase", 0) == 1: # all gets if self.parami("hot_load_get", PerfDefaults.hot_load_get) == 1: cfg['ratio-sets'] = 0 cfg['exit-after-creates'] = 0 cfg['exit-after-gets'] = 1 cfg['max-gets'] = start_at + num_items phase = "reload" if is_eperf: collect_server_stats = self.parami("prefix", 0) == 0 client_id = self.parami("prefix", 0) sc = self.start_stats("{0}.{1}".format(self.spec_reference, phase), # stats spec e.x: testname.load test_params=cfg_params, client_id=client_id, collect_server_stats=collect_server_stats) # For Black box, multi node tests # always use membase-binary if self.is_multi_node: protocol = self.mk_protocol(host=self.input.servers[0].ip, port=self.input.servers[0].port) protocol, host_port, user, pswd = \ self.protocol_parse(protocol, use_direct=use_direct) if not user.strip(): if "11211" in host_port: user = self.param("bucket", "default") else: user = self.input.servers[0].rest_username if not pswd.strip(): if not "11211" in host_port: pswd = self.input.servers[0].rest_password self.log.info("mcsoda %s %s %s %s" % (protocol, host_port, user, pswd)) self.log.info("mcsoda cfg:\n" + pprint.pformat(cfg)) self.log.info("mcsoda cur:\n" + pprint.pformat(cfg)) cur, start_time, end_time = \ self.mcsoda_run(cfg, cur, protocol, host_port, user, pswd, 
heartbeat=self.parami("mcsoda_heartbeat", 0), why="load", bucket=self.param("bucket", "default")) self.num_items_loaded = num_items ops = {'tot-sets': cur.get('cur-sets', 0), 'tot-gets': cur.get('cur-gets', 0), 'tot-items': cur.get('cur-items', 0), 'tot-creates': cur.get('cur-creates', 0), 'tot-misses': cur.get('cur-misses', 0), "start-time": start_time, "end-time": end_time} if is_eperf: if self.parami("load_wait_until_drained", 1) == 1: self.wait_until_drained() if self.parami("load_wait_until_repl", PerfDefaults.load_wait_until_repl) == 1: self.wait_until_repl() self.end_stats(sc, ops, "{0}.{1}".format(self.spec_reference, phase)) return ops, start_time, end_time def mcsoda_run(self, cfg, cur, protocol, host_port, user, pswd, stats_collector=None, stores=None, ctl=None, heartbeat=0, why="", bucket="default", backups=None): return mcsoda.run(cfg, cur, protocol, host_port, user, pswd, stats_collector=stats_collector, stores=stores, ctl=ctl, heartbeat=heartbeat, why=why, bucket=bucket, backups=backups) def rebalance_nodes(self, num_nodes): """Rebalance cluster(s) if more than 1 node provided""" if len(self.input.servers) == 1 or num_nodes == 1: self.log.warn("running on single node cluster") return else: self.log.info("rebalancing nodes - num_nodes = {0}" .format(num_nodes)) if self.input.clusters: for cluster in self.input.clusters.values(): status, _ = RebalanceHelper.rebalance_in(cluster, num_nodes - 1, do_shuffle=False) self.assertTrue(status) else: status, _ = RebalanceHelper.rebalance_in(self.input.servers, num_nodes - 1, do_shuffle=False) self.assertTrue(status) def delayed_rebalance_worker(self, servers, num_nodes, delay_seconds, sc, max_retries=PerfDefaults.reb_max_retries, reb_mode=PerfDefaults.REB_MODE.IN): time.sleep(delay_seconds) gmt_now = time.strftime(PerfDefaults.strftime, time.gmtime()) self.log.info("rebalance started") if not sc: self.log.error("invalid stats collector") return status = False retries = 0 while not status and retries <= 
max_retries: start_time = time.time() if reb_mode == PerfDefaults.REB_MODE.OUT: status, nodes = RebalanceHelper.rebalance_out(servers, num_nodes) elif reb_mode == PerfDefaults.REB_MODE.SWAP: status, nodes = RebalanceHelper.rebalance_swap(servers, num_nodes) else: status, nodes = RebalanceHelper.rebalance_in(servers, num_nodes - 1, do_check=(not retries)) end_time = time.time() self.log.info("status: {0}, nodes: {1}, retries: {2}" .format(status, nodes, retries)) if not status: retries += 1 time.sleep(delay_seconds) sc.reb_stats(start_time, end_time - start_time) if self.parami("master_events", PerfDefaults.master_events): filename = "master_events.log" with open(filename, "w") as f: f.write(self.rest.diag_master_events()[1]) def delayed_rebalance(self, num_nodes, delay_seconds=10, max_retries=PerfDefaults.reb_max_retries, reb_mode=0, sync=False): self.log.info("delayed_rebalance") if sync: PerfBase.delayed_rebalance_worker(self, self.input.servers, num_nodes, delay_seconds, self.sc, max_retries, reb_mode) else: t = threading.Thread(target=PerfBase.delayed_rebalance_worker, args=(self, self.input.servers, num_nodes, delay_seconds, self.sc, max_retries, reb_mode)) t.daemon = True t.start() @staticmethod def set_auto_compaction(server, parallel_compaction, percent_threshold): rest = RestConnection(server) rest.set_auto_compaction(parallel_compaction, dbFragmentThresholdPercentage=percent_threshold, viewFragmntThresholdPercentage=percent_threshold) @staticmethod def delayed_compaction_worker(servers, parallel_compaction, percent_threshold, delay_seconds): time.sleep(delay_seconds) PerfBase.set_auto_compaction(servers[0], parallel_compaction, percent_threshold) def delayed_compaction(self, parallel_compaction="false", percent_threshold=0.01, delay_seconds=10): t = threading.Thread(target=PerfBase.delayed_compaction_worker, args=(self.input.servers, parallel_compaction, percent_threshold, delay_seconds)) t.daemon = True t.start() def loop(self, num_ops=None, 
num_items=None, max_items=None, max_creates=None, min_value_size=None, exit_after_creates=0, kind='binary', protocol='binary', clients=1, ratio_misses=0.0, ratio_sets=0.0, ratio_creates=0.0, ratio_deletes=0.0, ratio_hot=0.2, ratio_hot_sets=0.95, ratio_hot_gets=0.95, ratio_expirations=0.0, expiration=None, test_name=None, prefix="", doc_cache=1, use_direct=True, collect_server_stats=True, start_at= -1, report=0, ctl=None, hot_shift=0, is_eperf=False, ratio_queries=0, queries=0, ddoc=None): num_items = num_items or self.num_items_loaded hot_stack_size = \ self.parami('hot_stack_size', PerfDefaults.hot_stack_size) or \ (num_items * ratio_hot) cfg = {'max-items': max_items or num_items, 'max-creates': max_creates or 0, 'max-ops-per-sec': self.parami("mcsoda_max_ops_sec", PerfDefaults.mcsoda_max_ops_sec), 'min-value-size': min_value_size or self.parami("min_value_size", 1024), 'exit-after-creates': exit_after_creates, 'ratio-sets': ratio_sets, 'ratio-misses': ratio_misses, 'ratio-creates': ratio_creates, 'ratio-deletes': ratio_deletes, 'ratio-hot': ratio_hot, 'ratio-hot-sets': ratio_hot_sets, 'ratio-hot-gets': ratio_hot_gets, 'ratio-expirations': ratio_expirations, 'ratio-queries': ratio_queries, 'expiration': expiration or 0, 'threads': clients, 'json': int(kind == 'json'), 'batch': self.parami("batch", PerfDefaults.batch), 'vbuckets': self.vbucket_count, 'doc-cache': doc_cache, 'prefix': prefix, 'queries': queries, 'report': report, 'hot-shift': hot_shift, 'hot-stack': self.parami("hot_stack", PerfDefaults.hot_stack), 'hot-stack-size': hot_stack_size, 'hot-stack-rotate': self.parami("hot_stack_rotate", PerfDefaults.hot_stack_rotate), 'cluster_name': self.param("cluster_name", ""), 'observe': self.param("observe", PerfDefaults.observe), 'obs-backoff': self.paramf('obs_backoff', PerfDefaults.obs_backoff), 'obs-max-backoff': self.paramf('obs_max_backoff', PerfDefaults.obs_max_backoff), 'obs-persist-count': self.parami('obs_persist_count', PerfDefaults.obs_persist_count), 
'obs-repl-count': self.parami('obs_repl_count', PerfDefaults.obs_repl_count), 'woq-pattern': self.parami('woq_pattern', PerfDefaults.woq_pattern), 'woq-verbose': self.parami('woq_verbose', PerfDefaults.woq_verbose), 'cor-pattern': self.parami('cor_pattern', PerfDefaults.cor_pattern), 'cor-persist': self.parami('cor_persist', PerfDefaults.cor_persist), 'time': self.parami('time', 0), 'cbm': self.parami('cbm', PerfDefaults.cbm), 'cbm-host': self.param('cbm_host', PerfDefaults.cbm_host), 'cbm-port': self.parami('cbm_port', PerfDefaults.cbm_port)} cfg_params = cfg.copy() cfg_params['test_time'] = time.time() cfg_params['test_name'] = test_name client_id = '' stores = None if is_eperf: client_id = self.parami("prefix", 0) sc = None if self.parami("collect_stats", 1): sc = self.start_stats(self.spec_reference + ".loop", test_params=cfg_params, client_id=client_id, collect_server_stats=collect_server_stats, ddoc=ddoc) self.cur = {'cur-items': num_items} if start_at >= 0: self.cur['cur-gets'] = start_at if num_ops is None: num_ops = num_items if isinstance(num_ops, int): cfg['max-ops'] = num_ops else: # Here, we num_ops looks like "time to run" tuple of... 
# ('seconds', integer_num_of_seconds_to_run) cfg['time'] = num_ops[1] # For Black box, multi node tests # always use membase-binary if self.is_multi_node: protocol = self.mk_protocol(host=self.input.servers[0].ip, port=self.input.servers[0].port) backups = self.get_backups(protocol) self.log.info("mcsoda protocol %s" % protocol) protocol, host_port, user, pswd = \ self.protocol_parse(protocol, use_direct=use_direct) if not user.strip(): if "11211" in host_port: user = self.param("bucket", "default") else: user = self.input.servers[0].rest_username if not pswd.strip(): if not "11211" in host_port: pswd = self.input.servers[0].rest_password self.log.info("mcsoda %s %s %s %s" % (protocol, host_port, user, pswd)) self.log.info("mcsoda cfg:\n" + pprint.pformat(cfg)) self.log.info("mcsoda cur:\n" + pprint.pformat(cfg)) self.log.info("mcsoda backups: %s" % backups) # For query tests always use StoreCouchbase if protocol == "couchbase": stores = [StoreCouchbase()] self.cur, start_time, end_time = \ self.mcsoda_run(cfg, self.cur, protocol, host_port, user, pswd, stats_collector=sc, ctl=ctl, stores=stores, heartbeat=self.parami("mcsoda_heartbeat", 0), why="loop", bucket=self.param("bucket", "default"), backups=backups) ops = {'tot-sets': self.cur.get('cur-sets', 0), 'tot-gets': self.cur.get('cur-gets', 0), 'tot-items': self.cur.get('cur-items', 0), 'tot-creates': self.cur.get('cur-creates', 0), 'tot-misses': self.cur.get('cur-misses', 0), "start-time": start_time, "end-time": end_time} if self.parami("loop_wait_until_drained", PerfDefaults.loop_wait_until_drained): self.wait_until_drained() if self.parami("loop_wait_until_repl", PerfDefaults.loop_wait_until_repl): self.wait_until_repl() if self.parami("collect_stats", 1) and \ not self.parami("reb_no_fg", PerfDefaults.reb_no_fg): self.end_stats(sc, ops, self.spec_reference + ".loop") why = self.params("why", "main") prefix = self.parami("prefix", 0) self.log.info("finished") return ops, start_time, end_time def 
wait_until_drained(self): self.log.info("draining disk write queue") master = self.input.servers[0] bucket = self.param("bucket", "default") RebalanceHelper.wait_for_stats_on_all(master, bucket, 'ep_queue_size', 0, fn=RebalanceHelper.wait_for_stats_no_timeout) RebalanceHelper.wait_for_stats_on_all(master, bucket, 'ep_flusher_todo', 0, fn=RebalanceHelper.wait_for_stats_no_timeout) self.log.info("disk write queue has been drained") return time.time() def wait_until_repl(self): self.log.info("waiting for replication") master = self.input.servers[0] bucket = self.param("bucket", "default") RebalanceHelper.wait_for_stats_on_all(master, bucket, 'vb_replica_queue_size', 0, fn=RebalanceHelper.wait_for_stats_no_timeout) RebalanceHelper.wait_for_stats_on_all(master, bucket, 'ep_tap_replica_queue_itemondisk', 0, fn=RebalanceHelper.wait_for_stats_no_timeout) RebalanceHelper.wait_for_stats_on_all(master, bucket, 'ep_tap_rebalance_queue_backfillremaining', 0, fn=RebalanceHelper.wait_for_stats_no_timeout) RebalanceHelper.wait_for_stats_on_all(master, bucket, 'ep_tap_replica_qlen', 0, fn=RebalanceHelper.wait_for_stats_no_timeout) self.log.info("replication is done") def warmup(self, collect_stats=True, flush_os_cache=False): """ Restart cluster and wait for it to warm up. In current version, affect the master node only. """ if not self.input.servers: self.log.error("empty server list") return if collect_stats: client_id = self.parami("prefix", 0) test_params = {'test_time': time.time(), 'test_name': self.id(), 'json': 0} sc = self.start_stats(self.spec_reference + ".warmup", test_params=test_params, client_id=client_id) self.log.info("preparing to warmup cluster ...") server = self.input.servers[0] shell = RemoteMachineShellConnection(server) start_time = time.time() self.log.info("stopping couchbase ... 
({0})".format(server.ip)) shell.stop_couchbase() self.log.info("couchbase stopped ({0})".format(server.ip)) if flush_os_cache: self.log.info("flushing os cache ...") shell.flush_os_caches() shell.start_couchbase() self.log.info("couchbase restarted ({0})".format(server.ip)) self.wait_until_warmed_up() self.log.info("warmup finished") end_time = time.time() ops = {'tot-sets': 0, 'tot-gets': 0, 'tot-items': 0, 'tot-creates': 0, 'tot-misses': 0, "start-time": start_time, "end-time": end_time} if collect_stats: self.end_stats(sc, ops, self.spec_reference + ".warmup") def wait_until_warmed_up(self, master=None): if not master: master = self.input.servers[0] bucket = self.param("bucket", "default") fn = RebalanceHelper.wait_for_mc_stats_no_timeout for bucket in self.buckets: RebalanceHelper.wait_for_stats_on_all(master, bucket, 'ep_warmup_thread', 'complete', fn=fn) def set_param(self, name, val): input = getattr(self, "input", TestInputSingleton.input) input.test_params[name] = str(val) return True def wait_for_task_completion(self, task='indexer'): """Wait for ns_server task to finish""" t0 = time.time() self.log.info("Waiting 30 seconds before {0} monitoring".format(task)) time.sleep(30) while True: tasks = self.rest.ns_server_tasks() if tasks: try: progress = [t['progress'] for t in tasks if t['type'] == task] except TypeError: self.log.error(tasks) else: if progress: self.log.info("{0} progress: {1}".format(task, progress)) time.sleep(10) else: break t1 = time.time() self.log.info("Time taken to perform task: {0} sec".format(t1 - t0)) def param(self, name, default_value): input = getattr(self, "input", TestInputSingleton.input) return input.test_params.get(name, default_value) def parami(self, name, default_int): return int(self.param(name, default_int)) def paramf(self, name, default_float): return float(self.param(name, default_float)) def params(self, name, default_str): return str(self.param(name, default_str))
def test_analytics_volume(self):
    """Long-running CBAS (analytics) volume test.

    Executes a scripted sequence of ~60 "steps": grows the cluster with
    N1QL/KV/CBAS nodes, loads/updates/deletes large document batches on
    background threads while analytics queries run concurrently on a
    Jython thread pool (java.util.concurrent Executors/invokeAll), and
    interleaves KV/CBAS rebalance in/out/swap operations.  Doc counts in
    the analytics datasets are validated after each phase.

    NOTE(review): this runs under Jython / Python 2 (print statements,
    xrange, integer division in num_items/10) -- do not port to
    Python 3 without revisiting those constructs.
    """
    # Analytics queries issued repeatedly by the QueryRunner executors.
    queries = ['SELECT VALUE u FROM `GleambookUsers_ds` u WHERE u.user_since >= "2010-09-13T16-48-15" AND u.user_since < "2010-10-13T16-48-15" AND (SOME e IN u.employment SATISFIES e.end_date IS UNKNOWN) LIMIT 100;',
               'SELECT VALUE u FROM `GleambookUsers_ds` u WHERE u.user_since >= "2010-11-13T16-48-15" AND u.user_since < "2010-12-13T16-48-15" limit 1;',
               ]
    nodes_in_cluster = [self.servers[0], self.cbas_node]
    print "Start Time: %s" % str(time.strftime("%H:%M:%S", time.gmtime(time.time())))
    # ------------------------------------------------------------------
    self.log.info("Step 1: Start the test with 2 KV and 2 CBAS nodes")
    self.log.info("Add a N1QL/Index nodes")
    self.query_node = self.servers[1]
    rest = RestConnection(self.query_node)
    rest.set_data_path(data_path=self.query_node.data_path, index_path=self.query_node.index_path, cbas_path=self.query_node.cbas_path)
    result = self.add_node(self.query_node, rebalance=False)
    self.assertTrue(result, msg="Failed to add N1QL/Index node.")
    self.log.info("Add a KV nodes")
    rest = RestConnection(self.kv_servers[1])
    rest.set_data_path(data_path=self.kv_servers[1].data_path, index_path=self.kv_servers[1].index_path, cbas_path=self.kv_servers[1].cbas_path)
    result = self.add_node(self.kv_servers[1], services=["kv"], rebalance=False)
    self.assertTrue(result, msg="Failed to add KV node.")
    self.log.info("Add one more KV node")
    rest = RestConnection(self.kv_servers[3])
    rest.set_data_path(data_path=self.kv_servers[3].data_path, index_path=self.kv_servers[3].index_path, cbas_path=self.kv_servers[3].cbas_path)
    result = self.add_node(self.kv_servers[3], services=["kv"], rebalance=False)
    self.assertTrue(result, msg="Failed to add KV node.")
    self.log.info("Add one more KV node")
    rest = RestConnection(self.kv_servers[4])
    rest.set_data_path(data_path=self.kv_servers[4].data_path, index_path=self.kv_servers[4].index_path, cbas_path=self.kv_servers[4].cbas_path)
    result = self.add_node(self.kv_servers[4], services=["kv"], rebalance=False)
    self.assertTrue(result, msg="Failed to add KV node.")
    self.log.info("Add a CBAS nodes")
    # rebalance=True here applies all of the pending node additions above.
    result = self.add_node(self.cbas_servers[0], services=["cbas"], rebalance=True)
    self.assertTrue(result, msg="Failed to add CBAS node.")
    nodes_in_cluster = nodes_in_cluster + [self.query_node, self.kv_servers[1], self.kv_servers[3], self.kv_servers[4], self.cbas_servers[0]]
    # ------------------------------------------------------------------
    self.log.info("Step 2: Create Couchbase buckets.")
    self.create_required_buckets()
    # ------------------------------------------------------------------
    self.log.info("Step 3: Create 10M docs average of 1k docs for 8 couchbase buckets.")
    GleambookUsers = buck(name="GleambookUsers", authType=None, saslPassword=None, num_replicas=self.num_replicas, bucket_size=self.bucket_size, eviction_policy='noEviction', lww=self.lww)
    items_start_from = 0
    total_num_items = self.input.param("num_items", 1000000)
    num_query = self.input.param("num_query", 240)
    self.use_replica_to = False
    self.rate_limit = self.input.param('rate_limit', '100000')
    load_thread = Thread(target=self.load_buckets_with_high_ops, name="high_ops_load",
                         args=(self.master, GleambookUsers, total_num_items, 50, 4, items_start_from, 2, 0))
    self.log.info('starting the load thread...')
    load_thread.start()
    load_thread.join()
    # Bookkeeping for the next mutate/delete phase: updates hit the start
    # of the batch, deletes hit the range after the first 10%.
    updates_from = items_start_from
    deletes_from = items_start_from + total_num_items / 10
    items_start_from += total_num_items
    # ------------------------------------------------------------------
    self.log.info("Step 4: Create 8 analytics buckets and 8 datasets and connect.")
    load_thread = Thread(target=self.load_buckets_with_high_ops, name="high_ops_load",
                         args=(self.master, GleambookUsers, total_num_items, 50, 4, items_start_from, 2, 0))
    self.log.info('starting the load thread...')
    load_thread.start()
    items_start_from += total_num_items
    self.setup_cbas()  # CBAS setup runs while the load thread is active
    load_thread.join()
    # ------------------------------------------------------------------
    self.log.info("Step 5: Wait for ingestion to complete.")
    self.sleep(10, "Wait for the ingestion to complete")
    # ------------------------------------------------------------------
    self.log.info("Step 6: Verify the items count.")
    self.validate_items_count()
    # ------------------------------------------------------------------
    self.log.info("Step 7: Disconnect CBAS bucket and create secondary indexes.")
    self.disconnect_cbas_buckets()
    self.create_cbas_indexes()
    # ------------------------------------------------------------------
    self.log.info("Step 8: Delete 1M docs. Update 1M docs.")
    pool = Executors.newFixedThreadPool(5)
    num_items = self.input.param("num_items", 5000)
    executors = []
    query_executors = 1
    num_executors = query_executors
    upsert_thread = Thread(target=self.load_buckets_with_high_ops, name="high_ops_delete",
                           args=(self.master, GleambookUsers, num_items / 10, 10000, 4, updates_from, 1, 0))
    delete_thread = Thread(target=self.delete_buckets_with_high_ops, name="high_ops_delete",
                           args=(self.master, GleambookUsers, num_items / 10, self.rate_limit, 10000, 2, deletes_from, 1))
    delete_thread.start()
    upsert_thread.start()
    for i in xrange(query_executors):
        executors.append(QueryRunner(random.choice(queries), num_query, self.cbas_util))
    futures = pool.invokeAll(executors)
    for future in futures:
        print future.get(num_executors, TimeUnit.SECONDS)
    print "Executors completed!!"
    shutdown_and_await_termination(pool, num_executors)
    delete_thread.join()
    upsert_thread.join()
    # ------------------------------------------------------------------
    self.log.info("Step 9: Connect cbas buckets.")
    self.connect_cbas_buckets()
    self.sleep(10, "Wait for the ingestion to complete")
    # ------------------------------------------------------------------
    self.log.info("Step 10: Verify the items count.")
    self.validate_items_count()
    # ------------------------------------------------------------------
    # NOTE(review): step 12 is deliberately started before step 11 so the
    # KV rebalance runs concurrently with the document load below.
    self.log.info("Step 12: When 11 is in progress do a KV Rebalance in of 1 nodes.")
    rest = RestConnection(self.kv_servers[2])
    rest.set_data_path(data_path=self.kv_servers[2].data_path, index_path=self.kv_servers[2].index_path, cbas_path=self.kv_servers[2].cbas_path)
    rebalance = self.cluster.async_rebalance(nodes_in_cluster, [self.kv_servers[2]], [])
    nodes_in_cluster += [self.kv_servers[2]]
    # ------------------------------------------------------------------
    self.log.info("Step 11: Create 10M docs.")
    pool = Executors.newFixedThreadPool(5)
    total_num_items = self.input.param("num_items", 1000000)
    num_query = self.input.param("num_query", 240)
    self.use_replica_to = False
    self.rate_limit = self.input.param('rate_limit', '100000')
    load_thread = Thread(target=self.load_buckets_with_high_ops, name="high_ops_load",
                         args=(self.master, GleambookUsers, total_num_items, 50, 4, items_start_from, 2, 0))
    self.log.info('starting the load thread...')
    load_thread.start()
    # NOTE(review): 'executors' is not cleared here, so runners left over
    # from the previous phase are re-submitted along with the new one --
    # confirm this re-use is intended (same pattern recurs below).
    for i in xrange(query_executors):
        executors.append(QueryRunner(random.choice(queries), num_query, self.cbas_util))
    futures = pool.invokeAll(executors)
    for future in futures:
        print future.get(num_executors, TimeUnit.SECONDS)
    print "Executors completed!!"
    shutdown_and_await_termination(pool, num_executors)
    load_thread.join()
    updates_from = items_start_from
    deletes_from = items_start_from + total_num_items / 10
    items_start_from += total_num_items
    # ------------------------------------------------------------------
    self.log.info("Step 13: Wait for rebalance to complete.")
    reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
    self.assertTrue(reached, "rebalance failed, stuck or did not complete")
    self.sleep(20)
    # ------------------------------------------------------------------
    self.log.info("Step 14: Verify the items count.")
    self.validate_items_count()
    # ------------------------------------------------------------------
    self.log.info("Step 15: Delete 1M docs. Update 1M docs.")
    pool = Executors.newFixedThreadPool(5)
    num_items = self.input.param("num_items", 5000)
    executors = []
    query_executors = 1
    num_executors = query_executors
    upsert_thread = Thread(target=self.load_buckets_with_high_ops, name="high_ops_delete",
                           args=(self.master, GleambookUsers, num_items / 10, 10000, 4, updates_from, 1, 0))
    delete_thread = Thread(target=self.delete_buckets_with_high_ops, name="high_ops_delete",
                           args=(self.master, GleambookUsers, num_items / 10, self.rate_limit, 10000, 2, deletes_from, 1))
    delete_thread.start()
    upsert_thread.start()
    for i in xrange(query_executors):
        executors.append(QueryRunner(random.choice(queries), num_query, self.cbas_util))
    futures = pool.invokeAll(executors)
    for future in futures:
        print future.get(num_executors, TimeUnit.SECONDS)
    print "Executors completed!!"
    shutdown_and_await_termination(pool, num_executors)
    delete_thread.join()
    upsert_thread.join()
    # ------------------------------------------------------------------
    self.log.info("Step 16: Verify Results that 1M docs gets deleted from analytics datasets.")
    self.validate_items_count()
    # ------------------------------------------------------------------
    self.log.info("Step 17: Disconnect CBAS buckets.")
    self.disconnect_cbas_buckets()
    # ------------------------------------------------------------------
    self.log.info("Step 18: Create 10M docs.")
    pool = Executors.newFixedThreadPool(5)
    total_num_items = self.input.param("num_items", 1000000)
    num_query = self.input.param("num_query", 240)
    self.use_replica_to = False
    self.rate_limit = self.input.param('rate_limit', '100000')
    load_thread = Thread(target=self.load_buckets_with_high_ops, name="high_ops_load",
                         args=(self.master, GleambookUsers, total_num_items, 50, 4, items_start_from, 2, 0))
    self.log.info('starting the load thread...')
    load_thread.start()
    for i in xrange(query_executors):
        executors.append(QueryRunner(random.choice(queries), num_query, self.cbas_util))
    futures = pool.invokeAll(executors)
    for future in futures:
        print future.get(num_executors, TimeUnit.SECONDS)
    print "Executors completed!!"
    shutdown_and_await_termination(pool, num_executors)
    load_thread.join()
    updates_from = items_start_from
    deletes_from = items_start_from + total_num_items / 10
    items_start_from += total_num_items
    # ------------------------------------------------------------------
    self.log.info("Step 19: Multiple Connect/Disconnect CBAS buckets during ingestion in step 18.")
    self.connect_cbas_buckets()
    self.sleep(5)
    self.disconnect_cbas_buckets()
    self.connect_cbas_buckets()
    self.sleep(5)
    self.disconnect_cbas_buckets()
    self.connect_cbas_buckets()
    self.sleep(5)
    self.disconnect_cbas_buckets()
    self.connect_cbas_buckets()  # finish in the connected state
    # ------------------------------------------------------------------
    self.log.info("Step 20: Verify the docs count.")
    self.validate_items_count()
    # ------------------------------------------------------------------
    self.log.info("Step 21: Run 500 complex queries concurrently and verify the results.")
    pool = Executors.newFixedThreadPool(5)
    num_query = self.input.param("num_query", 500)
    executors = []
    num_executors = 5
    query_executors = num_executors
    for i in xrange(query_executors):
        executors.append(QueryRunner(random.choice(queries), num_query, self.cbas_util))
    self.log.info("Step 22: When 21 is in progress do a KV Rebalance out of 2 nodes.")
    rebalance = self.cluster.async_rebalance(nodes_in_cluster, [], self.kv_servers[1:2])
    nodes_in_cluster = [node for node in nodes_in_cluster if node not in self.kv_servers[1:2]]
    futures = pool.invokeAll(executors)
    self.log.info("Step 23: Wait for rebalance.")
    reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
    self.assertTrue(reached, "rebalance failed, stuck or did not complete")
    self.sleep(20)
    for future in futures:
        print future.get(num_executors, TimeUnit.SECONDS)
    print "Executors completed!!"
    shutdown_and_await_termination(pool, num_executors)
    # ------------------------------------------------------------------
    self.log.info("Step 24: Create 10M docs.")
    pool = Executors.newFixedThreadPool(5)
    executors = []
    num_executors = 2
    query_executors = num_executors
    total_num_items = self.input.param("num_items", 1000000)
    num_query = self.input.param("num_query", 240)
    self.use_replica_to = False
    self.rate_limit = self.input.param('rate_limit', '100000')
    load_thread = Thread(target=self.load_buckets_with_high_ops, name="high_ops_load",
                         args=(self.master, GleambookUsers, total_num_items, 50, 4, items_start_from, 2, 0))
    self.log.info('starting the load thread...')
    load_thread.start()
    for i in xrange(query_executors):
        executors.append(QueryRunner(random.choice(queries), num_query, self.cbas_util))
    self.log.info("Step 26: Run 500 complex queries concurrently and verify the results.")
    executors.append(QueryRunner(random.choice(queries), 500, self.cbas_util))
    # ---------------------------- NEED TO BE UPDATED -------------------
    self.log.info("Step 25: When 24 is in progress do a CBAS Rebalance in of 2 nodes.")
    for node in self.cbas_servers[2:]:
        rest = RestConnection(node)
        rest.set_data_path(data_path=node.data_path, index_path=node.index_path, cbas_path=node.cbas_path)
    rebalance = self.cluster.async_rebalance(nodes_in_cluster, self.cbas_servers[1:], [], services=["cbas", "cbas"])
    nodes_in_cluster = nodes_in_cluster + self.cbas_servers[1:]
    futures = pool.invokeAll(executors)
    for future in futures:
        print future.get(num_executors, TimeUnit.SECONDS)
    print "Executors completed!!"
    shutdown_and_await_termination(pool, num_executors)
    load_thread.join()
    updates_from = items_start_from
    deletes_from = items_start_from + total_num_items / 10
    items_start_from += total_num_items
    self.log.info("Step 27: Wait for rebalance to complete.")
    reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
    self.assertTrue(reached, "rebalance failed, stuck or did not complete")
    # ------------------------------------------------------------------
    self.log.info("Step 28: Verify the docs count.")
    self.validate_items_count()
    # ------------------------------------------------------------------
    self.log.info("Step 29: Delete 1M docs. Update 1M docs.")
    pool = Executors.newFixedThreadPool(5)
    num_items = self.input.param("num_items", 5000)
    executors = []
    query_executors = 1
    num_executors = query_executors
    upsert_thread = Thread(target=self.load_buckets_with_high_ops, name="high_ops_delete",
                           args=(self.master, GleambookUsers, num_items / 10, 10000, 4, updates_from, 1, 0))
    delete_thread = Thread(target=self.delete_buckets_with_high_ops, name="high_ops_delete",
                           args=(self.master, GleambookUsers, num_items / 10, self.rate_limit, 10000, 2, deletes_from, 1))
    delete_thread.start()
    upsert_thread.start()
    for i in xrange(query_executors):
        executors.append(QueryRunner(random.choice(queries), num_query, self.cbas_util))
    futures = pool.invokeAll(executors)
    for future in futures:
        print future.get(num_executors, TimeUnit.SECONDS)
    print "Executors completed!!"
    shutdown_and_await_termination(pool, num_executors)
    delete_thread.join()
    upsert_thread.join()
    # ------------------------------------------------------------------
    self.log.info("Step 30: Verify the docs count.")
    self.validate_items_count()
    # ------------------------------------------------------------------
    self.log.info("Step 31: Create 10M docs.")
    pool = Executors.newFixedThreadPool(5)
    total_num_items = self.input.param("num_items", 1000000)
    num_query = self.input.param("num_query", 240)
    self.use_replica_to = False
    self.rate_limit = self.input.param('rate_limit', '100000')
    load_thread = Thread(target=self.load_buckets_with_high_ops, name="high_ops_load",
                         args=(self.master, GleambookUsers, total_num_items, 50, 4, items_start_from, 2, 0))
    self.log.info('starting the load thread...')
    load_thread.start()
    for i in xrange(query_executors):
        executors.append(QueryRunner(random.choice(queries), num_query, self.cbas_util))
    # ---------------------------- NEED TO BE UPDATED -------------------
    self.log.info("Step 32: When 31 is in progress do a CBAS Rebalance out of 1 nodes.")
    rebalance = self.cluster.async_rebalance(nodes_in_cluster, [], self.cbas_servers[-1:])
    nodes_in_cluster = [node for node in nodes_in_cluster if node not in self.cbas_servers[-1:]]
    futures = pool.invokeAll(executors)
    for future in futures:
        print future.get(num_executors, TimeUnit.SECONDS)
    print "Executors completed!!"
    shutdown_and_await_termination(pool, num_executors)
    load_thread.join()
    updates_from = items_start_from
    deletes_from = items_start_from + total_num_items / 10
    items_start_from += total_num_items
    # ------------------------------------------------------------------
    self.log.info("Step 33: Wait for rebalance to complete.")
    reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
    self.assertTrue(reached, "rebalance failed, stuck or did not complete")
    self.sleep(20)
    # ------------------------------------------------------------------
    self.log.info("Step 34: Verify the docs count.")
    self.validate_items_count()
    # ------------------------------------------------------------------
    self.log.info("Step 35: Delete 1M docs. Update 1M docs.")
    pool = Executors.newFixedThreadPool(5)
    num_items = self.input.param("num_items", 5000)
    executors = []
    query_executors = 1
    num_executors = query_executors
    upsert_thread = Thread(target=self.load_buckets_with_high_ops, name="high_ops_delete",
                           args=(self.master, GleambookUsers, num_items / 10, 10000, 4, updates_from, 1, 0))
    delete_thread = Thread(target=self.delete_buckets_with_high_ops, name="high_ops_delete",
                           args=(self.master, GleambookUsers, num_items / 10, self.rate_limit, 10000, 2, deletes_from, 1))
    delete_thread.start()
    upsert_thread.start()
    for i in xrange(query_executors):
        executors.append(QueryRunner(random.choice(queries), num_query, self.cbas_util))
    futures = pool.invokeAll(executors)
    for future in futures:
        print future.get(num_executors, TimeUnit.SECONDS)
    print "Executors completed!!"
    shutdown_and_await_termination(pool, num_executors)
    delete_thread.join()
    upsert_thread.join()
    # ------------------------------------------------------------------
    self.log.info("Step 36: Verify the docs count.")
    self.validate_items_count()
    # ------------------------------------------------------------------
    self.log.info("Step 37: Create 10M docs.")
    pool = Executors.newFixedThreadPool(5)
    total_num_items = self.input.param("num_items", 1000000)
    num_query = self.input.param("num_query", 240)
    self.use_replica_to = False
    self.rate_limit = self.input.param('rate_limit', '100000')
    load_thread = Thread(target=self.load_buckets_with_high_ops, name="high_ops_load",
                         args=(self.master, GleambookUsers, total_num_items, 50, 4, items_start_from, 2, 0))
    self.log.info('starting the load thread...')
    load_thread.start()
    for i in xrange(query_executors):
        executors.append(QueryRunner(random.choice(queries), num_query, self.cbas_util))
    # ---------------------------- NEED TO BE UPDATED -------------------
    self.log.info("Step 38: When 37 is in progress do a CBAS CC SWAP Rebalance of 2 nodes.")
    for node in self.cbas_servers[-1:]:
        rest = RestConnection(node)
        rest.set_data_path(data_path=node.data_path, index_path=node.index_path, cbas_path=node.cbas_path)
    # Swap: add the last cbas server, eject the CC (self.cbas_node).
    rebalance = self.cluster.async_rebalance(nodes_in_cluster, self.cbas_servers[-1:], [self.cbas_node], services=["cbas"], check_vbucket_shuffling=False)
    nodes_in_cluster += self.cbas_servers[-1:]
    nodes_in_cluster.remove(self.cbas_node)
    futures = pool.invokeAll(executors)
    for future in futures:
        print future.get(num_executors, TimeUnit.SECONDS)
    print "Executors completed!!"
    shutdown_and_await_termination(pool, num_executors)
    load_thread.join()
    # ------------------------------------------------------------------
    self.log.info("Step 39: Wait for rebalance to complete.")
    reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
    self.assertTrue(reached, "rebalance failed, stuck or did not complete")
    self.sleep(20)
    updates_from = items_start_from
    deletes_from = items_start_from + total_num_items / 10
    items_start_from += total_num_items
    # ------------------------------------------------------------------
    self.log.info("Step 40: Verify the docs count.")
    self.validate_items_count()
    # ------------------------------------------------------------------
    self.log.info("Step 41: Delete 1M docs. Update 1M docs.")
    pool = Executors.newFixedThreadPool(5)
    num_items = self.input.param("num_items", 5000)
    executors = []
    query_executors = 1
    num_executors = query_executors
    upsert_thread = Thread(target=self.load_buckets_with_high_ops, name="high_ops_delete",
                           args=(self.master, GleambookUsers, num_items / 10, 10000, 4, updates_from, 1, 0))
    delete_thread = Thread(target=self.delete_buckets_with_high_ops, name="high_ops_delete",
                           args=(self.master, GleambookUsers, num_items / 10, self.rate_limit, 10000, 2, deletes_from, 1))
    delete_thread.start()
    upsert_thread.start()
    for i in xrange(query_executors):
        executors.append(QueryRunner(random.choice(queries), num_query, self.cbas_util))
    futures = pool.invokeAll(executors)
    for future in futures:
        print future.get(num_executors, TimeUnit.SECONDS)
    print "Executors completed!!"
    shutdown_and_await_termination(pool, num_executors)
    delete_thread.join()
    upsert_thread.join()
    # ------------------------------------------------------------------
    self.log.info("Step 42: Verify the docs count.")
    self.validate_items_count()
    # ------------------------------------------------------------------
    self.log.info("Step 43: Create 10M docs.")
    pool = Executors.newFixedThreadPool(5)
    total_num_items = self.input.param("num_items", 1000000)
    num_query = self.input.param("num_query", 240)
    self.use_replica_to = False
    self.rate_limit = self.input.param('rate_limit', '100000')
    load_thread = Thread(target=self.load_buckets_with_high_ops, name="high_ops_load",
                         args=(self.master, GleambookUsers, total_num_items, 50, 4, items_start_from, 2, 0))
    self.log.info('starting the load thread...')
    load_thread.start()
    for i in xrange(query_executors):
        executors.append(QueryRunner(random.choice(queries), num_query, self.cbas_util))
    # ---------------------------- NEED TO BE UPDATED -------------------
    self.log.info("Step 44: When 43 is in progress do a KV+CBAS Rebalance IN.")
    rest = RestConnection(self.cbas_node)
    rest.set_data_path(data_path=self.cbas_node.data_path, index_path=self.cbas_node.index_path, cbas_path=self.cbas_node.cbas_path)
    rebalance = self.cluster.async_rebalance(nodes_in_cluster, [self.cbas_node], [], services=["cbas"])
    nodes_in_cluster += [self.cbas_node]
    # NOTE(review): 'reached' here is stale -- it still holds the result
    # of the step-39 wait; no new rebalance_reached() call precedes this
    # assert, so it does not check the rebalance started just above.
    self.assertTrue(reached, "rebalance failed, stuck or did not complete")
    rest = RestConnection(self.kv_servers[1])
    rest.set_data_path(data_path=self.kv_servers[1].data_path, index_path=self.kv_servers[1].index_path, cbas_path=self.kv_servers[1].cbas_path)
    rebalance = self.cluster.async_rebalance(nodes_in_cluster, [self.kv_servers[1]], [])
    nodes_in_cluster += [self.kv_servers[1]]
    futures = pool.invokeAll(executors)
    for future in futures:
        print future.get(num_executors, TimeUnit.SECONDS)
    print "Executors completed!!"
    shutdown_and_await_termination(pool, num_executors)
    load_thread.join()
    # ------------------------------------------------------------------
    self.log.info("Step 45: Wait for rebalance to complete.")
    reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
    self.assertTrue(reached, "rebalance failed, stuck or did not complete")
    self.sleep(20)
    updates_from = items_start_from
    deletes_from = items_start_from + total_num_items / 10
    items_start_from += total_num_items
    # ------------------------------------------------------------------
    self.log.info("Step 46: Verify the docs count.")
    self.validate_items_count()
    # ------------------------------------------------------------------
    self.log.info("Step 47: Delete 1M docs. Update 1M docs.")
    pool = Executors.newFixedThreadPool(5)
    num_items = self.input.param("num_items", 5000)
    executors = []
    query_executors = 1
    num_executors = query_executors
    upsert_thread = Thread(target=self.load_buckets_with_high_ops, name="high_ops_delete",
                           args=(self.master, GleambookUsers, num_items / 10, 10000, 4, updates_from, 1, 0))
    delete_thread = Thread(target=self.delete_buckets_with_high_ops, name="high_ops_delete",
                           args=(self.master, GleambookUsers, num_items / 10, self.rate_limit, 10000, 2, deletes_from, 1))
    delete_thread.start()
    upsert_thread.start()
    for i in xrange(query_executors):
        executors.append(QueryRunner(random.choice(queries), num_query, self.cbas_util))
    futures = pool.invokeAll(executors)
    for future in futures:
        print future.get(num_executors, TimeUnit.SECONDS)
    print "Executors completed!!"
    shutdown_and_await_termination(pool, num_executors)
    delete_thread.join()
    upsert_thread.join()
    # ------------------------------------------------------------------
    self.log.info("Step 48: Verify the docs count.")
    self.validate_items_count()
    # ------------------------------------------------------------------
    self.log.info("Step 49: Create 10M docs.")
    pool = Executors.newFixedThreadPool(5)
    total_num_items = self.input.param("num_items", 1000000)
    num_query = self.input.param("num_query", 240)
    self.use_replica_to = False
    self.rate_limit = self.input.param('rate_limit', '100000')
    load_thread = Thread(target=self.load_buckets_with_high_ops, name="high_ops_load",
                         args=(self.master, GleambookUsers, total_num_items, 50, 4, items_start_from, 2, 0))
    self.log.info('starting the load thread...')
    load_thread.start()
    for i in xrange(query_executors):
        executors.append(QueryRunner(random.choice(queries), num_query, self.cbas_util))
    # ------------------------------------------------------------------
    self.log.info("Step 50: When 49 is in progress do a CBAS Rebalance OUT.")
    # NOTE(review): this RestConnection is created but unused (its
    # set_data_path call is commented out below), and nodes_in_cluster is
    # intentionally(?) NOT updated for this rebalance -- confirm.
    rest = RestConnection(self.kv_servers[2])
    # rest.set_data_path(data_path=self.kv_servers[2].data_path,index_path=self.kv_servers[2].index_path,cbas_path=self.kv_servers[2].cbas_path)
    rebalance = self.cluster.async_rebalance(nodes_in_cluster, [], self.cbas_servers[-1:])
    # nodes_in_cluster = [node for node in nodes_in_cluster if node not in self.cbas_servers[-1:]]
    # nodes_in_cluster.remove(self.kv_servers[1])
    futures = pool.invokeAll(executors)
    for future in futures:
        print future.get(num_executors, TimeUnit.SECONDS)
    print "Executors completed!!"
    shutdown_and_await_termination(pool, num_executors)
    load_thread.join()
    # ------------------------------------------------------------------
    self.log.info("Step 51: Wait for rebalance to complete.")
    reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
    self.assertTrue(reached, "rebalance failed, stuck or did not complete")
    self.sleep(20)
    updates_from = items_start_from
    deletes_from = items_start_from + total_num_items / 10
    items_start_from += total_num_items
    # ------------------------------------------------------------------
    self.log.info("Step 52: Verify the docs count.")
    self.validate_items_count()
    # ------------------------------------------------------------------
    self.log.info("Step 53: Delete 1M docs. Update 1M docs.")
    pool = Executors.newFixedThreadPool(5)
    num_items = self.input.param("num_items", 5000)
    executors = []
    query_executors = 1
    num_executors = query_executors
    upsert_thread = Thread(target=self.load_buckets_with_high_ops, name="high_ops_delete",
                           args=(self.master, GleambookUsers, num_items / 10, 10000, 4, updates_from, 1, 0))
    delete_thread = Thread(target=self.delete_buckets_with_high_ops, name="high_ops_delete",
                           args=(self.master, GleambookUsers, num_items / 10, self.rate_limit, 10000, 2, deletes_from, 1))
    delete_thread.start()
    upsert_thread.start()
    for i in xrange(query_executors):
        executors.append(QueryRunner(random.choice(queries), num_query, self.cbas_util))
    futures = pool.invokeAll(executors)
    for future in futures:
        print future.get(num_executors, TimeUnit.SECONDS)
    print "Executors completed!!"
    shutdown_and_await_termination(pool, num_executors)
    delete_thread.join()
    upsert_thread.join()
    # ------------------------------------------------------------------
    self.log.info("Step 54: Verify the docs count.")
    self.validate_items_count()
    # ------------------------------------------------------------------
    self.log.info("Step 55: Create 10M docs.")
    pool = Executors.newFixedThreadPool(5)
    total_num_items = self.input.param("num_items", 1000000)
    num_query = self.input.param("num_query", 240)
    self.use_replica_to = False
    self.rate_limit = self.input.param('rate_limit', '100000')
    load_thread = Thread(target=self.load_buckets_with_high_ops, name="high_ops_load",
                         args=(self.master, GleambookUsers, total_num_items, 50, 4, items_start_from, 2, 0))
    self.log.info('starting the load thread...')
    load_thread.start()
    for i in xrange(query_executors):
        executors.append(QueryRunner(random.choice(queries), num_query, self.cbas_util))
    # ------------------------------------------------------------------
    self.log.info("Step 56: When 55 is in progress do a CBAS Rebalance IN.")
    # NOTE(review): the loop and the two statements after it configure the
    # same (last) cbas server twice -- looks like leftover duplication.
    for node in self.cbas_servers[-1:]:
        rest = RestConnection(node)
        rest.set_data_path(data_path=node.data_path, index_path=node.index_path, cbas_path=node.cbas_path)
    rest = RestConnection(self.cbas_servers[-1])
    rest.set_data_path(data_path=self.cbas_servers[-1].data_path, index_path=self.cbas_servers[-1].index_path, cbas_path=self.cbas_servers[-1].cbas_path)
    rebalance = self.cluster.async_rebalance(nodes_in_cluster, self.cbas_servers[-1:], [], services=["cbas"])
    nodes_in_cluster += self.cbas_servers[-1:]
    futures = pool.invokeAll(executors)
    for future in futures:
        print future.get(num_executors, TimeUnit.SECONDS)
    print "Executors completed!!"
    shutdown_and_await_termination(pool, num_executors)
    load_thread.join()
    # ------------------------------------------------------------------
    self.log.info("Step 57: Wait for rebalance to complete.")
    reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
    self.assertTrue(reached, "rebalance failed, stuck or did not complete")
    self.sleep(20)
    updates_from = items_start_from
    deletes_from = items_start_from + total_num_items / 10
    items_start_from += total_num_items
    # ------------------------------------------------------------------
    self.log.info("Step 58: Verify the docs count.")
    self.validate_items_count()
    # ------------------------------------------------------------------
    self.log.info("Step 59: Delete 1M docs. Update 1M docs.")
    pool = Executors.newFixedThreadPool(5)
    num_items = self.input.param("num_items", 5000)
    executors = []
    query_executors = 1
    num_executors = query_executors
    upsert_thread = Thread(target=self.load_buckets_with_high_ops, name="high_ops_delete",
                           args=(self.master, GleambookUsers, num_items / 10, 10000, 4, updates_from, 1, 0))
    delete_thread = Thread(target=self.delete_buckets_with_high_ops, name="high_ops_delete",
                           args=(self.master, GleambookUsers, num_items / 10, self.rate_limit, 10000, 2, deletes_from, 1))
    delete_thread.start()
    upsert_thread.start()
    for i in xrange(query_executors):
        executors.append(QueryRunner(random.choice(queries), num_query, self.cbas_util))
    futures = pool.invokeAll(executors)
    for future in futures:
        print future.get(num_executors, TimeUnit.SECONDS)
    print "Executors completed!!"
    shutdown_and_await_termination(pool, num_executors)
    delete_thread.join()
    upsert_thread.join()
    # ------------------------------------------------------------------
    self.log.info("Step 60: Verify the docs count.")
    self.validate_items_count()
    print "End Time: %s" % str(time.strftime("%H:%M:%S", time.gmtime(time.time())))