def test_all_cbas_node_running_queries(self):
    '''
    Description: Test that all the cbas nodes are capable of serving queries.

    Steps:
    1. Perform doc operations on the KV node.
    2. Add 1 cbas node and set up cbas.
    3. Add all other cbas nodes.
    4. Verify that all cbas nodes are able to serve queries.

    Author: Ritesh Agarwal
    '''
    set_up_cbas = False
    query = "select count(*) from {0};".format(self.cbas_dataset_name)
    self.bucket_util.create_default_bucket(self.cluster,
                                           storage=self.bucket_storage)
    self.perform_doc_ops_in_all_cb_buckets("create", 0, self.num_items)

    if self.cbas_node.ip == self.cluster.master.ip:
        set_up_cbas = self.setup_cbas_bucket_dataset_connect(
            "default", self.num_items)
        temp_cbas_util = CbasUtil(self.cluster.master, self.cbas_node,
                                  self.task)
        temp_cbas_util.createConn("default")
        self.cbas_util._run_concurrent_queries(query, None, 1000,
                                               self.cbas_util)
        temp_cbas_util.closeConn()

    for node in self.cluster.cbas_nodes:
        if node.ip != self.cluster.master.ip:
            self.cluster_util.add_node(node=node)
            if not set_up_cbas:
                set_up_cbas = self.setup_cbas_bucket_dataset_connect(
                    "default", self.num_items)
            temp_cbas_util = CbasUtil(self.cluster.master, self.cbas_node,
                                      self.task)
            temp_cbas_util.createConn("default")
            self.cbas_util._run_concurrent_queries(
                query, None, 1000, self.cbas_util,
                batch_size=self.concurrent_batch_size)
            temp_cbas_util.closeConn()
def setUp(self): super(MultiNodeFailOver, self).setUp() self.log.info("Read the input params") self.nc_nc_fail_over = self.input.param("nc_nc_fail_over", True) self.create_secondary_indexes = self.input.param("create_secondary_indexes", False) # In this fail over we fail first 3 added cbas nodes[CC + first NC + Second NC] self.meta_data_node_failure = self.input.param("meta_data_node_failure", False) self.log.info("Add CBAS nodes to cluster") self.assertIsNotNone(self.cluster_util.add_node(self.cluster.cbas_nodes[0], services=["cbas"], rebalance=False), msg="Add node failed") self.assertIsNotNone(self.cluster_util.add_node(self.cluster.cbas_nodes[1], services=["cbas"], rebalance=True), msg="Add node failed") # This node won't be failed over if self.meta_data_node_failure: self.assertIsNotNone(self.cluster_util.add_node(self.cluster.cbas_nodes[2], services=["cbas"], rebalance=True), msg="Add node failed") self.log.info("Create connection") self.cbas_util.createConn(self.cb_bucket_name) self.log.info("Load documents in kv bucket") self.perform_doc_ops_in_all_cb_buckets("create", 0, self.num_items) self.log.info("Create dataset") self.cbas_util.create_dataset_on_bucket(self.cb_bucket_name, self.cbas_dataset_name) self.log.info("Create secondary index") if self.create_secondary_indexes: self.index_fields = "profession:string,number:bigint" create_idx_statement = "create index {0} on {1}({2});".format(self.index_name, self.cbas_dataset_name, self.index_fields) status, metrics, errors, results, _ = self.cbas_util.execute_statement_on_cbas_util(create_idx_statement) self.assertTrue(status == "success", "Create Index query failed") self.assertTrue(self.cbas_util.verify_index_created(self.index_name, self.index_fields.split(","), self.cbas_dataset_name)[0]) self.log.info("Connect Local link") self.cbas_util.connect_link() self.log.info("Validate dataset count") self.cbas_util.validate_cbas_dataset_items_count(self.cbas_dataset_name, self.num_items) self.log.info("Pick nodes to fail over") self.fail_over_nodes = [] if self.nc_nc_fail_over: self.log.info("This is NC+NC fail over") self.fail_over_nodes.append(self.cluster.cbas_nodes[0]) self.fail_over_nodes.append(self.cluster.cbas_nodes[1]) self.neglect_failures = False else: self.log.info("This is NC+CC fail over") self.fail_over_nodes.append(self.cluster.cbas_nodes[0]) self.fail_over_nodes.append(self.cbas_node) self.cbas_util.closeConn() self.cbas_util = CbasUtil(self.cluster.master, self.cluster.cbas_nodes[1], self.task) if self.meta_data_node_failure: self.fail_over_nodes.append(self.cluster.cbas_nodes[1]) self.cbas_util = CbasUtil(self.cluster.master, self.cluster.cbas_nodes[2], self.task) self.cbas_util.createConn(self.cb_bucket_name) self.neglect_failures = True
def test_chain_rebalance_out_cc(self): self.setup_for_test(skip_data_loading=True) self.ingestion_in_progress() total_cbas_nodes = len(self.otpNodes) while total_cbas_nodes > 1: cc_ip = self.cbas_util.retrieve_cc_ip(shell=self.shell) for otpnode in self.otpNodes: if otpnode.ip == cc_ip: self.cluster_util.remove_node(self.cluster, [otpnode], wait_for_rebalance=True) for server in self.cluster.cbas_nodes: if cc_ip != server.ip: self.cbas_util.closeConn() self.cbas_util = CbasUtil(self.cluster.master, server) self.cbas_util.createConn("default") self.cbas_node = server break # items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(self.cbas_dataset_name) # self.log.info("Items before service restart: %s"%items_in_cbas_bucket) items_in_cbas_bucket = 0 start_time = time.time() while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) and time.time() < start_time + 60: try: items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) except: pass self.sleep(1) self.log.info( "After rebalance operation docs in CBAS bucket : %s" % items_in_cbas_bucket) if items_in_cbas_bucket < self.num_items * 2 and items_in_cbas_bucket > self.num_items: self.log.info( "Data Ingestion Interrupted successfully") elif items_in_cbas_bucket < self.num_items: self.log.info( "Data Ingestion did interrupted and restarting from 0." ) else: self.log.info( "Data Ingestion did not interrupted but complete before rebalance operation." ) query = "select count(*) from {0};".format( self.cbas_dataset_name) self.cbas_util._run_concurrent_queries( query, "immediate", 10) break total_cbas_nodes -= 1 if not self.cbas_util.validate_cbas_dataset_items_count( self.cbas_dataset_name, self.num_items * 2): self.fail( "No. of items in CBAS dataset do not match that in the CB bucket" ) self.ingest_more_data()
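# The polling loop above (retrying get_num_items_in_cbas_dataset until the
# count is neither 0 nor -1, bounded by a timeout) is repeated in several
# tests in this suite. A minimal sketch of a shared helper is shown below;
# the helper name and its placement are assumptions, not part of the
# original suite, and it relies only on utilities already used above.
def _wait_for_cbas_doc_count(self, dataset_name, timeout=60):
    """Poll the CBAS dataset item count until it becomes a positive
    number or the timeout expires. Returns the last observed count."""
    items_in_cbas_bucket = 0
    start_time = time.time()
    while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) \
            and time.time() < start_time + timeout:
        try:
            items_in_cbas_bucket, _ = \
                self.cbas_util.get_num_items_in_cbas_dataset(dataset_name)
        except:
            pass
        self.sleep(1)
    return items_in_cbas_bucket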
def test_rebalance_swap_multiple_cbas_on_a_busy_system(self): ''' 1. We have 4 node cluster with 1 KV and 3 CBAS. Assume the IPS end with 101(KV), 102(CBAS), 103(CBAS), 104(CBAS) 2, Post initial setup - 101 running KV and 102 running CBAS as CC node 3. As part of test test add an extra NC node that we will swap rebalance later - Adding 103 and rebalance 4. If swap rebalance NC - then select the node added in #3 for remove and 104 to add during swap 5. If swap rebalance CC - then select the CC node added for remove and 104 to add during swap ''' self.log.info('Read service input param') node_services = [] node_services.append(self.input.param('service', "cbas")) self.log.info("Rebalance in CBAS nodes, this node will be removed during swap") self.cluster_util.add_node(node=self.rebalanceServers[1], services=node_services) self.log.info("Setup CBAS") self.setup_for_test(skip_data_loading=True) self.log.info("Run KV ops in async while rebalance is in progress") json_generator = JsonGenerator() generators = json_generator.generate_docs_simple(docs_per_day=self.num_items, start=0) tasks = self.bucket_util._async_load_all_buckets(self.cluster, generators, "create", 0) self.log.info("Run concurrent queries to simulate busy system") statement = "select sleep(count(*),50000) from {0} where mutated=0;".format(self.cbas_dataset_name) handles = self.cbas_util._run_concurrent_queries(statement, self.mode, self.num_concurrent_queries) self.log.info("Fetch node to remove during rebalance") self.rebalance_cc = self.input.param("rebalance_cc", False) out_nodes = [] nodes = self.rest.node_statuses() reinitialize_cbas_util = False for node in nodes: if self.rebalance_cc and (node.ip == self.cbas_node.ip): out_nodes.append(node) reinitialize_cbas_util = True elif not self.rebalance_cc and node.ip == self.rebalanceServers[1].ip: out_nodes.append(node) self.log.info("Swap rebalance CBAS nodes") self.cluster_util.add_node(node=self.rebalanceServers[3], services=node_services, rebalance=False) self.remove_node([out_nodes[0]], wait_for_rebalance=True) self.log.info("Get KV ops result") for task in tasks: self.task_manager.get_task_result(task) if reinitialize_cbas_util is True: self.cbas_util = CbasUtil(self.cluster.master, self.rebalanceServers[3], self.task) self.cbas_util.createConn("default") self.log.info("Log concurrent query status") self.cbas_util.log_concurrent_query_outcome(self.cluster.master, handles) count_n1ql = self.rest.query_tool('select count(*) from %s' % (self.cb_bucket_name))['results'][0]['$1'] if not self.cbas_util.validate_cbas_dataset_items_count(self.cbas_dataset_name, count_n1ql, 0): self.fail("No. of items in CBAS dataset do not match that in the CB bucket")
def setUp(self):
    super(MemCompressionUpgradeTests, self).setUp()
    self.cbas_util = CbasUtil(self.task)
    self.cbas_spec_name = self.input.param("cbas_spec", "local_datasets")
    self.rebalance_util = CBASRebalanceUtil(
        self.cluster_util, self.bucket_util, self.task,
        vbucket_check=True, cbas_util=self.cbas_util)
    cbas_cc_node_ip = None
    retry = 0
    self.cluster.cbas_nodes = \
        self.cluster_util.get_nodes_from_services_map(
            self.cluster, service_type="cbas", get_all_nodes=True,
            servers=self.cluster.nodes_in_cluster)
def test_rebalance_out_multiple_cbas_on_a_busy_system(self): node_services = [] node_services.append(self.input.param('service',"cbas")) self.log.info("Rebalance in CBAS nodes") self.cluster_util.add_node(node=self.rebalanceServers[1], services=node_services) self.cluster_util.add_node(node=self.rebalanceServers[3], services=node_services) self.log.info("Setup CBAS") self.setup_for_test(skip_data_loading=True) self.log.info("Run KV ops in async while rebalance is in progress") json_generator = JsonGenerator() generators = json_generator.generate_docs_simple(docs_per_day=self.num_items, start=0) tasks = self.bucket_util._async_load_all_buckets(self.cluster, generators, "create", 0) self.log.info("Run concurrent queries to simulate busy system") statement = "select sleep(count(*),50000) from {0} where mutated=0;".format(self.cbas_dataset_name) handles = self.cbas_util._run_concurrent_queries(statement, self.mode, self.num_concurrent_queries) self.log.info("Fetch and remove nodes to rebalance out") self.rebalance_cc = self.input.param("rebalance_cc", False) out_nodes = [] nodes = self.rest.node_statuses() if self.rebalance_cc: for node in nodes: if node.ip == self.cbas_node.ip or node.ip == self.servers[1].ip: out_nodes.append(node) self.cbas_util.closeConn() self.log.info("Reinitialize CBAS utils with ip %s, since CC node is rebalanced out" %self.servers[3].ip) self.cbas_util = CbasUtil(self.cluster.master, self.servers[3], self.task) self.cbas_util.createConn("default") else: for node in nodes: if node.ip == self.servers[3].ip or node.ip == self.servers[1].ip: out_nodes.append(node) self.log.info("Rebalance out CBAS nodes %s %s" % (out_nodes[0].ip, out_nodes[1].ip)) self.remove_all_nodes_then_rebalance([out_nodes[0],out_nodes[1]]) self.log.info("Get KV ops result") for task in tasks: self.task_manager.get_task_result(task) self.log.info("Log concurrent query status") self.cbas_util.log_concurrent_query_outcome(self.cluster.master, handles) if not self.cbas_util.validate_cbas_dataset_items_count(self.cbas_dataset_name, self.num_items, 0): self.fail("No. of items in CBAS dataset do not match that in the CB bucket")
def test_swap_rebalance_cb_cbas_together(self): self.log.info("Creates cbas buckets and dataset") wait_for_rebalance = self.input.param("wait_for_rebalance", True) dataset_count_query = "select count(*) from {0};".format(self.cbas_dataset_name) self.setup_for_test() self.log.info("Add KV node and don't rebalance") self.cluster_util.add_node(node=self.rebalanceServers[1], rebalance=False) self.log.info("Add cbas node and don't rebalance") self.cluster_util.add_node(node=self.rebalanceServers[3], rebalance=False) otpnodes = [] nodes = self.rest.node_statuses() for node in nodes: if node.ip == self.rebalanceServers[0].ip or node.ip == self.rebalanceServers[2].ip: otpnodes.append(node) self.log.info("Remove master node") self.remove_node(otpnode=otpnodes, wait_for_rebalance=wait_for_rebalance) self.cluster.master = self.rebalanceServers[1] self.log.info("Create instances pointing to new master nodes") c_utils = CbasUtil(self.rebalanceServers[1], self.rebalanceServers[3], self.task) c_utils.createConn(self.cb_bucket_name) self.log.info("Create reference to SDK client") client = SDKClient(scheme="couchbase", hosts=[self.rebalanceServers[1].ip], bucket=self.cb_bucket_name, password=self.rebalanceServers[1].rest_password) self.log.info("Add more document to default bucket") documents = ['{"name":"value"}'] * (self.num_items//10) document_id_prefix = "custom-id-" client.insert_custom_json_documents(document_id_prefix, documents) self.log.info( "Run queries as rebalance is in progress : Rebalance state:%s" % self.rest._rebalance_progress_status()) handles = c_utils._run_concurrent_queries(dataset_count_query, "immediate", 2000, batch_size=self.concurrent_batch_size) self.log.info("Log concurrent query status") self.cbas_util.log_concurrent_query_outcome(self.cluster.master, handles) if not c_utils.validate_cbas_dataset_items_count(self.cbas_dataset_name, self.num_items + (self.num_items//10) , 0): self.fail("No. of items in CBAS dataset do not match that in the CB bucket")
def setUp(self, add_default_cbas_node=True): super(CBASBaseTest, self).setUp() if self._testMethodDoc: self.log.info("Starting Test: %s - %s" % (self._testMethodName, self._testMethodDoc)) else: self.log.info("Starting Test: %s" % self._testMethodName) invalid_ip = '10.111.151.109' self.cb_bucket_name = self.input.param('cb_bucket_name', 'travel-sample') self.cbas_bucket_name = self.input.param('cbas_bucket_name', 'travel') self.cb_bucket_password = self.input.param('cb_bucket_password', None) self.cb_server_ip = self.input.param("cb_server_ip", None) self.cb_server_ip = \ self.cb_server_ip.replace('INVALID_IP', invalid_ip) \ if self.cb_server_ip is not None else None self.cbas_dataset_name = self.input.param("cbas_dataset_name", 'travel_ds') self.cbas_bucket_name_invalid = \ self.input.param('cbas_bucket_name_invalid', self.cbas_bucket_name) self.cbas_dataset2_name = self.input.param('cbas_dataset2_name', None) self.skip_create_dataset = self.input.param('skip_create_dataset', False) self.disconnect_if_connected = \ self.input.param('disconnect_if_connected', False) self.cbas_dataset_name_invalid = \ self.input.param('cbas_dataset_name_invalid', self.cbas_dataset_name) self.skip_drop_connection = self.input.param('skip_drop_connection', False) self.skip_drop_dataset = self.input.param('skip_drop_dataset', False) self.query_id = self.input.param('query_id', None) self.mode = self.input.param('mode', None) self.num_concurrent_queries = self.input.param('num_queries', 5000) self.concurrent_batch_size = self.input.param('concurrent_batch_size', 100) self.compiler_param = self.input.param('compiler_param', None) self.compiler_param_val = self.input.param('compiler_param_val', None) self.expect_reject = self.input.param('expect_reject', False) self.expect_failure = self.input.param('expect_failure', False) self.compress_dataset = self.input.param('compress_dataset', False) self.index_name = self.input.param('index_name', "NoName") self.index_fields = self.input.param('index_fields', None) if self.index_fields: self.index_fields = self.index_fields.split("-") self.retry_time = self.input.param("retry_time", 300) self.num_retries = self.input.param("num_retries", 1) self.sample_bucket_dict = { TravelSample().name: TravelSample(), BeerSample().name: BeerSample() } self.sample_bucket = None self.flush_enabled = Bucket.FlushBucket.ENABLED self.test_abort_snapshot = self.input.param("test_abort_snapshot", False) self.cbas_spec_name = self.input.param("cbas_spec", None) self._cb_cluster = self.get_clusters() self.expected_error = self.input.param("error", None) self.bucket_spec = self.input.param("bucket_spec", None) self.doc_spec_name = self.input.param("doc_spec_name", "initial_load") self.set_cbas_memory_from_available_free_memory = self.input.param( 'set_cbas_memory_from_available_free_memory', False) self.parallel_load_percent = int( self.input.param("parallel_load_percent", 0)) self.cbas_kill_count = self.input.param("cbas_kill_count", 0) self.memcached_kill_count = self.input.param("memcached_kill_count", 0) self.tamper_links_count = self.input.param("tamper_links_count", 0) self.cbas_node = None self.cbas_memory_quota_percent = int( self.input.param("cbas_memory_quota_percent", 100)) self.bucket_size = self.input.param("bucket_size", 100) services = None nodes_init = None # Single cluster support if len(self._cb_cluster) == 1: self._cb_cluster = self._cb_cluster[0] self.cluster.nodes_in_cluster.extend([self.cluster.master]) if self.services_init and self.nodes_init >= 3: if 
len(self.cluster.servers) < self.nodes_init or \ len(self.services_init.split("-")) != self.nodes_init: self.fail("Configuration error. Re-check nodes_init, " "services_init in .conf file and servers " "available in .ini " "file") services = list() for service in self.services_init.split( "-")[1:self.nodes_init]: services.append(service.replace(":", ",")) # Initialize cluster using given nodes nodes_init = list( filter(lambda node: node.ip != self.cluster.master.ip, self.cluster.servers[1:self.nodes_init])) for node, services_init in map(None, nodes_init, services): if services_init is None: services.append("kv") if not self.cbas_node and "cbas" in services_init: self.cbas_node = node self.cbas_node.services = services_init idx = self.cluster.servers.index(node) self.cluster.servers[idx].services = services_init for server in self.cluster.servers: if "cbas" in server.services: self.cluster.cbas_nodes.append(server) if "kv" in server.services: self.cluster.kv_nodes.append(server) rest = RestConnection(server) rest.set_data_path(data_path=server.data_path, index_path=server.index_path, cbas_path=server.cbas_path) if self.expected_error: self.expected_error = \ self.expected_error.replace("INVALID_IP", invalid_ip) self.expected_error = \ self.expected_error.replace("PORT", self.cluster.master.port) self.otpNodes = [] self.cbas_path = server.cbas_path self.rest = RestConnection(self.cluster.master) if not self.set_cbas_memory_from_available_free_memory: self.log.info( "Setting the min possible memory quota so that adding " "more nodes to the cluster wouldn't be a problem.") self.rest.set_service_mem_quota({ CbServer.Settings.KV_MEM_QUOTA: MIN_KV_QUOTA, CbServer.Settings.FTS_MEM_QUOTA: FTS_QUOTA, CbServer.Settings.INDEX_MEM_QUOTA: INDEX_QUOTA }) self.set_cbas_memory_from_available_free_memory = \ self.input.param( 'set_cbas_memory_from_available_free_memory', False) self.log.info("Setting %d memory quota for CBAS" % CBAS_QUOTA) self.cbas_memory_quota = CBAS_QUOTA self.rest.set_service_mem_quota( {CbServer.Settings.CBAS_MEM_QUOTA: CBAS_QUOTA}) if self.expected_error: self.expected_error = \ self.expected_error.replace("INVALID_IP", invalid_ip) self.expected_error = \ self.expected_error.replace("PORT", self.cluster.master.port) self.cbas_util = None if self.cluster.cbas_nodes: if not self.cbas_node: available_cbas_nodes = list( filter(lambda node: node.ip != self.cluster.master.ip, self.cluster.cbas_nodes)) self.cbas_node = available_cbas_nodes[0] if self.set_cbas_memory_from_available_free_memory: self.set_memory_for_services(self.rest, self.cluster_util, self.cbas_node, self.cbas_node.services) self.cbas_util = CbasUtil(self.cluster.master, self.cbas_node) self.cbas_util_v2 = CbasUtilV2(self.cluster.master, self.cbas_node, self.task) if "cbas" in self.cluster.master.services: self.cleanup_cbas() if add_default_cbas_node: if self.cluster.master.ip != self.cbas_node.ip: self.otpNodes.append( self.cluster_util.add_node(self.cbas_node)) self.cluster.nodes_in_cluster.append(self.cbas_node) if nodes_init: idx = nodes_init.index(self.cbas_node) services.pop(idx) nodes_init.remove(self.cbas_node) else: self.otpNodes = self.rest.node_statuses() ''' This cbas cleanup is actually not needed. 
When a node is added to the cluster, it is automatically cleaned-up.''' self.cleanup_cbas() self.cluster.cbas_nodes.remove(self.cbas_node) if nodes_init: self.task.rebalance([self.cluster.master], nodes_init, [], services=services) self.cluster.nodes_in_cluster.extend(nodes_init) if self.bucket_spec is not None: try: self.collectionSetUp(self.cluster, self.bucket_util, self.cluster_util) except Java_base_exception as exception: self.handle_collection_setup_exception(exception) except Exception as exception: self.handle_collection_setup_exception(exception) else: if self.default_bucket: self.bucket_util.create_default_bucket( self.cluster, bucket_type=self.bucket_type, ram_quota=self.bucket_size, replica=self.num_replicas, conflict_resolution=self. bucket_conflict_resolution_type, replica_index=self.bucket_replica_index, storage=self.bucket_storage, eviction_policy=self.bucket_eviction_policy, flush_enabled=self.flush_enabled) elif self.cb_bucket_name in self.sample_bucket_dict.keys(): self.sample_bucket = \ self.sample_bucket_dict[self.cb_bucket_name] elif len(self._cb_cluster) > 1: # Multi Cluster Support for cluster in self._cb_cluster: for server in cluster.servers: if CbServer.Services.CBAS in server.services: cluster.cbas_nodes.append(server) if CbServer.Services.KV in server.services: cluster.kv_nodes.append(server) rest = RestConnection(server) rest.set_data_path(data_path=server.data_path, index_path=server.index_path, cbas_path=server.cbas_path) if self.expected_error: cluster.expected_error = \ self.expected_error.replace("INVALID_IP", invalid_ip) cluster.expected_error = \ self.expected_error.replace("PORT", cluster.master.port) cluster.otpNodes = list() cluster.cbas_path = server.cbas_path cluster.rest = RestConnection(cluster.master) if not self.set_cbas_memory_from_available_free_memory: self.log.info( "Setting the min possible memory quota so that adding " "more nodes to the cluster wouldn't be a problem.") cluster.rest.set_service_mem_quota({ CbServer.Settings.KV_MEM_QUOTA: MIN_KV_QUOTA, CbServer.Settings.FTS_MEM_QUOTA: FTS_QUOTA, CbServer.Settings.INDEX_MEM_QUOTA: INDEX_QUOTA }) cluster.set_cbas_memory_from_available_free_memory = \ self.input.param( 'set_cbas_memory_from_available_free_memory', False) self.log.info("Setting %d memory quota for CBAS" % CBAS_QUOTA) cluster.cbas_memory_quota = CBAS_QUOTA cluster.rest.set_service_mem_quota( {CbServer.Settings.CBAS_MEM_QUOTA: CBAS_QUOTA}) cluster.cbas_util = None # Drop any existing buckets and datasets if cluster.cbas_nodes: cluster.cbas_node = cluster.cbas_nodes[0] if self.set_cbas_memory_from_available_free_memory: self.set_memory_for_services( cluster.rest, cluster.cluster_util, cluster.cbas_node, cluster.cbas_node.services) cluster.cbas_util = CbasUtil(cluster.master, cluster.cbas_node, self.task) cluster.cbas_util_v2 = CbasUtilV2(cluster.master, cluster.cbas_node) if "cbas" in cluster.master.services: self.cleanup_cbas(cluster.cbas_util) if add_default_cbas_node: if cluster.master.ip != cluster.cbas_node.ip: cluster.otpNodes.append( cluster.cluster_util.add_node( cluster, cluster.cbas_node)) else: cluster.otpNodes = cluster.rest.node_statuses() """ This cbas cleanup is actually not needed. When a node is added to the cluster, it is automatically cleaned-up. 
""" self.cleanup_cbas(cluster.cbas_util) cluster.cbas_nodes.remove(cluster.cbas_node) if self.bucket_spec is not None: try: self.collectionSetUp(cluster, cluster.bucket_util, cluster.cluster_util) except Java_base_exception as exception: self.handle_collection_setup_exception(exception) except Exception as exception: self.handle_collection_setup_exception(exception) else: if self.default_bucket: cluster.bucket_util.create_default_bucket( self.cluster, bucket_type=self.bucket_type, ram_quota=self.bucket_size, replica=self.num_replicas, conflict_resolution=self. bucket_conflict_resolution_type, replica_index=self.bucket_replica_index, storage=self.bucket_storage, eviction_policy=self.bucket_eviction_policy, flush_enabled=self.flush_enabled) elif self.cb_bucket_name in self.sample_bucket_dict.keys(): self.sample_bucket = self.sample_bucket_dict[ self.cb_bucket_name] cluster.bucket_util.add_rbac_user(self.cluster.master) else: self.fail("No cluster is available") self.log.info( "=== CBAS_BASE setup was finished for test #{0} {1} ===".format( self.case_number, self._testMethodName))
def setUp(self):
    super(AppBase, self).setUp()
    self.log_setup_status("AppBase", "started")

    self.step_num = 1
    self.initial_load = self.input.param("initial_load", False)
    self.cluster_conf = self.input.param("cluster_conf", None)
    self.bucket_conf = self.input.param("bucket_conf", None)
    self.service_conf = self.input.param("service_conf", None)
    self.rbac_conf = self.input.param("rbac_conf", None)

    self.rbac_util = RbacUtil()
    self.sdk_clients = global_vars.sdk_clients

    self.app_path = "pytests/bucket_collections/app/"
    self.config_path = self.app_path + "config/"
    if self.cluster_conf is not None:
        with open(self.config_path + self.cluster_conf + ".yaml", "r") as fp:
            self.cluster_conf = YAML().load(fp.read())

        self.__init_rebalance_with_rbac_setup()

    # Update cluster node-service map and create cbas_util
    self.cluster_util.update_cluster_nodes_service_list(self.cluster)
    self.cbas_util = CbasUtil(self.cluster.master,
                              self.cluster.cbas_nodes[0])

    # Load bucket conf
    if self.bucket_conf is not None:
        with open(self.config_path + self.bucket_conf + ".yaml", "r") as fp:
            self.bucket_conf = YAML().load(fp.read())

    # Load RBAC conf
    if self.rbac_conf is not None:
        with open(self.config_path + self.rbac_conf + ".yaml", "r") as fp:
            self.rbac_conf = YAML().load(fp.read())

    if self.bucket_conf is not None:
        self.__setup_buckets()
    self.bucket = self.cluster.buckets[0]

    if self.rbac_conf is not None:
        for rbac_roles in self.rbac_conf["rbac_roles"]:
            self.create_sdk_clients(rbac_roles["roles"])

    if self.service_conf is not None:
        with open(self.config_path + self.service_conf + ".yaml", "r") as fp:
            self.service_conf = YAML().load(fp.read())["services"]

        # Configure backup settings
        self.configure_bucket_backups()

        # Create required GSIs
        self.create_indexes()

        # Create required CBAS data-sets
        self.create_cbas_indexes()

    self.log_setup_status("AppBase", "complete")
def test_stop_network_ingest_data(self):
    self.setup_for_test()
    self.cbas_node_type = self.input.param('cbas_node_type', None)

    query = "select sleep(count(*),50000) from {0};".format(
        self.cbas_dataset_name)
    handles = self.cbas_util._run_concurrent_queries(query, "async", 10)
    self.ingestion_in_progress()

    # Add the code for stop network here:
    if self.cbas_node_type:
        if self.cbas_node_type == "CC":
            node_in_test = self.cbas_node
            self.cbas_util = CbasUtil(self.cluster.master,
                                      self.cluster.cbas_nodes[0])
            self.cbas_util.createConn("default")
        else:
            node_in_test = self.cluster.cbas_nodes[0]
    # Stop network on KV node to mimic n/w partition on KV
    else:
        node_in_test = self.cluster.master

    items_in_cbas_bucket_before, _ = \
        self.cbas_util.get_num_items_in_cbas_dataset(self.cbas_dataset_name)
    self.log.info("Items before network down: %s"
                  % items_in_cbas_bucket_before)
    RemoteMachineShellConnection(node_in_test).stop_network("30")
    # self.sleep(40, "Wait for network to come up.")

    items_in_cbas_bucket = 0
    start_time = time.time()
    while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) \
            and time.time() < start_time + 60:
        try:
            items_in_cbas_bucket, _ = \
                self.cbas_util.get_num_items_in_cbas_dataset(
                    self.cbas_dataset_name)
        except:
            pass
    # items_in_cbas_bucket_after, _ = self.cbas_util.get_num_items_in_cbas_dataset(self.cbas_dataset_name)
    self.log.info("Items after network is up: %s" % items_in_cbas_bucket)
    # start_time = time.time()
    # while items_in_cbas_bucket_after <= 0 and time.time() < start_time + 60:
    #     items_in_cbas_bucket_after, _ = self.cbas_util.get_num_items_in_cbas_dataset(self.cbas_dataset_name)
    #     self.sleep(1)
    # items_in_cbas_bucket = items_in_cbas_bucket_after

    if items_in_cbas_bucket < self.num_items * 3 \
            and items_in_cbas_bucket > self.num_items:
        self.log.info("Data ingestion interrupted successfully")
    elif items_in_cbas_bucket < self.num_items:
        self.log.info("Data ingestion was interrupted and restarted from 0.")
    else:
        self.log.info("Data ingestion was not interrupted and completed "
                      "before the network was stopped.")

    run_count = 0
    fail_count = 0
    success_count = 0
    aborted_count = 0
    shell = RemoteMachineShellConnection(node_in_test)
    for handle in handles:
        status, hand = self.cbas_util.retrieve_request_status_using_handle(
            node_in_test, handle, shell)
        if status == "running":
            run_count += 1
            self.log.info("query with handle %s is running." % handle)
        elif status == "failed":
            fail_count += 1
            self.log.info("query with handle %s failed." % handle)
        elif status == "success":
            success_count += 1
            self.log.info("query with handle %s is successful." % handle)
        else:
            aborted_count += 1
            self.log.info("Queued job is deleted: %s" % status)

    self.log.info("After network restore %s queued jobs are Running."
                  % run_count)
    self.log.info("After network restore %s queued jobs are Failed."
                  % fail_count)
    self.log.info("After network restore %s queued jobs are Successful."
                  % success_count)
    self.log.info("After network restore %s queued jobs are Aborted."
                  % aborted_count)

    if self.cbas_node_type == "NC":
        self.assertTrue(fail_count + aborted_count == 0,
                        "Some queries failed/aborted")

    query = "select count(*) from {0};".format(self.cbas_dataset_name)
    self.cbas_util._run_concurrent_queries(query, "immediate", 100)

    if not self.cbas_util.validate_cbas_dataset_items_count(
            self.cbas_dataset_name, self.num_items * 3):
        self.fail("No. of items in CBAS dataset do not match that in the "
                  "CB bucket")
def test_disk_full_ingest_data(self): self.cbas_node_type = self.input.param('cbas_node_type', None) if self.cbas_node_type == "CC": node_in_test = self.cbas_node self.cbas_util = CbasUtil(self.cluster.master, self.cluster.cbas_nodes[0]) else: node_in_test = self.cluster.cbas_nodes[0] remote_client = RemoteMachineShellConnection(node_in_test) output, error = remote_client.execute_command("rm -rf full_disk*", use_channel=True) remote_client.log_command_output(output, error) self.setup_for_test() query = "select sleep(count(*),50000) from {0};".format( self.cbas_dataset_name) handles = self.cbas_util._run_concurrent_queries(query, "async", 10) def _get_disk_usage_in_MB(remote_client): disk_info = remote_client.get_disk_info(in_MB=True) disk_space = disk_info[1].split()[-3][:-1] return disk_space du = int(_get_disk_usage_in_MB(remote_client)) - 50 chunk_size = 1024 while int(du) > 0: output, error = remote_client.execute_command( "dd if=/dev/zero of=full_disk{0} bs={1}M count=1".format( str(du) + "_MB" + str(time.time()), chunk_size), use_channel=True) remote_client.log_command_output(output, error) du -= 1024 if du < 1024: chunk_size = du self.ingestion_in_progress() items_in_cbas_bucket_before, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) items_in_cbas_bucket_after, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) try: while items_in_cbas_bucket_before != items_in_cbas_bucket_after: items_in_cbas_bucket_before, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) self.sleep(2) items_in_cbas_bucket_after, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) except: self.log.info("Ingestion interrupted and server seems to be down") if items_in_cbas_bucket_before == self.num_items * 3: self.log.info("Data Ingestion did not interrupted but completed.") elif items_in_cbas_bucket_before < self.num_items * 3: self.log.info("Data Ingestion Interrupted successfully") output, error = remote_client.execute_command("rm -rf full_disk*", use_channel=True) remote_client.log_command_output(output, error) remote_client.disconnect() self.sleep( 10, "wait for service to come up after disk space is made available.") run_count = 0 fail_count = 0 success_count = 0 aborted_count = 0 shell = RemoteMachineShellConnection(node_in_test) for handle in handles: status, hand = self.cbas_util.retrieve_request_status_using_handle( node_in_test, handle, shell) if status == "running": run_count += 1 self.log.info("query with handle %s is running." % handle) elif status == "failed": fail_count += 1 self.log.info("query with handle %s is failed." % handle) elif status == "success": success_count += 1 self.log.info("query with handle %s is successful." % handle) else: aborted_count += 1 self.log.info("Queued job is deleted: %s" % status) self.log.info("After service restart %s queued jobs are Running." % run_count) self.log.info("After service restart %s queued jobs are Failed." % fail_count) self.log.info("After service restart %s queued jobs are Successful." % success_count) self.log.info("After service restart %s queued jobs are Aborted." % aborted_count) if self.cbas_node_type == "NC": self.assertTrue(fail_count + aborted_count == 0, "Some queries failed/aborted") self.sleep(60) query = "select count(*) from {0};".format(self.cbas_dataset_name) self.cbas_util._run_concurrent_queries(query, "immediate", 100) if not self.cbas_util.validate_cbas_dataset_items_count( self.cbas_dataset_name, self.num_items * 3): self.fail( "No. 
of items in CBAS dataset do not match that in the CB bucket" )
def test_stop_start_service_ingest_data(self): self.setup_for_test() self.cbas_node_type = self.input.param('cbas_node_type', None) query = "select sleep(count(*),50000) from {0};".format( self.cbas_dataset_name) handles = self.cbas_util._run_concurrent_queries(query, "async", 10) self.ingestion_in_progress() if self.cbas_node_type == "CC": node_in_test = self.cbas_node self.cbas_util.closeConn() self.cbas_util = CbasUtil(self.cluster.master, self.cluster.cbas_nodes[0]) self.cbas_util.createConn("default") else: node_in_test = self.cluster.cbas_nodes[0] items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) self.log.info("Items before service restart: %s" % items_in_cbas_bucket) self.log.info("Gracefully stopping service on node %s" % node_in_test) NodeHelper.stop_couchbase(node_in_test) NodeHelper.start_couchbase(node_in_test) NodeHelper.wait_service_started(node_in_test) # self.sleep(10, "wait for service to come up.") # # items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(self.cbas_dataset_name) # self.log.info("After graceful STOPPING/STARTING service docs in CBAS bucket : %s"%items_in_cbas_bucket) # # start_time = time.time() # while items_in_cbas_bucket <=0 and time.time()<start_time+60: # items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(self.cbas_dataset_name) # self.sleep(1) items_in_cbas_bucket = 0 start_time = time.time() while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) and time.time() < start_time + 60: try: items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) except: pass if items_in_cbas_bucket < self.num_items * 3 and items_in_cbas_bucket > self.num_items: self.log.info("Data Ingestion Interrupted successfully") elif items_in_cbas_bucket < self.num_items: self.log.info( "Data Ingestion did not interrupted but restarting from 0.") else: self.log.info( "Data Ingestion did not interrupted but complete before service restart." ) run_count = 0 fail_count = 0 success_count = 0 aborted_count = 0 shell = RemoteMachineShellConnection(node_in_test) for handle in handles: status, hand = self.cbas_util.retrieve_request_status_using_handle( node_in_test, handle, shell) if status == "running": run_count += 1 self.log.info("query with handle %s is running." % handle) elif status == "failed": fail_count += 1 self.log.info("query with handle %s is failed." % handle) elif status == "success": success_count += 1 self.log.info("query with handle %s is successful." % handle) else: aborted_count += 1 self.log.info("Queued job is deleted: %s" % status) self.log.info("After service restart %s queued jobs are Running." % run_count) self.log.info("After service restart %s queued jobs are Failed." % fail_count) self.log.info("After service restart %s queued jobs are Successful." % success_count) self.log.info("After service restart %s queued jobs are Aborted." % aborted_count) if self.cbas_node_type == "NC": self.assertTrue(fail_count + aborted_count == 0, "Some queries failed/aborted") query = "select count(*) from {0};".format(self.cbas_dataset_name) self.cbas_util._run_concurrent_queries(query, "immediate", 100) if not self.cbas_util.validate_cbas_dataset_items_count( self.cbas_dataset_name, self.num_items * 3): self.fail( "No. of items in CBAS dataset do not match that in the CB bucket" )
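# The handle-status bookkeeping above (counting running/failed/successful/
# aborted handles returned by retrieve_request_status_using_handle) recurs
# in most of the interruption tests in this file. Below is a hedged sketch
# of a helper that could replace those blocks; the helper name is
# hypothetical and it only reuses calls already present in this suite.
def _summarize_query_handle_statuses(self, node, handles):
    """Return a dict with counts of running/failed/success/aborted
    queries for the given analytics request handles."""
    counts = {"running": 0, "failed": 0, "success": 0, "aborted": 0}
    shell = RemoteMachineShellConnection(node)
    for handle in handles:
        status, _ = self.cbas_util.retrieve_request_status_using_handle(
            node, handle, shell)
        if status in counts:
            counts[status] += 1
        else:
            # Any unknown status is treated as an aborted/deleted job
            counts["aborted"] += 1
        self.log.info("Query with handle %s status: %s" % (handle, status))
    return counts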
def setUp(self):
    super(UpgradeTests, self).setUp()
    self.cbas_util = CbasUtil(self.task)
    self.cbas_spec_name = self.input.param("cbas_spec", "local_datasets")
    self.rebalance_util = CBASRebalanceUtil(
        self.cluster_util, self.bucket_util, self.task,
        vbucket_check=True, cbas_util=self.cbas_util)

    if self.input.param("n2n_encryption", False):
        CbServer.use_https = True
        trust_all_certs()

        self.security_util = SecurityUtils(self.log)
        rest = RestConnection(self.cluster.master)

        self.log.info("Disabling Auto-Failover")
        if not rest.update_autofailover_settings(False, 120):
            self.fail("Disabling Auto-Failover failed")

        self.log.info("Setting node to node encryption level to all")
        self.security_util.set_n2n_encryption_level_on_nodes(
            self.cluster.nodes_in_cluster, level="all")
        CbServer.use_https = True

        self.log.info("Enabling Auto-Failover")
        if not rest.update_autofailover_settings(True, 300):
            self.fail("Enabling Auto-Failover failed")

    cbas_cc_node_ip = None
    retry = 0
    self.cluster.cbas_nodes = \
        self.cluster_util.get_nodes_from_services_map(
            self.cluster, service_type="cbas", get_all_nodes=True,
            servers=self.cluster.nodes_in_cluster)
    while retry < 60:
        cbas_cc_node_ip = self.cbas_util.retrieve_cc_ip_from_master(
            self.cluster)
        if cbas_cc_node_ip:
            break
        else:
            self.sleep(10, "Waiting for CBAS service to come up")
            retry += 1
    if not cbas_cc_node_ip:
        self.fail("CBAS service did not come up even after 10 mins.")

    for server in self.cluster.cbas_nodes:
        if server.ip == cbas_cc_node_ip:
            self.cluster.cbas_cc_node = server
            break

    if not self.cbas_util.wait_for_cbas_to_recover(self.cluster,
                                                   timeout=300):
        self.fail("Analytics service failed to start post adding cbas "
                  "nodes to cluster")

    self.pre_upgrade_setup()
    self.log_setup_status(self.__class__.__name__, "Finished",
                          stage=self.setUp.__name__)
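# The setUp above waits for the analytics cluster controller (CC) node to
# become discoverable by polling retrieve_cc_ip_from_master; the same loop
# appears again in the multi-cluster base setUp later in this file. A
# minimal sketch of a shared wait helper is given below; the helper name is
# an assumption and it only uses utilities already referenced in this file.
def _wait_for_cbas_cc_node(self, cluster, retries=60, interval=10):
    """Poll until the CBAS CC node IP is known, then record it on
    cluster.cbas_cc_node. Fails the test if the CC never appears."""
    cbas_cc_node_ip = None
    for _ in range(retries):
        cbas_cc_node_ip = self.cbas_util.retrieve_cc_ip_from_master(cluster)
        if cbas_cc_node_ip:
            break
        self.sleep(interval, "Waiting for CBAS service to come up")
    if not cbas_cc_node_ip:
        self.fail("CBAS service did not come up even after %s mins."
                  % (retries * interval // 60))
    for server in cluster.cbas_nodes:
        if server.ip == cbas_cc_node_ip:
            cluster.cbas_cc_node = server
            break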
def test_cc_swap_rebalance(self): self.restart_rebalance = self.input.param('restart_rebalance', False) self.setup_for_test(skip_data_loading=True) query = "select sleep(count(*),50000) from {0};".format( self.cbas_dataset_name) handles = self.cbas_util._run_concurrent_queries(query, "async", 10) self.ingestion_in_progress() replicas_before_rebalance = len( self.cbas_util.get_replicas_info(self.shell)) self.cluster_util.add_node(node=self.cluster.cbas_nodes[-1], rebalance=False) swap_nc = self.input.param('swap_nc', False) if not swap_nc: out_nodes = [self.otpNodes[0]] self.cbas_util.closeConn() self.cbas_util = CbasUtil(self.cluster.master, self.cluster.cbas_nodes[0]) self.cbas_util.createConn("default") self.cbas_node = self.cluster.cbas_nodes[0] else: out_nodes = [self.otpNodes[1]] self.cluster_util.remove_node(self.cluster, out_nodes, wait_for_rebalance=False) self.sleep(5, "Wait for sometime after rebalance started.") if self.restart_rebalance: if self.rest._rebalance_progress_status() == "running": self.assertTrue(self.rest.stop_rebalance(wait_timeout=120), "Failed while stopping rebalance.") self.sleep(10) else: self.fail( "Rebalance completed before the test could have stopped rebalance." ) self.rebalance(ejected_nodes=[node.id for node in out_nodes], wait_for_completion=False) self.sleep(5) str_time = time.time() while self.rest._rebalance_progress_status( ) == "running" and time.time() < str_time + 300: replicas = self.cbas_util.get_replicas_info(self.shell) if replicas: for replica in replicas: self.log.info("replica state during rebalance: %s" % replica['status']) self.sleep(30) replicas = self.cbas_util.get_replicas_info(self.shell) replicas_after_rebalance = len(replicas) self.assertEqual( replicas_after_rebalance, replicas_before_rebalance, "%s,%s" % (replicas_after_rebalance, replicas_before_rebalance)) for replica in replicas: self.log.info("replica state during rebalance: %s" % replica['status']) self.assertEqual( replica['status'], "IN_SYNC", "Replica state is incorrect: %s" % replica['status']) # items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(self.cbas_dataset_name) # self.log.info("Items before service restart: %s"%items_in_cbas_bucket) items_in_cbas_bucket = 0 start_time = time.time() while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) and time.time() < start_time + 60: try: items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) except: pass self.sleep(1) self.log.info("After rebalance operation docs in CBAS bucket : %s" % items_in_cbas_bucket) if items_in_cbas_bucket < self.num_items * 2 and items_in_cbas_bucket > self.num_items: self.log.info("Data Ingestion Interrupted successfully") elif items_in_cbas_bucket < self.num_items: self.log.info( "Data Ingestion did interrupted and restarting from 0.") else: self.log.info( "Data Ingestion did not interrupted but complete before rebalance operation." ) run_count = 0 fail_count = 0 success_count = 0 aborted_count = 0 shell = RemoteMachineShellConnection(self.cluster.master) for handle in handles: status, hand = self.cbas_util.retrieve_request_status_using_handle( self.cluster.master, handle, shell) if status == "running": run_count += 1 self.log.info("query with handle %s is running." % handle) elif status == "failed": fail_count += 1 self.log.info("query with handle %s is failed." % handle) elif status == "success": success_count += 1 self.log.info("query with handle %s is successful." 
% handle) else: aborted_count += 1 self.log.info("Queued job is deleted: %s" % status) self.log.info("After service restart %s queued jobs are Running." % run_count) self.log.info("After service restart %s queued jobs are Failed." % fail_count) self.log.info("After service restart %s queued jobs are Successful." % success_count) self.log.info("After service restart %s queued jobs are Aborted." % aborted_count) query = "select count(*) from {0};".format(self.cbas_dataset_name) self.cbas_util._run_concurrent_queries(query, "immediate", 100) if not self.cbas_util.validate_cbas_dataset_items_count( self.cbas_dataset_name, self.num_items * 2): self.fail( "No. of items in CBAS dataset do not match that in the CB bucket" ) self.ingest_more_data()
def test_auto_retry_failed_rebalance(self): # Auto-retry rebalance settings body = {"enabled": "true", "afterTimePeriod": self.retry_time, "maxAttempts": self.num_retries} rest = RestConnection(self.cluster.master) rest.set_retry_rebalance_settings(body) result = rest.get_retry_rebalance_settings() self.log.info("Pick the incoming and outgoing nodes during rebalance") self.rebalance_type = self.input.param("rebalance_type", "in") nodes_to_add = [self.rebalanceServers[1]] nodes_to_remove = [] reinitialize_cbas_util = False if self.rebalance_type == 'out': nodes_to_remove.append(self.rebalanceServers[1]) self.cluster_util.add_node(self.rebalanceServers[1]) nodes_to_add = [] elif self.rebalance_type == 'swap': self.cluster_util.add_node(nodes_to_add[0], rebalance=False) nodes_to_remove.append(self.cbas_node) reinitialize_cbas_util = True self.log.info("Incoming nodes - %s, outgoing nodes - %s. For rebalance type %s " % ( nodes_to_add, nodes_to_remove, self.rebalance_type)) self.log.info("Creates cbas buckets and dataset") dataset_count_query = "select count(*) from {0};".format(self.cbas_dataset_name) self.setup_for_test() self.log.info("Perform async doc operations on KV") json_generator = JsonGenerator() generators = json_generator.generate_docs_simple(docs_per_day=self.num_items * 3 / 2, start=self.num_items) kv_task = self.bucket_util._async_load_all_buckets(self.cluster, generators, "create", 0, batch_size=5000) self.log.info("Run concurrent queries on CBAS") handles = self.cbas_util._run_concurrent_queries(dataset_count_query, "async", self.num_concurrent_queries) self.log.info("Fetch the server to restart couchbase on") restart_couchbase_on_incoming_or_outgoing_node = self.input.param( "restart_couchbase_on_incoming_or_outgoing_node", True) if not restart_couchbase_on_incoming_or_outgoing_node: node = self.cbas_node else: node = self.rebalanceServers[1] shell = RemoteMachineShellConnection(node) try: self.log.info("Rebalance nodes") self.task.async_rebalance(self.servers, nodes_to_add, nodes_to_remove) self.sleep(10, message="Restarting couchbase after 10s on node %s" % node.ip) shell.restart_couchbase() self.sleep(30, message="Waiting for service to be back again...") self.sleep(self.retry_time, "Wait for retry time to complete and then check the rebalance results") reached = RestHelper(self.rest).rebalance_reached(wait_step=120) self.log.info("Rebalance status : {0}".format(reached)) self.sleep(20) self._check_retry_rebalance_succeeded() if reinitialize_cbas_util is True: self.cbas_util = CbasUtil(self.cluster.master, self.rebalanceServers[1], self.task) self.cbas_util.createConn("default") self.cbas_util.wait_for_cbas_to_recover() self.log.info("Get KV ops result") for task in kv_task: self.task_manager.get_task_result(task) self.log.info("Log concurrent query status") self.cbas_util.log_concurrent_query_outcome(self.cluster.master, handles) self.log.info("Validate dataset count on CBAS") if not self.cbas_util.validate_cbas_dataset_items_count(self.cbas_dataset_name, self.num_items * 3 / 2, 0): self.fail("No. of items in CBAS dataset do not match that in the CB bucket") except Exception as e: self.fail("Some exception occurred : {0}".format(e.message)) finally: body = {"enabled": "false"} rest.set_retry_rebalance_settings(body)
def test_to_fail_initial_rebalance_and_verify_subsequent_rebalance_succeeds(self): self.log.info("Pick the incoming and outgoing nodes during rebalance") self.rebalance_type = self.input.param("rebalance_type", "in") nodes_to_add = [self.rebalanceServers[1]] nodes_to_remove = [] reinitialize_cbas_util = False if self.rebalance_type == 'out': nodes_to_remove.append(self.rebalanceServers[1]) self.cluster_util.add_node(self.rebalanceServers[1]) nodes_to_add = [] elif self.rebalance_type == 'swap': self.cluster_util.add_node(nodes_to_add[0], rebalance=False) nodes_to_remove.append(self.cbas_node) reinitialize_cbas_util = True self.log.info("Incoming nodes - %s, outgoing nodes - %s. For rebalance type %s " %(nodes_to_add, nodes_to_remove, self.rebalance_type)) self.log.info("Creates cbas buckets and dataset") dataset_count_query = "select count(*) from {0};".format(self.cbas_dataset_name) self.setup_for_test() self.log.info("Perform async doc operations on KV") json_generator = JsonGenerator() generators = json_generator.generate_docs_simple(docs_per_day=self.num_items * 3 / 2, start=self.num_items) kv_task = self.bucket_util._async_load_all_buckets(self.cluster, generators, "create", 0, batch_size=5000) self.log.info("Run concurrent queries on CBAS") handles = self.cbas_util._run_concurrent_queries(dataset_count_query, "async", self.num_concurrent_queries) self.log.info("Fetch the server to restart couchbase on") restart_couchbase_on_incoming_or_outgoing_node = self.input.param("restart_couchbase_on_incoming_or_outgoing_node", True) if not restart_couchbase_on_incoming_or_outgoing_node: node = self.cbas_node else: node = self.rebalanceServers[1] shell = RemoteMachineShellConnection(node) self.log.info("Rebalance nodes") self.task.async_rebalance(self.servers, nodes_to_add, nodes_to_remove) self.log.info("Restart Couchbase on node %s" % node.ip) shell.restart_couchbase() self.sleep(30, message="Waiting for service to be back again...") self.log.info("Verify subsequent rebalance is successful") nodes_to_add = [] # Node is already added to cluster in previous rebalance, adding it again will throw exception self.assertTrue(self.task.rebalance(self.servers, nodes_to_add, nodes_to_remove)) if reinitialize_cbas_util is True: self.cbas_util = CbasUtil(self.cluster.master, self.rebalanceServers[1], self.task) self.cbas_util.createConn("default") self.cbas_util.wait_for_cbas_to_recover() self.log.info("Get KV ops result") for task in kv_task: self.task_manager.get_task_result(task) self.log.info("Log concurrent query status") self.cbas_util.log_concurrent_query_outcome(self.cluster.master, handles) self.log.info("Validate dataset count on CBAS") if not self.cbas_util.validate_cbas_dataset_items_count(self.cbas_dataset_name, self.num_items * 3 / 2, 0): self.fail("No. of items in CBAS dataset do not match that in the CB bucket")
def test_failover(self): self.setup_for_test(skip_data_loading=True) self.rebalance_node = self.input.param('rebalance_node', 'CC') self.how_many = self.input.param('how_many', 1) self.restart_rebalance = self.input.param('restart_rebalance', False) self.replica_change = self.input.param('replica_change', 0) self.add_back = self.input.param('add_back', False) query = "select sleep(count(*),50000) from {0};".format( self.cbas_dataset_name) handles = self.cbas_util._run_concurrent_queries(query, "async", 10) self.ingestion_in_progress() if self.rebalance_node == "CC": node_in_test = [self.cbas_node] otpNodes = [self.otpNodes[0]] self.cbas_util.closeConn() self.cbas_util = CbasUtil(self.cluster.master, self.cluster.cbas_nodes[0]) self.cbas_util.createConn("default") self.cbas_node = self.cluster.cbas_nodes[0] elif self.rebalance_node == "NC": node_in_test = self.cluster.cbas_nodes[:self.how_many] otpNodes = self.nc_otpNodes[:self.how_many] else: node_in_test = [self.cbas_node ] + self.cluster.cbas_nodes[:self.how_many] otpNodes = self.otpNodes[:self.how_many + 1] self.cbas_util.closeConn() self.cbas_util = CbasUtil(self.cluster.master, self.cluster.cbas_nodes[self.how_many]) self.cbas_util.createConn("default") replicas_before_rebalance = len( self.cbas_util.get_replicas_info(self.shell)) items_in_cbas_bucket = 0 start_time = time.time() while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) and time.time() < start_time + 60: try: items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) except: pass self.sleep(1) self.log.info("Items before failover node: %s" % items_in_cbas_bucket) if self.restart_rebalance: graceful_failover = self.input.param("graceful_failover", False) failover_task = self._cb_cluster.async_failover( self.input.servers, node_in_test, graceful_failover) self.task_manager.get_task_result(failover_task) if self.add_back: for otpnode in otpNodes: self.rest.set_recovery_type('ns_1@' + otpnode.ip, "full") self.rest.add_back_node('ns_1@' + otpnode.ip) self.rebalance(wait_for_completion=False) else: self.rebalance(ejected_nodes=[node.id for node in otpNodes], wait_for_completion=False) self.sleep(2) if self.rest._rebalance_progress_status() == "running": self.assertTrue(self.rest.stop_rebalance(wait_timeout=120), "Failed while stopping rebalance.") if self.add_back: self.rebalance(wait_for_completion=False) else: self.rebalance( ejected_nodes=[node.id for node in otpNodes], wait_for_completion=False) else: self.fail( "Rebalance completed before the test could have stopped rebalance." 
) else: graceful_failover = self.input.param("graceful_failover", False) failover_task = self._cb_cluster.async_failover( self.input.servers, node_in_test, graceful_failover) self.task_manager.get_task_result(failover_task) if self.add_back: for otpnode in otpNodes: self.rest.set_recovery_type('ns_1@' + otpnode.ip, "full") self.rest.add_back_node('ns_1@' + otpnode.ip) self.rebalance(wait_for_completion=False) replicas_before_rebalance -= self.replica_change self.sleep(5) str_time = time.time() while self.rest._rebalance_progress_status( ) == "running" and time.time() < str_time + 300: replicas = self.cbas_util.get_replicas_info(self.shell) if replicas: for replica in replicas: self.log.info("replica state during rebalance: %s" % replica['status']) self.sleep(15) replicas = self.cbas_util.get_replicas_info(self.shell) replicas_after_rebalance = len(replicas) self.assertEqual( replicas_after_rebalance, replicas_before_rebalance, "%s,%s" % (replicas_after_rebalance, replicas_before_rebalance)) for replica in replicas: self.log.info("replica state during rebalance: %s" % replica['status']) self.assertEqual( replica['status'], "IN_SYNC", "Replica state is incorrect: %s" % replica['status']) items_in_cbas_bucket = 0 start_time = time.time() while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) and time.time() < start_time + 60: try: items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) except: pass self.sleep(1) self.log.info("After rebalance operation docs in CBAS bucket : %s" % items_in_cbas_bucket) if items_in_cbas_bucket < self.num_items * 2 and items_in_cbas_bucket > self.num_items: self.log.info("Data Ingestion Interrupted successfully") elif items_in_cbas_bucket < self.num_items: self.log.info( "Data Ingestion did interrupted and restarting from 0.") else: self.log.info( "Data Ingestion did not interrupted but complete before rebalance operation." ) run_count = 0 fail_count = 0 success_count = 0 aborted_count = 0 shell = RemoteMachineShellConnection(node_in_test[0]) for handle in handles: status, hand = self.cbas_util.retrieve_request_status_using_handle( node_in_test, handle, shell) if status == "running": run_count += 1 self.log.info("query with handle %s is running." % handle) elif status == "failed": fail_count += 1 self.log.info("query with handle %s is failed." % handle) elif status == "success": success_count += 1 self.log.info("query with handle %s is successful." % handle) else: aborted_count += 1 self.log.info("Queued job is deleted: %s" % status) self.log.info("After service restart %s queued jobs are Running." % run_count) self.log.info("After service restart %s queued jobs are Failed." % fail_count) self.log.info("After service restart %s queued jobs are Successful." % success_count) self.log.info("After service restart %s queued jobs are Aborted." % aborted_count) if self.rebalance_node == "NC": self.assertTrue(aborted_count == 0, "Some queries aborted") query = "select count(*) from {0};".format(self.cbas_dataset_name) self.cbas_util._run_concurrent_queries(query, "immediate", 100) if not self.cbas_util.validate_cbas_dataset_items_count( self.cbas_dataset_name, self.num_items * 2): self.fail( "No. of items in CBAS dataset do not match that in the CB bucket" ) self.ingest_more_data()
def setUp(self): """ Since BaseTestCase will initialize at least one cluster, we pass service for the master node of that cluster """ if not hasattr(self, "input"): self.input = TestInputSingleton.input """ In case of multi cluster setup, if cluster address family needs to be set, then this parameter is required """ if self.input.param("cluster_ip_family", ""): cluster_ip_family = self.input.param("cluster_ip_family", "").split("|") if cluster_ip_family[0] == "ipv4_only": self.input.test_params.update({ "ipv4_only": True, "ipv6_only": False }) elif cluster_ip_family[0] == "ipv6_only": self.input.test_params.update({ "ipv4_only": False, "ipv6_only": True }) elif cluster_ip_family[0] == "ipv4_ipv6": self.input.test_params.update({ "ipv4_only": True, "ipv6_only": True }) else: self.input.test_params.update({ "ipv4_only": False, "ipv6_only": False }) super(CBASBaseTest, self).setUp() """ Cluster node services. Parameter value format serv1:serv2-serv1:ser2|serv1:serv2-ser1:serv2 | -> separates services per cluster. - -> separates services on each node of the cluster. : -> separates services on a node. """ self.services_init = [ x.split("-") for x in self.input.param("services_init", "kv:n1ql:index").split("|") ] """ Number of nodes per cluster. Parameter value format num_nodes_cluster1|num_nodes_cluster2|.... | -> separates number of nodes per cluster. """ if not isinstance(self.input.param("nodes_init", 1), int): self.nodes_init = [ int(x) for x in self.input.param("nodes_init", 1).split("|") ] else: self.nodes_init = [self.input.param("nodes_init", 1)] if self._testMethodDoc: self.log.info("Starting Test: %s - %s" % (self._testMethodName, self._testMethodDoc)) else: self.log.info("Starting Test: %s" % self._testMethodName) """ Parameterized Support for multiple cluster instead of creating multiple clusters from ini file. """ self.num_of_clusters = self.input.param('num_of_clusters', 1) """ Since BaseTestCase will initialize at least one cluster, we need to modify the initialized cluster server property to correctly reflect the servers in that cluster. """ start = 0 end = self.nodes_init[0] cluster = self.cb_clusters[self.cb_clusters.keys()[0]] cluster.servers = self.servers[start:end] if "cbas" in cluster.master.services: cluster.cbas_nodes.append(cluster.master) """ Since BaseTestCase will initialize at least one cluster, we need to initialize only total clusters required - 1. 
""" cluster_name_format = "C%s" for i in range(1, self.num_of_clusters): start = end end += self.nodes_init[i] cluster_name = cluster_name_format % str(i + 1) cluster = CBCluster(name=cluster_name, servers=self.servers[start:end]) self.cb_clusters[cluster_name] = cluster cluster.nodes_in_cluster.append(cluster.master) cluster.kv_nodes.append(cluster.master) self.initialize_cluster(cluster_name, cluster, services=self.services_init[i][0]) cluster.master.services = self.services_init[i][0].replace( ":", ",") if "cbas" in cluster.master.services: cluster.cbas_nodes.append(cluster.master) if self.input.param("cluster_ip_family", ""): # Enforce IPv4 or IPv6 or both if cluster_ip_family[i] == "ipv4_only": status, msg = self.cluster_util.enable_disable_ip_address_family_type( cluster, True, True, False) if cluster_ip_family[i] == "ipv6_only": status, msg = self.cluster_util.enable_disable_ip_address_family_type( cluster, True, False, True) if cluster_ip_family[i] == "ipv4_ipv6": status, msg = self.cluster_util.enable_disable_ip_address_family_type( cluster, True, True, True) if not status: self.fail(msg) self.modify_cluster_settings(cluster) self.available_servers = self.servers[end:] """ KV infra to be created per cluster. Accepted values are - bkt_spec : will create KV infra based on bucket spec. bucket_spec param needs to be passed. default : will create a bucket named default on the cluster. None : no buckets will be created on cluster | -> separates number of nodes per cluster. """ if self.input.param("cluster_kv_infra", None): self.cluster_kv_infra = self.input.param("cluster_kv_infra", None).split("|") if len(self.cluster_kv_infra) < self.num_of_clusters: self.cluster_kv_infra.extend( [None] * (self.num_of_clusters - len(self.cluster_kv_infra))) else: self.cluster_kv_infra = [None] * self.num_of_clusters # Common properties self.num_concurrent_queries = self.input.param('num_queries', 5000) self.concurrent_batch_size = self.input.param('concurrent_batch_size', 100) self.index_fields = self.input.param('index_fields', None) if self.index_fields: self.index_fields = self.index_fields.split("-") self.retry_time = self.input.param("retry_time", 300) self.num_retries = self.input.param("num_retries", 1) self.cbas_spec_name = self.input.param("cbas_spec", None) self.expected_error = self.input.param("error", None) self.bucket_spec = self.input.param("bucket_spec", "analytics.default") self.doc_spec_name = self.input.param("doc_spec_name", "initial_load") self.set_default_cbas_memory = self.input.param( 'set_default_cbas_memory', False) self.cbas_memory_quota_percent = int( self.input.param("cbas_memory_quota_percent", 100)) self.bucket_size = self.input.param("bucket_size", 250) self.cbas_util = CbasUtil(self.task) self.service_mem_dict = { "kv": [ CbServer.Settings.KV_MEM_QUOTA, CbServer.Settings.MinRAMQuota.KV, 0 ], "fts": [ CbServer.Settings.FTS_MEM_QUOTA, CbServer.Settings.MinRAMQuota.FTS, 0 ], "index": [ CbServer.Settings.INDEX_MEM_QUOTA, CbServer.Settings.MinRAMQuota.INDEX, 0 ], "cbas": [ CbServer.Settings.CBAS_MEM_QUOTA, CbServer.Settings.MinRAMQuota.CBAS, 0 ], } # Add nodes to the cluster as per node_init param. 
for i, (cluster_name, cluster) in enumerate(self.cb_clusters.items()): cluster.rest = RestConnection(cluster.master) cluster_services = self.cluster_util.get_services_map(cluster) cluster_info = cluster.rest.get_nodes_self() for service in cluster_services: if service != "n1ql": property_name = self.service_mem_dict[service][0] service_mem_in_cluster = cluster_info.__getattribute__( property_name) self.service_mem_dict[service][2] = service_mem_in_cluster j = 1 for server in cluster.servers: if server.ip != cluster.master.ip: server.services = self.services_init[i][j].replace( ":", ",") j += 1 if "cbas" in server.services: cluster.cbas_nodes.append(server) if "kv" in server.services: cluster.kv_nodes.append(server) rest = RestConnection(server) rest.set_data_path(data_path=server.data_path, index_path=server.index_path, cbas_path=server.cbas_path) if self.set_default_cbas_memory: self.log.info( "Setting the min possible memory quota so that adding " "more nodes to the cluster wouldn't be a problem.") cluster.rest.set_service_mem_quota({ CbServer.Settings.KV_MEM_QUOTA: CbServer.Settings.MinRAMQuota.KV, CbServer.Settings.FTS_MEM_QUOTA: CbServer.Settings.MinRAMQuota.FTS, CbServer.Settings.INDEX_MEM_QUOTA: CbServer.Settings.MinRAMQuota.INDEX }) self.log.info("Setting %d memory quota for CBAS" % CbServer.Settings.MinRAMQuota.CBAS) cluster.cbas_memory_quota = CbServer.Settings.MinRAMQuota.CBAS cluster.rest.set_service_mem_quota({ CbServer.Settings.CBAS_MEM_QUOTA: CbServer.Settings.MinRAMQuota.CBAS }) else: self.set_memory_for_services(cluster, server, server.services) if cluster.servers[1:]: self.task.rebalance(cluster, cluster.servers[1:], [], services=[ server.services for server in cluster.servers[1:] ]) if cluster.cbas_nodes: cbas_cc_node_ip = None retry = 0 while True and retry < 60: cbas_cc_node_ip = self.cbas_util.retrieve_cc_ip_from_master( cluster) if cbas_cc_node_ip: break else: self.sleep(10, "Waiting for CBAS service to come up") retry += 1 if not cbas_cc_node_ip: self.fail("CBAS service did not come up even after 10 " "mins.") for server in cluster.cbas_nodes: if server.ip == cbas_cc_node_ip: cluster.cbas_cc_node = server break if "cbas" in cluster.master.services: self.cbas_util.cleanup_cbas(cluster) cluster.otpNodes = cluster.rest.node_statuses() # Wait for analytics service to be up. if hasattr(cluster, "cbas_cc_node"): if not self.cbas_util.is_analytics_running(cluster): self.fail("Analytics service did not come up even after 10\ mins of wait after initialisation") if self.input.param("n2n_encryption", False): self.security_util = SecurityUtils(self.log) rest = RestConnection(cluster.master) self.log.info("Disabling Auto-Failover") if not rest.update_autofailover_settings(False, 120): self.fail("Disabling Auto-Failover failed") self.log.info("Setting node to node encryption level to all") self.security_util.set_n2n_encryption_level_on_nodes( cluster.nodes_in_cluster, level=self.input.param("n2n_encryption_level", "control")) CbServer.use_https = True self.log.info("Enabling Auto-Failover") if not rest.update_autofailover_settings(True, 300): self.fail("Enabling Auto-Failover failed") if self.input.param("analytics_loggers", None): """ This flag is used for setting analytics internal log levels. These logs are helpful while dubugging issues as they provide a deeper insight into working on CBAS service. This flag can be used to set one or more logger for analytics. logger_name_1:level-logger_name_2:level-...... 
""" cbas_loggers = self.input.param("analytics_loggers", None).split("-") log_level_dict = dict() for logger in cbas_loggers: tmp = logger.split(":") log_level_dict[tmp[0]] = tmp[1] self.log.info("Setting following log levels for analytics - " "{0}".format(log_level_dict)) status, content, response = self.cbas_util.set_log_level_on_cbas( self.cluster, log_level_dict, timeout=120) if not status: self.fail("Error while setting log level for CBAS - " "{0}".format(content)) self.log.info("Verifying whether log levels set successfully") status, content, response = self.cbas_util.get_log_level_on_cbas( self.cluster) match_counter = 0 if status: actual_log_levels = content["loggers"] for logger in actual_log_levels: if (logger["name"] in log_level_dict) and \ logger["level"] == log_level_dict[logger["name"]]: match_counter += 1 if match_counter == len(log_level_dict): self.log.info("All log levels were set successfully") else: self.fail("Some log levels were not set") else: self.fail("Error while fetching log levels") self.disk_optimized_thread_settings = self.input.param( "disk_optimized_thread_settings", False) if self.disk_optimized_thread_settings: self.set_num_writer_and_reader_threads( cluster, num_writer_threads="disk_io_optimized", num_reader_threads="disk_io_optimized") if self.cluster_kv_infra[i] == "bkt_spec": if self.bucket_spec is not None: try: self.collectionSetUp(cluster) except Java_base_exception as exception: self.handle_setup_exception(exception) except Exception as exception: self.handle_setup_exception(exception) else: self.fail("Error : bucket_spec param needed") elif self.cluster_kv_infra[i] == "default": self.bucket_util.create_default_bucket( cluster, bucket_type=self.bucket_type, ram_quota=self.bucket_size, replica=self.num_replicas, conflict_resolution=self.bucket_conflict_resolution_type, replica_index=self.bucket_replica_index, storage=self.bucket_storage, eviction_policy=self.bucket_eviction_policy, flush_enabled=self.flush_enabled) self.bucket_util.add_rbac_user(cluster.master) self.log.info( "=== CBAS_BASE setup was finished for test #{0} {1} ===".format( self.case_number, self._testMethodName))
def test_rebalance_kv_rollback_create_ops(self): self.setup_for_test() items_before_persistence_stop = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name)[0] self.log.info("Items in CBAS before persistence stop: %s" % items_before_persistence_stop) # Stop persistence on the master KV node self.log.info("Stopping persistence on NodeA") mem_client = MemcachedClientHelper.direct_client( self.cluster.master, self.cb_bucket_name) mem_client.stop_persistence() # Perform create ops in the CB bucket self.log.info("Performing Mutations") self.perform_doc_ops_in_all_cb_buckets("create", self.num_items, self.num_items * 3 / 2) kv_nodes = self.get_kv_nodes(self.servers, self.cluster.master) items_in_cb_bucket = 0 if self.where_field and self.where_value: items_in_cb_bucket = RestConnection( self.cluster.master).query_tool( 'select count(*) from %s where %s = "%s"' % (self.cb_bucket_name, self.where_field, self.where_value))['results'][0]['$1'] else: for node in kv_nodes: items_in_cb_bucket += self.get_item_count_mc( node, self.cb_bucket_name) # Validate no. of items in CBAS dataset self.assertTrue( self.cbas_util.validate_cbas_dataset_items_count( self.cbas_dataset_name, items_in_cb_bucket, 0), "No. of items in CBAS dataset does not match that in the CB bucket") # Count no. of items in CB & CBAS buckets items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) self.log.info( "Before Rollback --- # docs in CB bucket : %s, # docs in CBAS bucket : %s", items_in_cb_bucket, items_in_cbas_bucket) self.assertTrue( items_in_cb_bucket == items_in_cbas_bucket, "Before Rollback : # Items in CBAS bucket does not match that in the CB bucket" ) if self.CC: self.cluster_util.remove_node([self.otpNodes[0]], wait_for_rebalance=False) self.cbas_util.closeConn() self.cbas_util = CbasUtil(self.cluster.master, self.cluster.cbas_nodes[0]) self.cbas_util.createConn("default") else: self.cluster_util.remove_node([self.otpNodes[1]], wait_for_rebalance=False) # Kill memcached on Node A so that Node B becomes master self.log.info("Kill Memcached process on NodeA") shell = RemoteMachineShellConnection(self.cluster.master) shell.kill_memcached() self.sleep(2, "Wait 2 secs for the DCP rollback to be sent to CBAS.") curr = time.time() while items_in_cbas_bucket == -1 or ( items_in_cbas_bucket != 0 and items_in_cbas_bucket > items_before_persistence_stop): try: if curr + 120 < time.time(): break items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) self.log.info("Items in CBAS: %s" % items_in_cbas_bucket) except: self.log.info( "Rebalance is probably in progress, which is why queries are failing." ) pass self.assertTrue(items_in_cbas_bucket <= items_before_persistence_stop, "Roll-back did not happen.") self.log.info("Rollback happened as expected") items_in_cb_bucket = 0 curr = time.time() while items_in_cb_bucket != items_in_cbas_bucket or items_in_cb_bucket == 0: items_in_cb_bucket = 0 items_in_cbas_bucket = 0 if self.where_field and self.where_value: try: items_in_cb_bucket = RestConnection( self.cluster.master).query_tool( 'select count(*) from %s where %s = "%s"' % (self.cb_bucket_name, self.where_field, self.where_value))['results'][0]['$1'] except: self.log.info( "Indexer is in rollback state; query failed. Ignoring and moving ahead."
) pass else: for node in kv_nodes: items_in_cb_bucket += self.get_item_count_mc( node, self.cb_bucket_name) self.log.info("Items in CB bucket after rollback: %s" % items_in_cb_bucket) try: items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) except: pass if curr + 120 < time.time(): break str_time = time.time() while self.rest._rebalance_progress_status( ) == "running" and time.time() < str_time + 300: self.sleep(1) self.log.info("Waiting for rebalance to complete") self.log.info( "After Rollback --- # docs in CB bucket : %s, # docs in CBAS bucket : %s", items_in_cb_bucket, items_in_cbas_bucket) self.assertTrue( items_in_cb_bucket == items_in_cbas_bucket, "After Rollback : # Items in CBAS bucket does not match that in the CB bucket" )
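# Worked example (illustrative numbers only) of the rollback expectation asserted in
# test_rebalance_kv_rollback_create_ops above, assuming num_items = 10000:
num_items_example = 10000
items_before_persistence_stop_example = num_items_example          # docs ingested and persisted
extra_mutations_example = num_items_example * 3 // 2 - num_items_example   # 5000 docs created with persistence stopped
items_seen_by_cbas_example = items_before_persistence_stop_example + extra_mutations_example  # up to 15000 streamed over DCP
# After memcached is killed on the master, the un-persisted mutations are lost, so CBAS
# must roll back and the dataset count is expected to drop to
# items_before_persistence_stop (10000) or below, which is what the
# "Roll-back did not happen." assertion checks.
assert items_seen_by_cbas_example == 15000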
def setUp(self, add_default_cbas_node=True): super(CBASBaseTest, self).setUp() if self._testMethodDoc: self.log.info("Starting Test: %s - %s" % (self._testMethodName, self._testMethodDoc)) else: self.log.info("Starting Test: %s" % self._testMethodName) for server in self.cluster.servers: if "cbas" in server.services: self.cluster.cbas_nodes.append(server) if "kv" in server.services: self.cluster.kv_nodes.append(server) rest = RestConnection(server) rest.set_data_path(data_path=server.data_path, index_path=server.index_path, cbas_path=server.cbas_path) invalid_ip = '10.111.151.109' self._cb_cluster = self.task self.cb_bucket_name = self.input.param('cb_bucket_name', 'travel-sample') self.sample_bucket_dict = {TravelSample().name: TravelSample(), BeerSample().name: BeerSample()} self.sample_bucket = None self.cbas_bucket_name = self.input.param('cbas_bucket_name', 'travel') self.cb_bucket_password = self.input.param('cb_bucket_password', None) self.expected_error = self.input.param("error", None) if self.expected_error: self.expected_error = self.expected_error.replace("INVALID_IP", invalid_ip) self.expected_error = \ self.expected_error.replace("PORT", self.cluster.master.port) self.cb_server_ip = self.input.param("cb_server_ip", None) self.cb_server_ip = \ self.cb_server_ip.replace('INVALID_IP', invalid_ip) \ if self.cb_server_ip is not None else None self.cbas_dataset_name = self.input.param("cbas_dataset_name", 'travel_ds') self.cbas_bucket_name_invalid = \ self.input.param('cbas_bucket_name_invalid', self.cbas_bucket_name) self.cbas_dataset2_name = self.input.param('cbas_dataset2_name', None) self.skip_create_dataset = self.input.param('skip_create_dataset', False) self.disconnect_if_connected = \ self.input.param('disconnect_if_connected', False) self.cbas_dataset_name_invalid = \ self.input.param('cbas_dataset_name_invalid', self.cbas_dataset_name) self.skip_drop_connection = self.input.param('skip_drop_connection', False) self.skip_drop_dataset = self.input.param('skip_drop_dataset', False) self.query_id = self.input.param('query_id', None) self.mode = self.input.param('mode', None) self.num_concurrent_queries = self.input.param('num_queries', 5000) self.concurrent_batch_size = self.input.param('concurrent_batch_size', 100) self.compiler_param = self.input.param('compiler_param', None) self.compiler_param_val = self.input.param('compiler_param_val', None) self.expect_reject = self.input.param('expect_reject', False) self.expect_failure = self.input.param('expect_failure', False) self.compress_dataset = self.input.param('compress_dataset', False) self.index_name = self.input.param('index_name', "NoName") self.index_fields = self.input.param('index_fields', None) self.retry_time = self.input.param("retry_time", 300) self.num_retries = self.input.param("num_retries", 1) self.flush_enabled = Bucket.FlushBucket.ENABLED self.test_abort_snapshot = self.input.param("test_abort_snapshot", False) if self.index_fields: self.index_fields = self.index_fields.split("-") self.otpNodes = list() self.cbas_path = server.cbas_path self.rest = RestConnection(self.cluster.master) self.log.info("Setting the min possible memory quota so that adding " "more nodes to the cluster wouldn't be a problem.") self.rest.set_service_memoryQuota(service='memoryQuota', memoryQuota=MIN_KV_QUOTA) self.rest.set_service_memoryQuota(service='ftsMemoryQuota', memoryQuota=FTS_QUOTA) self.rest.set_service_memoryQuota(service='indexMemoryQuota', memoryQuota=INDEX_QUOTA) self.set_cbas_memory_from_available_free_memory = \ 
self.input.param('set_cbas_memory_from_available_free_memory', False) if self.set_cbas_memory_from_available_free_memory: info = self.rest.get_nodes_self() self.cbas_memory_quota = int((info.memoryFree // 1024 ** 2) * 0.9) self.log.info("Setting %d memory quota for CBAS" % self.cbas_memory_quota) self.rest.set_service_memoryQuota( service='cbasMemoryQuota', memoryQuota=self.cbas_memory_quota) else: self.log.info("Setting %d memory quota for CBAS" % CBAS_QUOTA) self.cbas_memory_quota = CBAS_QUOTA self.rest.set_service_memoryQuota(service='cbasMemoryQuota', memoryQuota=CBAS_QUOTA) self.cbas_util = None # Drop any existing buckets and datasets if self.cluster.cbas_nodes: self.cbas_node = self.cluster.cbas_nodes[0] self.cbas_util = CbasUtil(self.cluster.master, self.cbas_node, self.task) if "cbas" in self.cluster.master.services: self.cleanup_cbas() if add_default_cbas_node: if self.cluster.master.ip != self.cbas_node.ip: self.otpNodes.append( self.cluster_util.add_node(self.cbas_node)) else: self.otpNodes = self.rest.node_statuses() """ This cbas cleanup is actually not needed. When a node is added to the cluster, it is automatically cleaned-up. """ self.cleanup_cbas() self.cluster.cbas_nodes.remove(self.cbas_node) if self.default_bucket: self.bucket_util.create_default_bucket( bucket_type=self.bucket_type, ram_quota=self.bucket_size, replica=self.num_replicas, conflict_resolution=self.bucket_conflict_resolution_type, replica_index=self.bucket_replica_index, storage=self.bucket_storage, eviction_policy=self.bucket_eviction_policy, flush_enabled=self.flush_enabled) elif self.cb_bucket_name in self.sample_bucket_dict.keys(): self.sample_bucket = self.sample_bucket_dict[self.cb_bucket_name] self.bucket_util.add_rbac_user() self.log.info("=== CBAS_BASE setup was finished for test #{0} {1} ===" .format(self.case_number, self._testMethodName))
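# Worked example (illustrative value) for the CBAS quota computed above when
# set_cbas_memory_from_available_free_memory is enabled; this assumes memoryFree from
# get_nodes_self() is reported in bytes, as the // 1024 ** 2 conversion implies.
memory_free_bytes_example = 8 * 1024 ** 3                        # suppose the node reports 8 GiB free
memory_free_mb_example = memory_free_bytes_example // 1024 ** 2  # 8192 MB
cbas_memory_quota_example = int(memory_free_mb_example * 0.9)    # 90% of free memory -> 7372 MB
assert cbas_memory_quota_example == 7372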
def test_logging_configurations_are_shared_across_cbas_node(self): self.log.info("Add a cbas node") result = self.cluster_util.add_node(self.cluster.cbas_nodes[0], services=["cbas"], rebalance=True) self.assertTrue(result, msg="Failed to add CBAS node") self.log.info("Delete all loggers") self.cbas_util.delete_all_loggers_on_cbas() self.log.info( "Set the logging level using json object from default logger config dictionary on master cbas node" ) status, content, response = self.cbas_util.set_log_level_on_cbas( CbasLogging.DEFAULT_LOGGER_CONFIG_DICT) self.assertTrue(status, msg="Response status incorrect for SET request") self.log.info("Verify logging configuration that we set on cbas Node") for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items(): status, content, response = self.cbas_util.get_specific_cbas_log_level( name) self.assertTrue(status, msg="Response status incorrect for GET request") self.assertEquals(content, level, msg="Logger configuration mismatch for logger " + name) self.sleep( timeout=10, message= "Waiting for logger configuration to be copied across cbas nodes") self.log.info("Verify logging configuration on other cbas node") for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items(): status, content, response = CbasUtil( self.cluster.master, self.cluster.cbas_nodes[0]).get_specific_cbas_log_level(name) self.assertTrue(status, msg="Response status incorrect for GET request") self.assertEquals(content, level, msg="Logger configuration mismatch for logger " + name) self.log.info("Update logging configuration on other cbas node") logger_level = self.input.param("logger_level", "FATAL") logger_name = self.input.param("logger_name", "org.apache.asterix") status, content, response = CbasUtil( self.cluster.master, self.cluster.cbas_nodes[0]).set_specific_log_level_on_cbas( logger_name, logger_level) self.assertTrue(status, msg="Status mismatch for SET") self.sleep( timeout=10, message= "Waiting for logger configuration to be copied across cbas nodes") self.log.info("Assert log level on master cbas node") status, content, response = self.cbas_util.get_specific_cbas_log_level( logger_name) self.assertTrue(status, msg="Status mismatch for GET") self.assertEquals(content, logger_level, msg="Logger configuration mismatch for " + logger_name)
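# The logging tests above and below iterate CbasLogging.DEFAULT_LOGGER_CONFIG_DICT; its
# real contents are defined elsewhere in the suite, but from the way it is used it is a
# plain dict mapping analytics logger names to level strings. An illustrative (not
# authoritative) example of its shape:
EXAMPLE_LOGGER_CONFIG_DICT = {
    "com.couchbase.client.core.node": "DEBUG",
    "org.apache.hyracks": "DEBUG",
    "org.apache.asterix": "DEBUG",
}
# set_log_level_on_cbas() accepts such a dict in a single call, while
# get_specific_cbas_log_level(name) reads levels back one logger at a time, which is
# how the tests verify that the configuration propagated to every CBAS node.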
def test_logging_configurations_are_restored_post_service_restarts(self): self.log.info("Add a cbas node") result = self.cluster_util.add_node(self.cluster.cbas_nodes[0], services=["cbas"], rebalance=True) self.assertTrue(result, msg="Failed to add CBAS node") self.log.info("Delete all loggers") self.cbas_util.delete_all_loggers_on_cbas() self.log.info("Set the logging level using the json object") status, content, response = self.cbas_util.set_log_level_on_cbas( CbasLogging.DEFAULT_LOGGER_CONFIG_DICT) self.assertTrue(status, msg="Response status incorrect for SET request") self.log.info("Delete specific logger") logger_name = self.input.param("logger_name_to_delete", "com.couchbase.client.core.node") status, content, response = self.cbas_util.delete_specific_cbas_log_level( logger_name) self.assertTrue(status, msg="Status mismatch for DELETE") del CbasLogging.DEFAULT_LOGGER_CONFIG_DICT[logger_name] self.log.info("Update specific logger") logger_name = self.input.param("logger_name_to_update", "org.apache.hyracks") logger_level_to_update = self.input.param("logger_level_to_update", "FATAL") status, response, content = self.cbas_util.set_specific_log_level_on_cbas( logger_name, logger_level_to_update) self.assertTrue(status, msg="Status mismatch for SET") CbasLogging.DEFAULT_LOGGER_CONFIG_DICT[ logger_name] = logger_level_to_update self.log.info("Add a new logger") logger_name = self.input.param("logger_name_to_add", "org.apache.hyracks123") logger_level_to_add = self.input.param("logger_level_to_add", "ALL") status, response, content = self.cbas_util.set_specific_log_level_on_cbas( logger_name, logger_level_to_add) self.assertTrue(status, msg="Status mismatch for SET") CbasLogging.DEFAULT_LOGGER_CONFIG_DICT[ logger_name] = logger_level_to_add self.log.info("Verify logging configuration that we set on cbas Node") for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items(): status, content, response = self.cbas_util.get_specific_cbas_log_level( name) self.assertTrue(status, msg="Response status incorrect for GET request") self.assertEquals(content, level, msg="Logger configuration mismatch for logger " + name) self.sleep( timeout=10, message= "Waiting for logger configuration to be copied across cbas nodes") self.log.info("Verify logging configuration on other cbas node") for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items(): status, content, response = CbasUtil( self.cluster.master, self.cluster.cbas_nodes[0]).get_specific_cbas_log_level(name) self.assertTrue(status, msg="Response status incorrect for GET request") self.assertEquals(content, level, msg="Logger configuration mismatch for logger " + name) self.log.info("Read input params") process_name = self.input.param('process_name', None) service_name = self.input.param('service_name', None) restart_couchbase = self.input.param('restart_couchbase', False) reboot = self.input.param('reboot', False) kill_services = self.input.param('kill_services', False) self.log.info("Establish a remote connection") shell_cc = RemoteMachineShellConnection(self.cbas_node) shell_nc = RemoteMachineShellConnection(self.cluster.cbas_nodes[0]) if kill_services: self.log.info("Kill the %s service on CC cbas node" % service_name) shell_cc.kill_process(process_name, service_name) self.log.info("Kill the %s service on other cbas node" % service_name) shell_nc.kill_process(process_name, service_name) if restart_couchbase: self.log.info("Restart couchbase service") status, _, _ = self.cbas_util.restart_analytics_cluster_uri() self.assertTrue(status, 
msg="Failed to restart cbas") if reboot: self.log.info("Reboot couchbase CC node") shell = RemoteMachineShellConnection(self.cbas_node) shell.reboot_server_and_wait_for_cb_run(self.cluster_util, self.cbas_node) shell.disconnect() self.log.info("Reboot couchbase NC node") shell = RemoteMachineShellConnection(self.cluster.cbas_nodes[0]) shell.reboot_server_and_wait_for_cb_run(self.cluster_util, self.cluster.cbas_nodes[0]) shell.disconnect() self.log.info( "Wait for request to complete and cluster to be active: Using private ping() function" ) cluster_recover_start_time = time.time() while time.time() < cluster_recover_start_time + 180: try: status, metrics, _, cbas_result, _ = self.cbas_util.execute_statement_on_cbas_util( "set `import-private-functions` `true`;ping()") if status == "success": break except: self.sleep(3, message="Wait for service to up") self.log.info("Verify logging configuration post service kill") for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items(): status, content, response = self.cbas_util.get_specific_cbas_log_level( name) self.assertTrue(status, msg="Response status incorrect for GET request") self.assertEquals(content, level, msg="Logger configuration mismatch for logger " + name) self.sleep( timeout=10, message= "Waiting for logger configuration to be copied across cbas nodes") self.log.info( "Verify logging configuration on other cbas node post service kill" ) for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items(): status, content, response = CbasUtil( self.cluster.master, self.cluster.cbas_nodes[0]).get_specific_cbas_log_level(name) self.assertTrue(status, msg="Response status incorrect for GET request") self.assertEquals(content, level, msg="Logger configuration mismatch for logger " + name)