def _common_test_body(self):
    """Rebalance the whole cluster in, then perform a configured number of
    incremental rebalance-out rounds under load.

    Reads ``cluster_size`` and ``howMany`` from the test input; each round
    ejects one non-master node, waits for loading tasks to drain, and
    optionally checks resident ratios and verifies against the KV store.
    """
    master = self.servers[0]
    rest = RestConnection(master)
    bucket_data = RebalanceBaseTest.bucket_data_init(rest)
    cluster_size = self.input.param("cluster_size", len(self.servers))
    out_rounds = self.input.param("howMany", cluster_size - 1)
    if out_rounds >= cluster_size:
        self.fail(
            "Input error! howMany {0} rebalance-outs should be lesser than cluster_size {1}".format(
                out_rounds, cluster_size))
    # Bring every node into the cluster before starting to eject any.
    self.log.info("Rebalancing In with cluster size {0}".format(cluster_size))
    RebalanceTaskHelper.add_rebalance_task(self.task_manager, [master],
                                           self.servers[1:cluster_size], [])
    self.log.info("Initial Load with key-count {0}".format(self.keys_count))
    RebalanceBaseTest.load_all_buckets_task(rest, self.task_manager, bucket_data,
                                            ram_load_ratio=self.load_ratio,
                                            keys_count=self.keys_count)
    for _ in range(out_rounds):
        # Need at least two nodes left to be able to eject one.
        if len(rest.node_statuses()) < 2:
            break
        if self.checkResidentRatio:
            self.log.info("Getting the resident ratio stats before failover/rebalancing out the nodes")
            RebalanceBaseTest.check_resident_ratio(self, master)
        # pick_node never returns the master node.
        victim = RebalanceHelper.pick_node(master)
        self.log.info(
            "Incrementally rebalancing out node {0}:{1}".format(victim.ip, victim.port))
        # Rebalance the chosen node out of the cluster.
        RebalanceTaskHelper.add_rebalance_task(self.task_manager, [master], [],
                                               [victim], do_stop=self.do_stop)
        # Let any in-flight loading tasks drain before verification.
        RebalanceBaseTest.finish_all_bucket_tasks(rest, bucket_data)
        self.log.info("Completed Loading and Rebalacing out")
        if self.checkResidentRatio:
            self.log.info("Getting the resident ratio stats after rebalancing out the nodes")
            RebalanceBaseTest.check_resident_ratio(self, master)
        # Optional verification step against the client-side KV store.
        if self.do_verify:
            self.log.info("Verifying with KV store")
            RebalanceBaseTest.do_kv_and_replica_verification(master, self.task_manager,
                                                             bucket_data, self.replica,
                                                             self)
        else:
            self.log.info("No Verification with KV store")
def _common_test_body(self):
    """Incrementally rebalance servers in — and, once the cluster is large
    enough, simultaneously rebalance one node out — while buckets are being
    loaded in parallel, verifying after each swap when ``do_verify`` is set.
    """
    master = self.servers[0]
    rest = RestConnection(master)
    bucket_data = RebalanceBaseTest.bucket_data_init(rest)
    self.log.info("INTIAL LOAD")
    RebalanceBaseTest.load_all_buckets_task(rest, self.task_manager, bucket_data,
                                            self.load_ratio,
                                            keys_count=self.keys_count)
    eject_enabled = False
    for server in self.servers[1:]:
        # Once ejection is enabled, swap one non-master node out while the
        # new server comes in; until then only rebalance in.
        eject_list = [RebalanceHelper.pick_node(master)] if eject_enabled else []
        current_nodes = RebalanceHelper.getOtpNodeIds(master)
        self.log.info("current nodes : {0}".format(current_nodes))
        self.log.info("adding node {0}, removing node {1} and rebalance afterwards".format(
            server.ip, [node.ip for node in eject_list]))
        self.log.info("START PARALLEL LOAD")
        RebalanceBaseTest.tasks_for_buckets(rest, self.task_manager, bucket_data,
                                            DELETE_RATIO=self.delete_ratio,
                                            ACCESS_RATIO=self.access_ratio,
                                            EXPIRY_RATIO=self.expiry_ratio)
        self.log.info("INCREMENTAL REBALANCE IN/OUT")
        # Rebalance the new server in and (possibly) one node out at once.
        RebalanceTaskHelper.add_rebalance_task(self.task_manager, [master],
                                               [server], eject_list,
                                               do_stop=self.do_stop)
        # Wait for the parallel loading tasks to finish.
        RebalanceBaseTest.finish_all_bucket_tasks(rest, bucket_data)
        # Need at least 3 nodes before ejecting, to keep replica=2 viable.
        if len(current_nodes) > 2:
            eject_enabled = True
        if self.do_verify:
            self.log.info("VERIFICATION")
            RebalanceBaseTest.do_kv_and_replica_verification(master,
                                                             self.task_manager,
                                                             bucket_data,
                                                             self.replica, self)
        else:
            self.log.info("NO VERIFICATION")
def _common_test_body(self):
    """Repeatedly fail over a non-master node, start a rebalance-out, stop it
    mid-flight, verify replication, then rerun the rebalance to completion —
    until only the master remains in the cluster.
    """
    master = self.servers[0]
    rest = RestConnection(master)
    creds = self.input.membase_settings
    bucket_data = RebalanceBaseTest.bucket_data_init(rest)
    # Cluster every server together first.
    ClusterHelper.add_all_nodes_or_assert(master, self.servers, creds, self)
    rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
                   ejectedNodes=[])
    self.assertTrue(rest.monitorRebalance(),
                    msg="rebalance operation failed after adding nodes")
    nodes = rest.node_statuses()
    # Keep ejecting until the master is the only node left.
    while len(nodes) > 1:
        # pick_node chooses a node that is not the master.
        victim = RebalanceHelper.pick_node(master)
        dist = RebalanceBaseTest.get_distribution(self.load_ratio)
        RebalanceBaseTest.load_data_for_buckets(rest, self.load_ratio, dist,
                                                [master], bucket_data, self)
        self.log.info("current nodes : {0}".format(
            [node.id for node in rest.node_statuses()]))
        self.log.info("removing node {0} and rebalance afterwards".format(victim.id))
        rest.fail_over(victim.id)
        self.log.info("failed over {0}".format(victim.id))
        time.sleep(10)
        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
                       ejectedNodes=[victim.id])
        # Let the rebalance make some progress, then interrupt it.
        expected_progress = 30
        self.assertTrue(RestHelper(rest).rebalance_reached(expected_progress),
                        "rebalance failed or did not reach {0}%".format(expected_progress))
        self.assertTrue(rest.stop_rebalance(), msg="unable to stop rebalance")
        time.sleep(20)
        RebalanceBaseTest.replication_verification(master, bucket_data,
                                                   self.replica, self)
        # Restart the rebalance and this time let it run to completion.
        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
                       ejectedNodes=[victim.id])
        self.assertTrue(rest.monitorRebalance(),
                        msg="rebalance operation failed after adding node {0}".format(victim.id))
        time.sleep(20)
        RebalanceBaseTest.replication_verification(master, bucket_data,
                                                   self.replica, self)
        nodes = rest.node_statuses()
def _common_test_body(self):
    """Failover-based rebalance-out test.

    Rebalances all servers in, loads data, then repeatedly fails over a
    non-master node and rebalances it out while load runs in parallel,
    verifying against the KV store when ``do_verify`` is set.  Stops as
    soon as the cluster becomes too small to sustain the replica count.
    """
    master = self.servers[0]
    rest = RestConnection(master)
    bucket_data = RebalanceBaseTest.bucket_data_init(rest)
    # add all servers
    self.log.info("Initially rebalancing in the nodes")
    RebalanceTaskHelper.add_rebalance_task(self.task_manager, [master],
                                           self.servers[1:], [],
                                           monitor=True, do_stop=self.do_stop)
    self.log.info("Initial loading of data")
    RebalanceBaseTest.load_all_buckets_task(rest, self.task_manager, bucket_data,
                                            self.load_ratio,
                                            keys_count=self.keys_count)
    nodes = rest.node_statuses()
    for node in nodes[1:]:
        # Continue failing over only while current_cluster_size >= replica + 1.
        current_cluster_len = len(rest.node_statuses())
        if current_cluster_len < (self.replica + 1):
            self.log.info(
                "Replica count {0} is greater than the current cluster-size{1}, stopping failover test.".format(
                    self.replica, current_cluster_len))
            # BUG FIX: previously the loop kept iterating (re-logging this
            # message) even though the test could no longer proceed; stop
            # for real once the cluster is too small.
            break
        # Never pick master node
        if node.ip == master.ip:
            continue
        self.log.info("Starting Parallel Load ..")
        RebalanceBaseTest.tasks_for_buckets(rest, self.task_manager, bucket_data,
                                            DELETE_RATIO=self.delete_ratio,
                                            ACCESS_RATIO=self.access_ratio,
                                            EXPIRY_RATIO=self.expiry_ratio)
        # Pick a Node to failover
        toBeEjectedNode = RebalanceHelper.pick_node(master)
        self.log.info("Starting Failover and Rebalance Out for node {0}:{1}".format(
            toBeEjectedNode.ip, toBeEjectedNode.port))
        # fail the node over first
        RebalanceTaskHelper.add_failover_task(self.task_manager, [master],
                                              [toBeEjectedNode], True)
        self.log.info("Completed Failover for node {0}:{1}".format(
            toBeEjectedNode.ip, toBeEjectedNode.port))
        # rebalance Out
        RebalanceTaskHelper.add_rebalance_task(self.task_manager, [master], [],
                                               [toBeEjectedNode],
                                               do_stop=self.do_stop, monitor=True)
        # wait for all tasks to finish
        RebalanceBaseTest.finish_all_bucket_tasks(rest, bucket_data)
        # (fixed: this log string previously contained a stray line break)
        self.log.info("Completed Load, Failover and Rebalance Out.")
        # verification step
        if self.do_verify:
            self.log.info("Verifying with KV-store")
            RebalanceBaseTest.do_kv_and_replica_verification(master,
                                                             self.task_manager,
                                                             bucket_data,
                                                             self.replica, self,
                                                             failed_over=True)
        else:
            self.log.info("No verification with KV-store specified")
    # at least 2 nodes required per loop to rebalance out and verify replication
    self.log.info("Completed Load and Rebalance-Out")
def test_rebalance_out(self):
    """Rebalance every server in, then eject non-master nodes one at a time,
    mutating each bucket (adds, sets, deletes) and verifying its contents
    against a client-side KV store after every ejection.
    """
    RebalanceBaseTest.common_setup(self._input, self, replica=1)
    log = logger.Logger().get_logger()
    master = self._servers[0]
    num_of_docs = TestInputSingleton.input.param("num_of_docs", 100000)
    # BUG FIX: the default was copy-pasted from num_of_docs (100000); a
    # replica count is a small integer — use 1 to match common_setup above.
    replica = TestInputSingleton.input.param("replica", 1)
    add_items_count = TestInputSingleton.input.param("num_of_creates", 30000)
    size = TestInputSingleton.input.param("item_size", 256)
    params = {"sizes": [size], "count": num_of_docs,
              "seed": str(uuid.uuid4())[:7]}
    rest = RestConnection(master)
    buckets = rest.get_buckets()
    bucket_data = {}
    generators = {}
    for bucket in buckets:
        bucket_data[bucket.name] = {"kv_store": ClientKeyValueStore()}
    # CONSISTENCY FIX: this method uses self._servers everywhere else;
    # len(self.servers) was a typo for len(self._servers).
    rebalanced_in, which_servers = RebalanceBaseTest.rebalance_in(
        self._servers, len(self._servers) - 1)
    self.assertTrue(rebalanced_in, msg="unable to add and rebalance more nodes")
    rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
                   ejectedNodes=[])
    self.assertTrue(rest.monitorRebalance(),
                    msg="rebalance operation failed after adding nodes {0}".format(
                        [node.id for node in rest.node_statuses()]))
    while len(rest.node_statuses()) > 1:
        # pick a node that is not the master node
        toBeEjectedNode = RebalanceHelper.pick_node(master)
        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
                       ejectedNodes=[toBeEjectedNode.id])
        self.assertTrue(rest.monitorRebalance(),
                        msg="rebalance operation failed after adding node {0}".format(
                            toBeEjectedNode.id))
        for bucket in buckets:
            kv_store = bucket_data[bucket.name]["kv_store"]
            add_items_seed = str(uuid.uuid4())[:7]
            self._add_items(add_items_seed, bucket, add_items_count, kv_store)
            errors = RebalanceDataGenerator.do_verification(kv_store, rest,
                                                            bucket.name)
            if errors:
                log.error("verification returned {0} errors".format(len(errors)))
            load_set_ops = {"ops": "set", "bucket": bucket.name}
            load_set_ops.update(params)
            # floor division: the op count must stay an integer under Py3
            load_delete_ops = {"ops": "delete", "bucket": bucket.name,
                               "sizes": [size], "count": add_items_count // 5,
                               "seed": add_items_seed}
            thread = RebalanceDataGenerator.start_load(
                rest, bucket.name,
                RebalanceDataGenerator.create_loading_tasks(load_set_ops),
                kv_store)
            generators["set"] = {"thread": thread}
            generators["set"]["thread"].start()
            thread = RebalanceDataGenerator.start_load(
                rest, bucket.name,
                RebalanceDataGenerator.create_loading_tasks(load_delete_ops),
                kv_store)
            generators["delete"] = {"thread": thread}
            generators["delete"]["thread"].start()
        self.log.info("current nodes : {0}".format(
            [node.id for node in rest.node_statuses()]))
        for bucket in buckets:
            kv_store = bucket_data[bucket.name]["kv_store"]
            errors = RebalanceDataGenerator.do_verification(kv_store, rest,
                                                            bucket.name)
            if errors:
                log.error("verification returned {0} errors".format(len(errors)))
        # join the loaders started this round before ejecting the next node
        generators["set"]["thread"].join()
        generators["delete"]["thread"].join()
    for bucket in buckets:
        kv_store = bucket_data[bucket.name]["kv_store"]
        bucket_data[bucket.name]["items_inserted_count"] = len(kv_store.valid_items())
    RebalanceBaseTest.replication_verification(master, bucket_data, replica, self)
def test_rebalance_out(self):
    """Rebalance every server in, then eject non-master nodes one at a time,
    mutating each bucket (adds, sets, deletes) and verifying its contents
    against a client-side KV store after every ejection.
    """
    RebalanceBaseTest.common_setup(self._input, self, replica=1)
    log = logger.Logger().get_logger()
    master = self._servers[0]
    num_of_docs = TestInputSingleton.input.param("num_of_docs", 100000)
    # BUG FIX: the default was copy-pasted from num_of_docs (100000); a
    # replica count is a small integer — use 1 to match common_setup above.
    replica = TestInputSingleton.input.param("replica", 1)
    add_items_count = TestInputSingleton.input.param(
        "num_of_creates", 30000)
    size = TestInputSingleton.input.param("item_size", 256)
    params = {
        "sizes": [size],
        "count": num_of_docs,
        "seed": str(uuid.uuid4())[:7]
    }
    rest = RestConnection(master)
    buckets = rest.get_buckets()
    bucket_data = {}
    generators = {}
    for bucket in buckets:
        bucket_data[bucket.name] = {"kv_store": ClientKeyValueStore()}
    # CONSISTENCY FIX: this method uses self._servers everywhere else;
    # len(self.servers) was a typo for len(self._servers).
    rebalanced_in, which_servers = RebalanceBaseTest.rebalance_in(
        self._servers, len(self._servers) - 1)
    self.assertTrue(rebalanced_in,
                    msg="unable to add and rebalance more nodes")
    rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
                   ejectedNodes=[])
    self.assertTrue(
        rest.monitorRebalance(),
        msg="rebalance operation failed after adding nodes {0}".format(
            [node.id for node in rest.node_statuses()]))
    while len(rest.node_statuses()) > 1:
        #pick a node that is not the master node
        toBeEjectedNode = RebalanceHelper.pick_node(master)
        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
                       ejectedNodes=[toBeEjectedNode.id])
        self.assertTrue(
            rest.monitorRebalance(),
            msg="rebalance operation failed after adding node {0}".format(
                toBeEjectedNode.id))
        for bucket in buckets:
            kv_store = bucket_data[bucket.name]["kv_store"]
            add_items_seed = str(uuid.uuid4())[:7]
            self._add_items(add_items_seed, bucket, add_items_count, kv_store)
            errors = RebalanceDataGenerator.do_verification(
                kv_store, rest, bucket.name)
            if errors:
                log.error("verification returned {0} errors".format(
                    len(errors)))
            load_set_ops = {"ops": "set", "bucket": bucket.name}
            load_set_ops.update(params)
            # floor division: the op count must stay an integer under Py3
            load_delete_ops = {
                "ops": "delete",
                "bucket": bucket.name,
                "sizes": [size],
                "count": add_items_count // 5,
                "seed": add_items_seed
            }
            thread = RebalanceDataGenerator.start_load(
                rest, bucket.name,
                RebalanceDataGenerator.create_loading_tasks(load_set_ops),
                kv_store)
            generators["set"] = {"thread": thread}
            generators["set"]["thread"].start()
            thread = RebalanceDataGenerator.start_load(
                rest, bucket.name,
                RebalanceDataGenerator.create_loading_tasks(
                    load_delete_ops), kv_store)
            generators["delete"] = {"thread": thread}
            generators["delete"]["thread"].start()
        self.log.info("current nodes : {0}".format(
            [node.id for node in rest.node_statuses()]))
        for bucket in buckets:
            kv_store = bucket_data[bucket.name]["kv_store"]
            errors = RebalanceDataGenerator.do_verification(
                kv_store, rest, bucket.name)
            if errors:
                log.error("verification returned {0} errors".format(
                    len(errors)))
        # join the loaders started this round before ejecting the next node
        generators["set"]["thread"].join()
        generators["delete"]["thread"].join()
    for bucket in buckets:
        kv_store = bucket_data[bucket.name]["kv_store"]
        bucket_data[bucket.name]["items_inserted_count"] = len(
            kv_store.valid_items())
    RebalanceBaseTest.replication_verification(
        master, bucket_data, replica, self)
def test_incremental_rebalance_out_continuous_bidirectional_sets_deletes(self):
    """Bi-directional continuous XDCR with incremental rebalance-out.

    Sets up continuous replication A<->B on the first bucket, loads data
    into A (then deletes a fifth of it concurrently), rebalances all nodes
    into both clusters, incrementally ejects nodes (at most one from B per
    ejection from A), and finally verifies the replicated data and
    revisions.
    """
    cluster_ref_a = "cluster_ref_a"
    master_a = self._input.clusters.get(0)[0]
    rest_conn_a = RestConnection(master_a)
    cluster_ref_b = "cluster_ref_b"
    master_b = self._input.clusters.get(1)[0]
    rest_conn_b = RestConnection(master_b)
    # Setup bi-directional continuous replication
    replication_type = "continuous"
    rest_conn_a.add_remote_cluster(master_b.ip, master_b.port,
                                   master_b.rest_username,
                                   master_b.rest_password, cluster_ref_b)
    rest_conn_b.add_remote_cluster(master_a.ip, master_a.port,
                                   master_a.rest_username,
                                   master_a.rest_password, cluster_ref_a)
    (rep_database_a, rep_id_a) = rest_conn_a.start_replication(
        replication_type, self._buckets[0], cluster_ref_b)
    (rep_database_b, rep_id_b) = rest_conn_b.start_replication(
        replication_type, self._buckets[0], cluster_ref_a)
    self._state.append((rest_conn_a, cluster_ref_b, rep_database_a, rep_id_a))
    self._state.append((rest_conn_b, cluster_ref_a, rep_database_b, rep_id_b))
    load_thread_list = []
    # Start load (synchronous: sets must land before the deletes start)
    kvstore = ClientKeyValueStore()
    self._params["ops"] = "set"
    task_def = RebalanceDataGenerator.create_loading_tasks(self._params)
    load_thread = RebalanceDataGenerator.start_load(rest_conn_a,
                                                    self._buckets[0],
                                                    task_def, kvstore)
    load_thread.start()
    load_thread.join()
    # Do some deletes, concurrently with the rebalances below
    self._params["ops"] = "delete"
    self._params["count"] = self._num_items / 5
    task_def = RebalanceDataGenerator.create_loading_tasks(self._params)
    load_thread = RebalanceDataGenerator.start_load(rest_conn_a,
                                                    self._buckets[0],
                                                    task_def, kvstore)
    load_thread_list.append(load_thread)
    # Start all loads concurrently
    for lt in load_thread_list:
        lt.start()
    # Trigger rebalance on both source and destination clusters
    servers_a = self._input.clusters.get(0)
    servers_b = self._input.clusters.get(1)
    rebalanced_servers_a = []
    rebalanced_servers_b = []
    # Rebalance all the nodes together  (unused which_servers_* locals removed)
    RebalanceHelper.rebalance_in(servers_a, len(servers_a) - 1)
    RebalanceHelper.rebalance_in(servers_b, len(servers_b) - 1)
    rebalanced_servers_a.extend(servers_a)
    rebalanced_servers_b.extend(servers_b)
    nodes_a = rest_conn_a.node_statuses()
    nodes_b = rest_conn_b.node_statuses()
    # Incremental rebalance out one node in cluster_a, then cluster_b
    while len(nodes_a) > 1:
        toBeEjectedNode = RebalanceHelper.pick_node(master_a)
        self.log.info("current nodes : {0}".format(
            RebalanceHelper.getOtpNodeIds(master_a)))
        self.log.info("removing node {0} and rebalance afterwards".format(
            toBeEjectedNode.id))
        rest_conn_a.rebalance(
            otpNodes=[node.id for node in rest_conn_a.node_statuses()],
            ejectedNodes=[toBeEjectedNode.id])
        self.assertTrue(rest_conn_a.monitorRebalance(),
                        msg="rebalance operation failed after adding node {0}".format(
                            toBeEjectedNode.id))
        # Eject at most one node from cluster_b per ejection from cluster_a
        # (the break keeps this inner loop to a single iteration).
        while len(nodes_b) > 1:
            toBeEjectedNode = RebalanceHelper.pick_node(master_b)
            self.log.info("current nodes : {0}".format(
                RebalanceHelper.getOtpNodeIds(master_b)))
            self.log.info("removing node {0} and rebalance afterwards".format(
                toBeEjectedNode.id))
            rest_conn_b.rebalance(
                otpNodes=[node.id for node in rest_conn_b.node_statuses()],
                ejectedNodes=[toBeEjectedNode.id])
            self.assertTrue(rest_conn_b.monitorRebalance(),
                            msg="rebalance operation failed after adding node {0}".format(
                                toBeEjectedNode.id))
            break
        # Drop ejected cluster_b servers from the bookkeeping list.
        for node in nodes_b:
            for rebalanced_server in rebalanced_servers_b:
                if rebalanced_server.ip.find(node.ip) != -1:
                    rebalanced_servers_b.remove(rebalanced_server)
                    break
        # BUG FIX: cluster_b's node list was refreshed from rest_conn_a,
        # so the inner loop never observed cluster_b shrinking.
        nodes_b = rest_conn_b.node_statuses()
        # Drop ejected cluster_a servers from the bookkeeping list.
        # (a duplicated copy of this cleanup loop was removed)
        for node in nodes_a:
            for rebalanced_server in rebalanced_servers_a:
                if rebalanced_server.ip.find(node.ip) != -1:
                    rebalanced_servers_a.remove(rebalanced_server)
                    break
        nodes_a = rest_conn_a.node_statuses()
    # Wait for loading threads to finish
    for lt in load_thread_list:
        lt.join()
    self.log.info("All loading threads finished")
    # Verify replication
    self.assertTrue(XDCRBaseTest.verify_replicated_data(rest_conn_b,
                                                        self._buckets[0],
                                                        kvstore,
                                                        self._poll_sleep,
                                                        self._poll_timeout),
                    "Verification of replicated data failed")
    self.assertTrue(XDCRBaseTest.verify_replicated_revs(rest_conn_a,
                                                        rest_conn_b,
                                                        self._buckets[0],
                                                        self._poll_sleep,
                                                        self._poll_timeout),
                    "Verification of replicated revisions failed")
def test_failover_source_sets(self):
    """Unidirectional XDCR failover test.

    Starts a 2->2 continuous replication from cluster A to cluster B, loads
    data into A during replication, fails over nodes on A one at a time
    (down to a single node, giving a 1->2 topology), then verifies the
    replicated data on both clusters and the replicated revisions.
    """
    # (duplicated prologue removed: replication_type and the two log lines
    # below were previously assigned/emitted twice)
    replication_type = "continuous"
    self.log.info("Force initial rebalance.")
    cluster_ref_a = "cluster_ref_a"
    master_a = self._input.clusters.get(0)[0]
    rest_conn_a = RestConnection(master_a)
    cluster_ref_b = "cluster_ref_b"
    master_b = self._input.clusters.get(1)[0]
    rest_conn_b = RestConnection(master_b)
    self.log.info("START XDC replication...")
    # Start replication
    rest_conn_a.add_remote_cluster(master_b.ip, master_b.port,
                                   master_b.rest_username,
                                   master_b.rest_password, cluster_ref_b)
    (rep_database, rep_id) = rest_conn_a.start_replication(replication_type,
                                                           self._buckets[0],
                                                           cluster_ref_b)
    self._state.append((rest_conn_a, cluster_ref_b, rep_database, rep_id))
    # Start load
    self.log.info("START loading data...")
    load_thread_list = []
    kvstore = ClientKeyValueStore()
    self._params["ops"] = "set"
    task_def = RebalanceDataGenerator.create_loading_tasks(self._params)
    load_thread = RebalanceDataGenerator.start_load(rest_conn_a,
                                                    self._buckets[0],
                                                    task_def, kvstore)
    # BUG FIX: the loader thread was started but never added to
    # load_thread_list, so the join loop below was a no-op and verification
    # could race with the still-running load.
    load_thread_list.append(load_thread)
    load_thread.start()
    # sleep a while to allow more data loaded
    time.sleep(5)
    self.log.info("current nodes on source cluster: {0}".format(
        RebalanceHelper.getOtpNodeIds(master_a)))
    # Trigger failover; fail over one node each time until one node remains.
    self.log.info("DURING replication, start failover...")
    self.log.info("FAILOVER nodes on Cluster A ...")
    nodes_a = rest_conn_a.node_statuses()
    while len(nodes_a) > 1:
        toBeFailedOverNode = RebalanceHelper.pick_node(master_a)
        self.log.info("failover node {0}".format(toBeFailedOverNode.id))
        # CONSISTENCY FIX: fail_over takes the otp node id elsewhere in
        # these tests, not the node object.
        rest_conn_a.fail_over(toBeFailedOverNode.id)
        self.log.info("rebalance after failover")
        rest_conn_a.rebalance(
            otpNodes=[node.id for node in rest_conn_a.node_statuses()],
            ejectedNodes=[toBeFailedOverNode.id])
        self.assertTrue(rest_conn_a.monitorRebalance(),
                        msg="rebalance operation failed after removing node {0}".format(
                            toBeFailedOverNode.id))
        nodes_a = rest_conn_a.node_statuses()
    self.log.info("ALL failed over done...")
    # Wait for loading threads to finish
    for lt in load_thread_list:
        lt.join()
    self.log.info("All loading threads finished")
    # Verify replication
    self.log.info("START data verification at cluster A...")
    self.assertTrue(XDCRBaseTest.verify_replicated_data(rest_conn_a,
                                                        self._buckets[0],
                                                        kvstore,
                                                        self._poll_sleep,
                                                        self._poll_timeout),
                    "Verification of replicated data failed")
    self.log.info("START data verification at cluster B...")
    self.assertTrue(XDCRBaseTest.verify_replicated_data(rest_conn_b,
                                                        self._buckets[0],
                                                        kvstore,
                                                        self._poll_sleep,
                                                        self._poll_timeout),
                    "Verification of replicated data failed")
    self.log.info("START revision verification on both clusters...")
    self.assertTrue(XDCRBaseTest.verify_replicated_revs(rest_conn_a,
                                                        rest_conn_b,
                                                        self._buckets[0],
                                                        self._poll_sleep,
                                                        self._poll_timeout),
                    "Verification of replicated revisions failed")