Example #1
    def remove_node(self, otpnode=None, wait_for_rebalance=True):
        nodes = self.rest.node_statuses()
        # This is the case when master node is running cbas service as well
        if len(nodes) <= len(otpnode):
            return

        helper = RestHelper(self.rest)
        try:
            removed = helper.remove_nodes(
                knownNodes=[node.id for node in nodes],
                ejectedNodes=[node.id for node in otpnode],
                wait_for_rebalance=wait_for_rebalance)
        except Exception as e:
            self.sleep(
                5,
                "First time rebalance failed on Removal. Wait and try again. THIS IS A BUG."
            )
            removed = helper.remove_nodes(
                knownNodes=[node.id for node in nodes],
                ejectedNodes=[node.id for node in otpnode],
                wait_for_rebalance=wait_for_rebalance)
        if wait_for_rebalance:
            self.assertTrue(
                removed,
                "Rebalance operation failed while removing %s," % otpnode)
Example #2
 def test_eventing_rebalance_in_when_existing_eventing_node_is_processing_mutations(self):
     self.create_save_handlers()
     self.deploy_all_handlers()
     # load data
     self.load_data_to_collection(self.docs_per_day * self.num_docs, "src_bucket._default._default",wait_for_loading=False)
     self.load_data_to_collection(self.docs_per_day * self.num_docs, "src_bucket.scope_1.coll_1",wait_for_loading=False)
     # rebalance in an eventing node when eventing is processing mutations
     services_in = ["eventing"]
     rebalance = self.cluster.async_rebalance(self.servers[:self.nodes_init], [self.servers[self.nodes_init]], [],
                                              services=services_in)
     reached = RestHelper(self.rest).rebalance_reached(retry_count=150)
     self.assertTrue(reached, "rebalance failed, stuck or did not complete")
     rebalance.result()
     # Wait for eventing to catch up with all the update mutations and verify results after rebalance
     self.verify_all_handler(self.docs_per_day * self.num_docs)
     self.verify_doc_count_collections("src_bucket.scope_1.coll_1", self.docs_per_day * self.num_docs * 2)
     # delete json documents
     self.load_data_to_collection(self.docs_per_day * self.num_docs, "src_bucket._default._default",is_delete=True)
     self.load_data_to_collection(self.docs_per_day * self.num_docs, "src_bucket.scope_1.coll_1",is_delete=True)
     # Wait for eventing to catch up with all the delete mutations and verify results
     self.verify_all_handler(0)
     self.verify_doc_count_collections("src_bucket.scope_1.coll_1", self.docs_per_day * self.num_docs)
     self.undeploy_delete_all_functions()
     # Get all eventing nodes
     nodes_out_list = self.get_nodes_from_services_map(service_type="eventing", get_all_nodes=True)
     # rebalance out all eventing nodes
     rebalance = self.cluster.async_rebalance(self.servers[:self.nodes_init + 1], [], nodes_out_list)
     reached = RestHelper(self.rest).rebalance_reached(retry_count=150)
     self.assertTrue(reached, "rebalance failed, stuck or did not complete")
     rebalance.result()
Example #3
 def test_eventing_rebalance_in_kill_eventing_consumer(self):
     eventing_node = self.get_nodes_from_services_map(service_type="eventing", get_all_nodes=False)
     sock_batch_size = self.input.param('sock_batch_size', 1)
     worker_count = self.input.param('worker_count', 3)
     cpp_worker_thread_count = self.input.param('cpp_worker_thread_count', 1)
     body = self.create_save_function_body(self.function_name, self.handler_code,
                                           sock_batch_size=sock_batch_size, worker_count=worker_count,
                                           cpp_worker_thread_count=cpp_worker_thread_count)
     if self.is_curl:
         body['depcfg']['curl'] = []
         body['depcfg']['curl'].append({"hostname": self.hostname, "value": "server", "auth_type": self.auth_type,
                                        "username": self.curl_username, "password": self.curl_password,"cookies": self.cookies})
     self.deploy_function(body)
     # load data
     self.load(self.gens_load, buckets=self.src_bucket, flag=self.item_flag, verify_data=False,
               batch_size=self.batch_size)
     if self.pause_resume:
         self.pause_function(body, wait_for_pause=False)
     # rebalance in an eventing node when eventing is processing mutations
     services_in = ["eventing"]
     rebalance = self.cluster.async_rebalance(self.servers[:self.nodes_init], [self.servers[self.nodes_init]], [],
                                              services=services_in)
     self.sleep(5)
     reached = RestHelper(self.rest).rebalance_reached(percentage=60)
     self.assertTrue(reached, "rebalance failed, stuck or did not complete")
     # kill eventing consumer when eventing is processing mutations
     self.kill_consumer(eventing_node)
     self.wait_for_handler_state(body['appname'], "deployed")
     rebalance.result()
     if self.pause_resume:
         self.resume_function(body)
     # Wait for eventing to catch up with all the update mutations and verify results after rebalance
     if self.is_sbm:
         self.verify_eventing_results(self.function_name, self.docs_per_day * 2016*2, skip_stats_validation=True)
     else:
         self.verify_eventing_results(self.function_name, self.docs_per_day * 2016, skip_stats_validation=True)
     # delete json documents
     self.load(self.gens_load, buckets=self.src_bucket, flag=self.item_flag, verify_data=False,
               batch_size=self.batch_size, op_type='delete')
     if self.pause_resume:
         self.pause_function(body)
         self.sleep(30)
         self.resume_function(body)
     # kill eventing consumer when eventing is processing mutations
     self.kill_consumer(eventing_node)
     self.wait_for_handler_state(body['appname'], "deployed")
     # Wait for eventing to catch up with all the delete mutations and verify results
     # This is required to ensure eventing works after rebalance goes through successfully
     if self.is_sbm:
         self.verify_eventing_results(self.function_name, self.docs_per_day * 2016, skip_stats_validation=True)
     else:
         self.verify_eventing_results(self.function_name, 0, skip_stats_validation=True)
     self.undeploy_and_delete_function(body)
     # Get all eventing nodes
     nodes_out_list = self.get_nodes_from_services_map(service_type="eventing", get_all_nodes=True)
     # rebalance out all eventing nodes
     rebalance = self.cluster.async_rebalance(self.servers[:self.nodes_init + 1], [], nodes_out_list)
     reached = RestHelper(self.rest).rebalance_reached()
     self.assertTrue(reached, "rebalance failed, stuck or did not complete")
     rebalance.result()
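
kill_consumer above is a base-class helper whose body is not shown on this page. A rough sketch of one way such a helper could work, reusing the RemoteMachineShellConnection class that appears in later examples (the pkill target and the execute_command call are assumptions, not the project's confirmed implementation):

    def kill_consumer(self, server):
        # SSH into the node and kill the eventing-consumer process; the eventing
        # service is expected to respawn it, which is what the test exercises.
        shell = RemoteMachineShellConnection(server)
        shell.execute_command("pkill -9 eventing-consumer")
        shell.disconnect()
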
Example #4
    def test_create_scope_collection_rebalance_kv(self):
        bucket_name = "bucket1"
        scope_name = "scope1"
        collection_name = "collection1"

        self.cluster.create_standard_bucket(bucket_name, 11222,
                                            self.bucket_params)

        rebalance_result = self.cluster.async_rebalance(
            self.servers, [], [self.servers[1]])

        try:
            scope_created = self.cli_helper.create_scope(bucket=bucket_name,
                                                         scope=scope_name)
            self.assertTrue(scope_created,
                            "Cannot create scope during rebalance")
            collection_created = self.cli_helper.create_collection(
                bucket=bucket_name,
                scope="_default",
                collection=collection_name)
            self.assertTrue(collection_created,
                            "Cannot create collection during rebalance.")
        finally:
            #wait until rebalance is done
            RestHelper(self.rest).rebalance_reached(retry_count=150)
            time_limit = 100
            while time_limit > 0:
                if RestHelper(self.rest).is_cluster_rebalanced():
                    break
                else:
                    time_limit = time_limit - 1
                    self.sleep(10, "Waiting for rebalance finish.")
Example #5
 def test_multiple_handle_multiple_collections_swap_rebalance_kv(self):
     # load data
     self.load_data_to_all_source_collections()
     self.create_n_handler(self.num_handlers, self.num_src_buckets,
                           self.num_dst_buckets, self.handler_code)
     self.deploy_n_handler(self.deploy_handler, sequential=self.sequential)
     self.wait_for_handlers_to_deployed()
     # swap rebalance a kv node when eventing is processing mutations
     services_in = ["kv"]
     nodes_out_kv = self.servers[1]
     rebalance = self.cluster.async_rebalance(
         self.servers[:self.nodes_init], [self.servers[self.nodes_init]],
         [nodes_out_kv],
         services=services_in)
     reached = RestHelper(self.rest).rebalance_reached(retry_count=150)
     self.assertTrue(reached, "rebalance failed, stuck or did not complete")
     rebalance.result()
     self.verify_destination_buckets(self.docs_per_day * self.num_docs)
     # delete load data
     self.load_data_to_all_source_collections(is_delete=True)
     self.verify_destination_buckets(0)
     self.undeploy_delete_all_handler()
     # Get all eventing nodes
     nodes_out_list = self.get_nodes_from_services_map(
         service_type="eventing", get_all_nodes=True)
     # rebalance out all eventing nodes
     rebalance = self.cluster.async_rebalance(
         self.servers[:self.nodes_init + 1], [], nodes_out_list)
     reached = RestHelper(self.rest).rebalance_reached(retry_count=150)
     self.assertTrue(reached, "rebalance failed, stuck or did not complete")
     rebalance.result()
Example #6
    def setUp(self):
        super(FTSServerGroups, self).setUp()
        self.rest = RestConnection(self._cb_cluster.get_master_node())
        self.helper = RestHelper(self.rest)
        self.default_group_name = "Group 1"
        self.fts_query = {"match": "emp", "field": "type"}

        self._cleanup_server_groups()
Example #7
 def test_eventing_rebalance_with_multiple_eventing_nodes(self):
     self.create_save_handlers()
     self.deploy_all_handlers()
     # load data
     task1=self.load_data_to_collection(self.docs_per_day * self.num_docs, "src_bucket._default._default",
                                  wait_for_loading=False)
     task2=self.load_data_to_collection(self.docs_per_day * self.num_docs, "src_bucket.scope_1.coll_1",
                                  wait_for_loading=False)
     # rebalance in eventing nodes when eventing is processing mutations
     services_in = ["eventing", "eventing"]
     to_add_nodes = self.servers[self.nodes_init:self.nodes_init + 2]
     rebalance = self.cluster.async_rebalance(self.servers[:self.nodes_init], to_add_nodes, [], services=services_in)
     reached = RestHelper(self.rest).rebalance_reached(retry_count=150)
     self.assertTrue(reached, "rebalance failed, stuck or did not complete")
     rebalance.result()
     task1.result()
     task2.result()
     # Wait for eventing to catch up with all the update mutations and verify results after rebalance
     self.verify_all_handler(self.docs_per_day * self.num_docs)
     self.verify_doc_count_collections("src_bucket.scope_1.coll_1", self.docs_per_day * self.num_docs * 2)
     # delete json documents
     task1=self.load_data_to_collection(self.docs_per_day * self.num_docs, "src_bucket._default._default", is_delete=True,
                                  wait_for_loading=False)
     task2=self.load_data_to_collection(self.docs_per_day * self.num_docs, "src_bucket.scope_1.coll_1", is_delete=True,
                                  wait_for_loading=False)
     # Get all eventing nodes
     nodes_out_list = self.get_nodes_from_services_map(service_type="eventing", get_all_nodes=True)
     # Remove 2 eventing nodes
     to_remove_nodes = nodes_out_list[0:2]
     self.log.info("Rebalance out eventing nodes {}".format(to_remove_nodes))
     # rebalance out 2 eventing nodes
     rebalance1 = self.cluster.async_rebalance(self.servers[:self.nodes_init + 2], [], to_remove_nodes)
     reached1 = RestHelper(self.rest).rebalance_reached(retry_count=150)
     self.assertTrue(reached1, "rebalance failed, stuck or did not complete")
     rebalance1.result()
     task1.result()
     task2.result()
     # Wait for eventing to catch up with all the delete mutations and verify results
     self.verify_all_handler(0)
     self.verify_doc_count_collections("src_bucket.scope_1.coll_1", self.docs_per_day * self.num_docs)
     all_eventing_nodes = self.get_nodes_from_services_map(service_type="eventing", get_all_nodes=True)
     self.log.info("Eventing Nodes after rebalance out {}".format(all_eventing_nodes))
     #self.master = self.get_nodes_from_services_map(service_type="kv")
     # add the previously removed nodes as part of swap rebalance
     for node in to_remove_nodes:
         self.rest.add_node(self.master.rest_username, self.master.rest_password, node.ip, node.port,
                            services=["eventing"])
     self.load_data_to_collection(self.docs_per_day * self.num_docs, "src_bucket._default._default",
                                  wait_for_loading=False)
     self.load_data_to_collection(self.docs_per_day * self.num_docs, "src_bucket.scope_1.coll_1",
                                  wait_for_loading=False)
     rebalance = self.cluster.async_rebalance(self.servers[:self.nodes_init], [], all_eventing_nodes)
     reached = RestHelper(self.rest).rebalance_reached(retry_count=150)
     self.assertTrue(reached, "rebalance failed, stuck or did not complete")
     rebalance.result()
     self.verify_all_handler(self.docs_per_day * self.num_docs)
     self.verify_doc_count_collections("src_bucket.scope_1.coll_1", self.docs_per_day * self.num_docs*3)
     self.undeploy_delete_all_functions()
Example #8
 def test_eventing_rebalance_with_n2n_encryption_and_enforce_tls(self):
     ntonencryptionBase().disable_nton_cluster([self.master])
     body = self.create_save_function_body(
         self.function_name, "handler_code/ABO/insert_rebalance.js")
     self.load(self.gens_load,
               buckets=self.src_bucket,
               flag=self.item_flag,
               verify_data=False,
               batch_size=self.batch_size)
     self.deploy_function(body)
     self.verify_doc_count_collections("dst_bucket._default._default",
                                       self.docs_per_day * self.num_docs)
     for level in ["control", "all", "strict"]:
         if self.pause_resume:
             self.pause_function(body)
         else:
             self.undeploy_function(body)
         ntonencryptionBase().setup_nton_cluster(
             [self.master], clusterEncryptionLevel=level)
         if self.x509enable:
             self.upload_x509_certs(self.servers[self.nodes_init])
         services_in = ["eventing"]
         rebalance = self.cluster.async_rebalance(
             self.servers[:self.nodes_init],
             [self.servers[self.nodes_init]], [],
             services=services_in)
         reached = RestHelper(self.rest).rebalance_reached(retry_count=150)
         self.assertTrue(reached,
                         "rebalance failed, stuck or did not complete")
         rebalance.result()
         if self.pause_resume:
             self.resume_function(body)
         else:
             self.deploy_function(body)
         self.load(self.gens_load,
                   buckets=self.src_bucket,
                   flag=self.item_flag,
                   verify_data=False,
                   batch_size=self.batch_size,
                   op_type='delete')
         self.verify_doc_count_collections("dst_bucket._default._default",
                                           0)
         rebalance = self.cluster.async_rebalance(
             self.servers[:self.nodes_init], [],
             [self.servers[self.nodes_init]])
         reached = RestHelper(self.rest).rebalance_reached(retry_count=150)
         self.assertTrue(reached,
                         "rebalance failed, stuck or did not complete")
         rebalance.result()
         self.load(self.gens_load,
                   buckets=self.src_bucket,
                   flag=self.item_flag,
                   verify_data=False,
                   batch_size=self.batch_size)
         self.verify_doc_count_collections(
             "dst_bucket._default._default",
             self.docs_per_day * self.num_docs)
     self.undeploy_and_delete_function(body)
Example #9
 def test_ns_server_with_rebalance_failover_with_redaction_enabled(self):
     kv_node = self.get_nodes_from_services_map(service_type="kv",
                                                get_all_nodes=False)
     rest = RestConnection(self.master)
     # load bucket and do some ops
     gen_create = BlobGenerator('logredac',
                                'logredac-',
                                self.value_size,
                                end=self.num_items)
     self._load_all_buckets(self.master, gen_create, "create", 0)
     gen_delete = BlobGenerator('logredac',
                                'logredac-',
                                self.value_size,
                                start=self.num_items / 2,
                                end=self.num_items)
     gen_update = BlobGenerator('logredac',
                                'logredac-',
                                self.value_size,
                                start=self.num_items + 1,
                                end=self.num_items * 3 / 2)
     self._load_all_buckets(self.master, gen_delete, "create", 0)
     self._load_all_buckets(self.master, gen_update, "create", 0)
     # set log redaction level, collect logs, verify log files exist and verify them for redaction
     self.set_redaction_level()
     self.start_logs_collection()
     services_in = ["kv"]
     to_add_nodes = [self.servers[self.nodes_init]]
     rebalance = self.cluster.async_rebalance(
         self.servers[:self.nodes_init],
         to_add_nodes, [],
         services=services_in)
     reached = RestHelper(rest).rebalance_reached()
     self.assertTrue(reached, "rebalance failed, stuck or did not complete")
     rebalance.result()
     # failover a node
     server_failed_over = self.servers[self.nodes_init]
     fail_over_task = self.cluster.async_failover(
         [self.master], failover_nodes=[server_failed_over], graceful=True)
     fail_over_task.result()
     rebalance = self.cluster.async_rebalance(
         self.servers[:self.nodes_init], [], [server_failed_over])
     reached = RestHelper(rest).rebalance_reached()
     self.assertTrue(reached, "rebalance failed, stuck or did not complete")
     rebalance.result()
     result = self.monitor_logs_collection()
     log.info(result)
     logs_path = result["perNode"]["ns_1@127.0.0.1"]["path"]
     redactFileName = logs_path.split('/')[-1]
     nonredactFileName = logs_path.split('/')[-1].replace('-redacted', '')
     remotepath = logs_path[0:logs_path.rfind('/') + 1]
     self.verify_log_files_exist(remotepath=remotepath,
                                 redactFileName=redactFileName,
                                 nonredactFileName=nonredactFileName)
     self.verify_log_redaction(remotepath=remotepath,
                               redactFileName=redactFileName,
                               nonredactFileName=nonredactFileName,
                               logFileName="ns_server.debug.log")
Example #10
 def test_swap_rebalance_with_different_topologies(self):
     self.server_out = self.input.param("server_out")
     self.services_in = self.input.param("services_in")
     self.create_save_handlers()
     self.deploy_all_handlers()
     # load data
     self.load_data_to_collection(self.docs_per_day * self.num_docs, "src_bucket._default._default",
                                  wait_for_loading=False)
     self.load_data_to_collection(self.docs_per_day * self.num_docs, "src_bucket.scope_1.coll_1",
                                  wait_for_loading=False)
     nodes_out_list = self.servers[self.server_out]
     # do a swap rebalance
     self.rest.add_node(self.master.rest_username, self.master.rest_password, self.servers[self.nodes_init].ip,
                        self.servers[self.nodes_init].port, services=[self.services_in])
     rebalance = self.cluster.async_rebalance(self.servers[:self.nodes_init], [], [nodes_out_list])
     reached = RestHelper(self.rest).rebalance_reached(retry_count=150)
     self.assertTrue(reached, "rebalance failed, stuck or did not complete")
     rebalance.result()
     # Wait for eventing to catch up with all the update mutations and verify results after rebalance
     self.verify_all_handler(self.docs_per_day * self.num_docs)
     self.verify_doc_count_collections("src_bucket.scope_1.coll_1", self.docs_per_day * self.num_docs * 2)
     # delete json documents
     self.load_data_to_collection(self.docs_per_day * self.num_docs, "src_bucket._default._default", is_delete=True,
                                  wait_for_loading=False)
     self.load_data_to_collection(self.docs_per_day * self.num_docs, "src_bucket.scope_1.coll_1", is_delete=True,
                                  wait_for_loading=False)
     # Wait for eventing to catch up with all the delete mutations and verify results
     self.verify_all_handler(0)
     self.verify_doc_count_collections("src_bucket.scope_1.coll_1", self.docs_per_day * self.num_docs)
     self.undeploy_delete_all_functions()
Example #11
 def remove_node(self,otpnode=None, wait_for_rebalance=True):
     nodes = self.rest.node_statuses()
     # This is the case when master node is running cbas service as well
     if len(nodes) <= len(otpnode):
         return
     
     helper = RestHelper(self.rest)
     try:
         removed = helper.remove_nodes(knownNodes=[node.id for node in nodes],
                                           ejectedNodes=[node.id for node in otpnode],
                                           wait_for_rebalance=wait_for_rebalance)
     except Exception as e:
         self.sleep(5,"First time rebalance failed on Removal. Wait and try again. THIS IS A BUG.")
         removed = helper.remove_nodes(knownNodes=[node.id for node in nodes],
                                           ejectedNodes=[node.id for node in otpnode],
                                           wait_for_rebalance=wait_for_rebalance)
     if wait_for_rebalance:
         self.assertTrue(removed, "Rebalance operation failed while removing %s,"%otpnode)
Example #12
 def test_opposite_address_family_is_blocked(self):
     services_in = []
     for service in self.services_in.split("-"):
         services_in.append(service.split(":")[0])
     # Validate before the test starts
     self._validate_ip_addrress_family()
     nodes_in = self.servers[self.nodes_init:]
     rebalance = self.cluster.async_rebalance(self.servers[:self.nodes_init], nodes_in, [],
                                              services=services_in)
     self.sleep(2)
     rest = RestConnection(self.master)
     reached = RestHelper(rest).rebalance_reached(percentage=30)
     if self.change_addr_family:
         if self.ipv4_only:
             cli = CouchbaseCLI(self.master, self.master.rest_username, self.master.rest_password)
             cli.setting_autofailover(0, 60)
             _, _, success = cli.set_ip_family("ipv6only")
             if not success:
                 self.fail("Unable to change ip-family to ipv6only")
             self.check_ip_family_enforcement(ip_family="ipv6_only")
             self.sleep(2)
             _, _, success = cli.set_ip_family("ipv4only")
             if not success:
                 self.fail("Unable to change ip-family to ipv4only")
             cli.setting_autofailover(1, 60)
             self.check_ip_family_enforcement(ip_family="ipv4_only")
         if self.ipv6_only:
             cli = CouchbaseCLI(self.master, self.master.rest_username, self.master.rest_password)
             cli.setting_autofailover(0, 60)
             _, _, success = cli.set_ip_family("ipv4only")
             if not success:
                 self.fail("Unable to change ip-family to ipv4only")
             self.check_ip_family_enforcement(ip_family="ipv4_only")
             self.sleep(2)
             _, _, success = cli.set_ip_family("ipv6only")
             if not success:
                 self.fail("Unable to change ip-family to ipv6only")
             cli.setting_autofailover(1, 60)
             self.check_ip_family_enforcement(ip_family="ipv6_only")
     self.assertTrue(reached, "rebalance failed, stuck or did not complete")
     # Validate during rebalance
     self._validate_ip_addrress_family()
     rebalance.result()
     self.sleep(20)
     # Validate post rebalance
     self._validate_ip_addrress_family()
     # Reboot the master node
     shell = RemoteMachineShellConnection(self.master)
     shell.reboot_node()
     self.sleep(180)
     # Validate post reboot
     self._validate_ip_addrress_family()
Example #13
 def test_eventing_rebalance_in_delete_recreate_collections(self):
     self.create_save_handlers()
     self.deploy_all_handlers()
     # load data
     self.load_data_to_collection(self.docs_per_day * self.num_docs, "src_bucket._default._default",wait_for_loading=False)
     self.load_data_to_collection(self.docs_per_day * self.num_docs, "src_bucket.scope_1.coll_1",wait_for_loading=False)
     # rebalance in an eventing node when eventing is processing mutations
     services_in = ["eventing"]
     rebalance = self.cluster.async_rebalance(self.servers[:self.nodes_init], [self.servers[self.nodes_init]], [],
                                              services=services_in)
     self.collection_rest.delete_collection("dst_bucket","scope_1","coll_0")
     self.collection_rest.delete_collection("dst_bucket","scope_1","coll_1")
     self.collection_rest.delete_collection("dst_bucket","scope_1","coll_2")
     reached = RestHelper(self.rest).rebalance_reached(retry_count=150)
     self.assertTrue(reached, "rebalance failed, stuck or did not complete")
     rebalance.result()
     # Wait for eventing to catch up with all the update mutations and verify results after rebalance
     # self.verify_all_handler(self.docs_per_day * self.num_docs)
     self.verify_doc_count_collections("dst_bucket.scope_1.coll_3", self.docs_per_day * self.num_docs)
     self.verify_doc_count_collections("dst_bucket.scope_1.coll_4", self.docs_per_day * self.num_docs)
     self.verify_doc_count_collections("src_bucket.scope_1.coll_1", self.docs_per_day * self.num_docs*2)
     # rebalance in an eventing node when eventing is processing mutations
     services_in = ["eventing"]
     rebalance = self.cluster.async_rebalance(self.servers[:self.nodes_init+1], [self.servers[self.nodes_init+1]], [],
                                              services=services_in)
     self.collection_rest.create_collection("dst_bucket", "scope_1", "coll_0")
     self.collection_rest.create_collection("dst_bucket", "scope_1", "coll_1")
     self.collection_rest.create_collection("dst_bucket", "scope_1", "coll_2")
     # delete json documents
     self.load_data_to_collection(self.docs_per_day * self.num_docs, "src_bucket._default._default", is_delete=True)
     self.load_data_to_collection(self.docs_per_day * self.num_docs, "src_bucket.scope_1.coll_1", is_delete=True)
     reached = RestHelper(self.rest).rebalance_reached(retry_count=150)
     self.assertTrue(reached, "rebalance failed, stuck or did not complete")
     rebalance.result()
     self.verify_doc_count_collections("dst_bucket.scope_1.coll_3", 0)
     self.verify_doc_count_collections("dst_bucket.scope_1.coll_4", 0)
     self.verify_doc_count_collections("src_bucket.scope_1.coll_1", self.docs_per_day * self.num_docs)
     self.undeploy_delete_all_functions()
Example #14
 def test_eventing_rebalance_swap_delete_recreate_collections(self):
     self.create_save_handlers()
     self.deploy_all_handlers()
     # load data
     self.load_data_to_collection(self.docs_per_day * self.num_docs, "src_bucket._default._default",wait_for_loading=False)
     self.load_data_to_collection(self.docs_per_day * self.num_docs, "src_bucket.scope_1.coll_1",wait_for_loading=False)
     # swap rebalance an eventing node when eventing is processing mutations
     services_in = ["eventing"]
     nodes_out_ev = self.get_nodes_from_services_map(service_type="eventing", get_all_nodes=True)
     rebalance = self.cluster.async_rebalance(self.servers[:self.nodes_init], [self.servers[self.nodes_init]],
                                              nodes_out_ev, services=services_in)
     self.collection_rest.delete_collection("dst_bucket", "scope_1", "coll_0")
     self.collection_rest.delete_collection("dst_bucket", "scope_1", "coll_1")
     self.collection_rest.delete_collection("dst_bucket", "scope_1", "coll_2")
     reached = RestHelper(self.rest).rebalance_reached(retry_count=150)
     self.assertTrue(reached, "rebalance failed, stuck or did not complete")
     rebalance.result()
     self.verify_doc_count_collections("dst_bucket.scope_1.coll_3", self.docs_per_day * self.num_docs)
     self.verify_doc_count_collections("dst_bucket.scope_1.coll_4", self.docs_per_day * self.num_docs)
     self.verify_doc_count_collections("src_bucket.scope_1.coll_1", self.docs_per_day * self.num_docs * 2)
     # rebalance out an eventing node when eventing is processing mutations
     services_in = ["eventing"]
     nodes_out_ev = self.get_nodes_from_services_map(service_type="eventing", get_all_nodes=True)
     rebalance = self.cluster.async_rebalance(self.servers[:self.nodes_init+1], [self.servers[self.nodes_init+1]],
                                              nodes_out_ev, services=services_in)
     self.collection_rest.create_collection("dst_bucket", "scope_1", "coll_0")
     self.collection_rest.create_collection("dst_bucket", "scope_1", "coll_1")
     self.collection_rest.create_collection("dst_bucket", "scope_1", "coll_2")
     # delete json documents
     self.load_data_to_collection(self.docs_per_day * self.num_docs, "src_bucket._default._default", is_delete=True)
     self.load_data_to_collection(self.docs_per_day * self.num_docs, "src_bucket.scope_1.coll_1", is_delete=True)
     reached = RestHelper(self.rest).rebalance_reached(retry_count=150)
     self.assertTrue(reached, "rebalance failed, stuck or did not complete")
     rebalance.result()
     self.verify_all_handler(0)
     self.verify_doc_count_collections("src_bucket.scope_1.coll_1", self.docs_per_day * self.num_docs)
     self.undeploy_delete_all_functions()
Example #15
 def test_n1ql_gc_rebalance(self):
     self.n1ql_helper.create_primary_index(using_gsi=True,
                                           server=self.n1ql_node)
     self.load_sample_buckets(self.server, "travel-sample")
     worker_count = self.input.param('worker_count', 12)
     body = self.create_save_function_body(self.function_name,
                                           self.handler_code,
                                           worker_count=worker_count)
     self.deploy_function(body)
     # load data
     self.load(self.gens_load,
               buckets=self.src_bucket,
               flag=self.item_flag,
               verify_data=False,
               batch_size=self.batch_size)
     if self.pause_resume:
         self.pause_function(body)
     # rebalance in an eventing node when eventing is processing mutations
     services_in = ["eventing"]
     rebalance = self.cluster.async_rebalance(
         self.servers[:self.nodes_init], [self.servers[self.nodes_init]],
         [],
         services=services_in)
     reached = RestHelper(self.rest).rebalance_reached(retry_count=150)
     self.assertTrue(reached, "rebalance failed, stuck or did not complete")
     rebalance.result()
     if self.pause_resume:
         self.resume_function(body)
     # Wait for eventing to catch up with all the update mutations and verify results after rebalance
     self.verify_eventing_results(self.function_name,
                                  self.docs_per_day * 2016,
                                  skip_stats_validation=True)
     # delete json documents
     self.load(self.gens_load,
               buckets=self.src_bucket,
               flag=self.item_flag,
               verify_data=False,
               batch_size=self.batch_size,
               op_type='delete')
     if self.pause_resume:
         self.pause_function(body)
         self.sleep(30)
         self.resume_function(body)
     # Wait for eventing to catch up with all the delete mutations and verify results
     self.verify_eventing_results(self.function_name,
                                  0,
                                  skip_stats_validation=True)
     self.undeploy_and_delete_function(body)
Example #16
    def test_volume(self):
        nodes_in_cluster = [self.servers[0]]
        print "Start Time: %s" % str(
            time.strftime("%H:%M:%S", time.gmtime(time.time())))

        ########################################################################################################################
        self.log.info("Add a N1QL/Index nodes")
        self.query_node = self.servers[1]
        rest = RestConnection(self.query_node)
        rest.set_data_path(data_path=self.query_node.data_path,
                           index_path=self.query_node.index_path,
                           cbas_path=self.query_node.cbas_path)
        result = self.add_node(self.query_node, rebalance=False)
        self.assertTrue(result, msg="Failed to add N1QL/Index node.")

        self.log.info("Add a KV nodes")
        result = self.add_node(self.servers[2],
                               services=["kv"],
                               rebalance=True)
        self.assertTrue(result, msg="Failed to add KV node.")

        nodes_in_cluster = nodes_in_cluster + [
            self.servers[1], self.servers[2]
        ]
        ########################################################################################################################
        self.log.info("Step 2: Create Couchbase buckets.")
        self.create_required_buckets()
        for node in nodes_in_cluster:
            NodeHelper.do_a_warm_up(node)
            NodeHelper.wait_service_started(node)
        ########################################################################################################################
        self.log.info(
            "Step 3: Create 10M docs average of 1k docs for 8 couchbase buckets."
        )
        env = DefaultCouchbaseEnvironment.builder().mutationTokensEnabled(
            True).computationPoolSize(5).socketConnectTimeout(
                100000).connectTimeout(100000).maxRequestLifetime(
                    TimeUnit.SECONDS.toMillis(300)).build()
        cluster = CouchbaseCluster.create(env, self.master.ip)
        cluster.authenticate("Administrator", "password")
        bucket = cluster.openBucket("GleambookUsers")

        pool = Executors.newFixedThreadPool(5)
        items_start_from = 0
        total_num_items = self.input.param("num_items", 5000)

        executors = []
        num_executors = 5
        doc_executors = 5
        num_items = total_num_items / num_executors
        for i in xrange(doc_executors):
            executors.append(
                GleambookUser_Docloader(bucket,
                                        num_items,
                                        items_start_from + i * num_items,
                                        batch_size=2000))
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        updates_from = items_start_from
        deletes_from = items_start_from + total_num_items / 10
        items_start_from += total_num_items
        ########################################################################################################################
        self.sleep(120, "Sleeping after 1st cycle.")
        self.log.info("Step 8: Delete 1M docs. Update 1M docs.")
        pool = Executors.newFixedThreadPool(5)
        num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 4

        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, updates_from,
                                    "update"))
        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, deletes_from,
                                    "delete"))
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        ########################################################################################################################
        self.sleep(120, "Sleeping after 2nd cycle.")
        pool = Executors.newFixedThreadPool(5)
        num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 5
        num_items = total_num_items / doc_executors

        for i in xrange(doc_executors):
            executors.append(
                GleambookUser_Docloader(bucket,
                                        num_items,
                                        items_start_from + i * num_items,
                                        batch_size=2000))
        rebalance = self.cluster.async_rebalance(nodes_in_cluster,
                                                 [self.servers[3]], [])
        futures = pool.invokeAll(executors)

        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)
        rebalance.get_result()
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")

        bucket.close()
        cluster.disconnect()

        print "End Time: %s" % str(
            time.strftime("%H:%M:%S", time.gmtime(time.time())))
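
shutdown_and_await_termination is called throughout the Jython examples but never defined on this page. A plausible sketch, assuming it follows the standard java.util.concurrent ExecutorService shutdown idiom rather than anything project-specific:

from java.lang import InterruptedException
from java.util.concurrent import TimeUnit

def shutdown_and_await_termination(pool, timeout_secs):
    # Stop accepting new tasks, then give running tasks a chance to finish.
    pool.shutdown()
    try:
        if not pool.awaitTermination(timeout_secs, TimeUnit.SECONDS):
            # Cancel anything still running and wait once more.
            pool.shutdownNow()
            pool.awaitTermination(timeout_secs, TimeUnit.SECONDS)
    except InterruptedException:
        # Re-cancel if the current thread was interrupted while waiting.
        pool.shutdownNow()
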
Example #17
    def test_cbcollect_with_redaction_enabled_with_xdcr(self):
        rest_src = RestConnection(self.master)
        rest_src.remove_all_replications()
        rest_src.remove_all_remote_clusters()

        rest_dest = RestConnection(self.servers[1])
        rest_dest_helper = RestHelper(rest_dest)

        try:
            rest_src.remove_all_replications()
            rest_src.remove_all_remote_clusters()
            self.set_redaction_level()
            rest_src.add_remote_cluster(self.servers[1].ip, self.servers[1].port,
                                        self.servers[1].rest_username,
                                        self.servers[1].rest_password, "C2")

            """ at dest cluster """
            self.add_built_in_server_user(node=self.servers[1])
            rest_dest.create_bucket(bucket='default', ramQuotaMB=512)
            bucket_ready = rest_dest_helper.vbucket_map_ready('default')
            if not bucket_ready:
                self.fail("Bucket default at dest not created after 120 seconds.")
            repl_id = rest_src.start_replication('continuous', 'default', "C2")
            if repl_id is not None:
                self.log.info("Replication created successfully")
            gen = BlobGenerator("ent-backup", "ent-backup-", self.value_size, end=self.num_items)
            tasks = self._async_load_all_buckets(self.master, gen, "create", 0)
            for task in tasks:
                task.result()
            self.sleep(10)

            """ enable firewall """
            if self.interrupt_replication:
                RemoteUtilHelper.enable_firewall(self.master, xdcr=True)

            """ start collect logs """
            self.start_logs_collection()
            result = self.monitor_logs_collection()
            """ verify logs """
            try:
                logs_path = result["perNode"]["ns_1@" + str(self.master.ip)]["path"]
            except KeyError:
                logs_path = result["perNode"]["ns_1@127.0.0.1"]["path"]
            redactFileName = logs_path.split('/')[-1]
            nonredactFileName = logs_path.split('/')[-1].replace('-redacted', '')
            remotepath = logs_path[0:logs_path.rfind('/')+1]
            self.verify_log_files_exist(remotepath=remotepath,
                                    redactFileName=redactFileName,
                                    nonredactFileName=nonredactFileName)
            self.log.info("Verify on log ns_server.goxdcr.log")
            self.verify_log_redaction(remotepath=remotepath,
                                  redactFileName=redactFileName,
                                  nonredactFileName=nonredactFileName,
                                  logFileName="ns_server.goxdcr.log")
        finally:
            """ clean up xdcr """
            rest_dest.delete_bucket()
            rest_src.remove_all_replications()
            rest_src.remove_all_remote_clusters()
            if self.interrupt_replication:
                shell = RemoteMachineShellConnection(self.master)
                shell.disable_firewall()
                shell.disconnect()
Example #18
    def test_cbcollect_with_redaction_enabled_with_xdcr(self):
        rest_src = RestConnection(self.master)
        rest_src.remove_all_replications()
        rest_src.remove_all_remote_clusters()

        rest_dest = RestConnection(self.servers[1])
        rest_dest_helper = RestHelper(rest_dest)

        try:
            rest_src.remove_all_replications()
            rest_src.remove_all_remote_clusters()
            self.set_redaction_level()
            rest_src.add_remote_cluster(self.servers[1].ip,
                                        self.servers[1].port,
                                        self.servers[1].rest_username,
                                        self.servers[1].rest_password, "C2")
            """ at dest cluster """
            self.add_built_in_server_user(node=self.servers[1])
            rest_dest.create_bucket(bucket='default', ramQuotaMB=512)
            bucket_ready = rest_dest_helper.vbucket_map_ready('default')
            if not bucket_ready:
                self.fail(
                    "Bucket default at dest not created after 120 seconds.")
            repl_id = rest_src.start_replication('continuous', 'default', "C2")
            if repl_id is not None:
                self.log.info("Replication created successfully")
            gen = BlobGenerator("ent-backup",
                                "ent-backup-",
                                self.value_size,
                                end=self.num_items)
            tasks = self._async_load_all_buckets(self.master, gen, "create", 0)
            for task in tasks:
                task.result()
            self.sleep(10)
            """ enable firewall """
            if self.interrupt_replication:
                RemoteUtilHelper.enable_firewall(self.master, xdcr=True)
            """ start collect logs """
            self.start_logs_collection()
            result = self.monitor_logs_collection()
            """ verify logs """
            try:
                logs_path = result["perNode"]["ns_1@" +
                                              str(self.master.ip)]["path"]
            except KeyError:
                logs_path = result["perNode"]["ns_1@127.0.0.1"]["path"]
            redactFileName = logs_path.split('/')[-1]
            nonredactFileName = logs_path.split('/')[-1].replace(
                '-redacted', '')
            remotepath = logs_path[0:logs_path.rfind('/') + 1]
            self.verify_log_files_exist(remotepath=remotepath,
                                        redactFileName=redactFileName,
                                        nonredactFileName=nonredactFileName)
            self.log.info("Verify on log ns_server.goxdcr.log")
            self.verify_log_redaction(remotepath=remotepath,
                                      redactFileName=redactFileName,
                                      nonredactFileName=nonredactFileName,
                                      logFileName="ns_server.goxdcr.log")
        finally:
            """ clean up xdcr """
            rest_dest.delete_bucket()
            rest_src.remove_all_replications()
            rest_src.remove_all_remote_clusters()
            if self.interrupt_replication:
                shell = RemoteMachineShellConnection(self.master)
                shell.disable_firewall()
                shell.disconnect()
Example #19
    def test_ups_volume(self):
        nodes_in_cluster = [self.servers[0]]
        print "Start Time: %s" % str(
            time.strftime("%H:%M:%S", time.gmtime(time.time())))

        ########################################################################################################################
        self.log.info("Add a KV nodes - 2")
        self.query_node = self.servers[1]
        rest = RestConnection(self.servers[1])
        rest.set_data_path(data_path=self.servers[1].data_path,
                           index_path=self.servers[1].index_path,
                           cbas_path=self.servers[1].cbas_path)
        result = self.add_node(self.servers[1], rebalance=False)
        self.assertTrue(result, msg="Failed to add N1QL/Index node.")

        self.log.info("Add a KV nodes - 3")
        rest = RestConnection(self.servers[2])
        rest.set_data_path(data_path=self.kv_servers[1].data_path,
                           index_path=self.kv_servers[1].index_path,
                           cbas_path=self.kv_servers[1].cbas_path)
        result = self.add_node(self.kv_servers[1],
                               services=["kv"],
                               rebalance=False)
        self.assertTrue(result, msg="Failed to add KV node.")

        self.log.info("Add one more KV node")
        rest = RestConnection(self.servers[3])
        rest.set_data_path(data_path=self.kv_servers[3].data_path,
                           index_path=self.kv_servers[3].index_path,
                           cbas_path=self.kv_servers[3].cbas_path)
        result = self.add_node(self.kv_servers[3],
                               services=["kv"],
                               rebalance=False)
        self.assertTrue(result, msg="Failed to add KV node.")

        self.log.info("Add one more KV node")
        rest = RestConnection(self.servers[4])
        rest.set_data_path(data_path=self.kv_servers[4].data_path,
                           index_path=self.kv_servers[4].index_path,
                           cbas_path=self.kv_servers[4].cbas_path)
        result = self.add_node(self.kv_servers[4],
                               services=["kv"],
                               rebalance=False)
        self.assertTrue(result, msg="Failed to add KV node.")

        nodes_in_cluster = nodes_in_cluster + [
            self.servers[1], self.servers[2], self.servers[3], self.servers[4]
        ]
        ########################################################################################################################
        self.log.info("Step 2: Create Couchbase buckets.")
        self.create_required_buckets()

        ########################################################################################################################
        self.log.info(
            "Step 3: Create 10M docs average of 1k docs for 8 couchbase buckets."
        )
        env = DefaultCouchbaseEnvironment.builder().mutationTokensEnabled(
            True).computationPoolSize(5).socketConnectTimeout(
                100000).connectTimeout(100000).maxRequestLifetime(
                    TimeUnit.SECONDS.toMillis(300)).build()
        cluster = CouchbaseCluster.create(env, self.master.ip)
        cluster.authenticate("Administrator", "password")
        bucket = cluster.openBucket("GleambookUsers")
        msg_bucket = cluster.openBucket("GleambookMessages")

        pool = Executors.newFixedThreadPool(5)
        items_start_from = 0
        total_num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 5
        num_items = total_num_items / num_executors
        for i in xrange(doc_executors):
            executors.append(
                GleambookUser_Docloader(bucket, num_items,
                                        items_start_from + i * num_items))
            executors.append(
                GleambookMessages_Docloader(msg_bucket, num_items,
                                            items_start_from + i * num_items))
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        updates_from = items_start_from
        deletes_from = items_start_from + total_num_items / 10
        items_start_from += total_num_items

        ########################################################################################################################
        self.log.info("Step 6: Verify the items count.")
        self.validate_items_count()

        ########################################################################################################################
        self.log.info("Step 8: Delete 1M docs. Update 1M docs.")
        pool = Executors.newFixedThreadPool(5)
        num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 4

        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, updates_from,
                                    "update"))
        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, deletes_from,
                                    "delete"))
        executors.append(
            GleambookMessages_Docloader(msg_bucket, num_items / 10,
                                        updates_from, "update"))
        executors.append(
            GleambookMessages_Docloader(msg_bucket, num_items / 10,
                                        deletes_from, "delete"))
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)
        ########################################################################################################################
        self.log.info("Step 9: Connect cbas buckets.")
        self.connect_cbas_buckets()
        self.sleep(10, "Wait for the ingestion to complete")

        ########################################################################################################################
        self.log.info("Step 10: Verify the items count.")
        self.validate_items_count()

        ########################################################################################################################
        self.log.info(
            "Step 12: When 11 is in progress do a KV Rebalance in of 1 nodes.")
        rest = RestConnection(self.servers[5])
        rest.set_data_path(data_path=self.servers[5].data_path,
                           index_path=self.servers[5].index_path,
                           cbas_path=self.servers[5].cbas_path)
        rebalance = self.cluster.async_rebalance(nodes_in_cluster,
                                                 [self.servers[5]], [])
        nodes_in_cluster += [self.servers[5]]
        ########################################################################################################################
        self.log.info("Step 11: Create 10M docs.")
        pool = Executors.newFixedThreadPool(5)
        total_num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 4
        num_items = total_num_items / doc_executors
        for i in xrange(doc_executors):
            executors.append(
                GleambookUser_Docloader(bucket, num_items,
                                        items_start_from + i * num_items))
            executors.append(
                GleambookMessages_Docloader(msg_bucket, num_items,
                                            items_start_from + i * num_items))
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        updates_from = items_start_from
        deletes_from = items_start_from + total_num_items / 10
        items_start_from += total_num_items

        ########################################################################################################################
        self.log.info("Step 13: Wait for rebalance to complete.")
        rebalance.get_result()
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")
        self.sleep(20)

        ########################################################################################################################
        self.log.info("Step 14: Verify the items count.")
        self.validate_items_count()

        ########################################################################################################################
        self.log.info("Step 15: Delete 1M docs. Update 1M docs.")
        pool = Executors.newFixedThreadPool(5)
        num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 4

        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, updates_from,
                                    "update"))
        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, deletes_from,
                                    "delete"))
        executors.append(
            GleambookMessages_Docloader(msg_bucket, num_items / 10,
                                        updates_from, "update"))
        executors.append(
            GleambookMessages_Docloader(msg_bucket, num_items / 10,
                                        deletes_from, "delete"))

        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)
        ########################################################################################################################
        self.log.info(
            "Step 16: Verify Results that 1M docs gets deleted from analytics datasets."
        )
        self.validate_items_count()

        ########################################################################################################################
        self.log.info("Step 17: Disconnect CBAS buckets.")
        self.disconnect_cbas_buckets()

        ########################################################################################################################
        self.log.info("Step 18: Create 10M docs.")
        pool = Executors.newFixedThreadPool(5)
        total_num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 4
        num_items = total_num_items / doc_executors
        for i in xrange(doc_executors):
            executors.append(
                GleambookUser_Docloader(bucket, num_items,
                                        items_start_from + i * num_items))
            executors.append(
                GleambookMessages_Docloader(msg_bucket, num_items,
                                            items_start_from + i * num_items))
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        updates_from = items_start_from
        deletes_from = items_start_from + total_num_items / 10
        items_start_from += total_num_items

        ########################################################################################################################
        self.log.info("Step 20: Verify the docs count.")
        self.validate_items_count()

        ########################################################################################################################
        pool = Executors.newFixedThreadPool(5)
        executors = []
        num_executors = 5

        self.log.info(
            "Step 22: When 21 is in progress do a KV Rebalance out of 2 nodes."
        )
        rebalance = self.cluster.async_rebalance(nodes_in_cluster, [],
                                                 self.servers[1:3])
        nodes_in_cluster = [
            node for node in nodes_in_cluster if node not in self.servers[1:3]
        ]

        futures = pool.invokeAll(executors)
        self.log.info("Step 23: Wait for rebalance.")
        rebalance.get_result()
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")
        self.sleep(20)

        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        ########################################################################################################################
        self.log.info("Step 24: Create 10M docs.")
        pool = Executors.newFixedThreadPool(5)
        total_num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 6
        doc_executors = 4
        num_items = total_num_items / doc_executors
        for i in xrange(doc_executors):
            executors.append(
                GleambookUser_Docloader(bucket, num_items,
                                        items_start_from + i * num_items))
            executors.append(
                GleambookMessages_Docloader(msg_bucket, num_items,
                                            items_start_from + i * num_items))

        ##################################################### NEED TO BE UPDATED ##################################################################
        self.log.info(
            "Step 25: When 24 is in progress do a KV Rebalance in of 2 nodes.")
        for node in self.servers[1:3]:
            rest = RestConnection(node)
            rest.set_data_path(data_path=node.data_path,
                               index_path=node.index_path,
                               cbas_path=node.cbas_path)
        rebalance = self.cluster.async_rebalance(nodes_in_cluster,
                                                 self.servers[1:3], [])
        nodes_in_cluster = nodes_in_cluster + self.servers[1:3]
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        updates_from = items_start_from
        deletes_from = items_start_from + total_num_items / 10
        items_start_from += total_num_items

        self.log.info("Step 27: Wait for rebalance to complete.")
        rebalance.get_result()
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")

        ########################################################################################################################
        self.log.info("Step 28: Verify the docs count.")
        self.validate_items_count()

        ########################################################################################################################
        self.log.info("Step 29: Delete 1M docs. Update 1M docs.")
        pool = Executors.newFixedThreadPool(5)
        num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 4

        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, updates_from,
                                    "update"))
        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, deletes_from,
                                    "delete"))
        executors.append(
            GleambookMessages_Docloader(msg_bucket, num_items / 10,
                                        updates_from, "update"))
        executors.append(
            GleambookMessages_Docloader(msg_bucket, num_items / 10,
                                        deletes_from, "delete"))

        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)
        ########################################################################################################################
        self.log.info("Step 30: Verify the docs count.")
        self.validate_items_count()

        ########################################################################################################################
        self.log.info("Step 31: Create 10M docs.")
        pool = Executors.newFixedThreadPool(5)
        total_num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 4
        num_items = total_num_items / doc_executors
        for i in xrange(doc_executors):
            executors.append(
                GleambookUser_Docloader(bucket, num_items,
                                        items_start_from + i * num_items))
            executors.append(
                GleambookMessages_Docloader(msg_bucket, num_items,
                                            items_start_from + i * num_items))

        ###################################################### NEED TO BE UPDATED ##################################################################
        self.log.info(
            "Step 32: When 31 is in progress do a KV Rebalance out of 2 nodes."
        )
        rebalance = self.cluster.async_rebalance(nodes_in_cluster, [],
                                                 self.servers[1:3])
        nodes_in_cluster = [
            node for node in nodes_in_cluster if node not in self.servers[1:3]
        ]
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        updates_from = items_start_from
        deletes_from = items_start_from + total_num_items / 10
        items_start_from += total_num_items
        ########################################################################################################################
        self.log.info("Step 33: Wait for rebalance to complete.")
        rebalance.get_result()
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")
        self.sleep(20)

        ########################################################################################################################
        self.log.info("Step 34: Verify the docs count.")
        self.validate_items_count()

        ########################################################################################################################
        self.log.info("Step 35: Delete 1M docs. Update 1M docs.")
        pool = Executors.newFixedThreadPool(5)
        num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 4

        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, updates_from,
                                    "update"))
        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, deletes_from,
                                    "delete"))
        executors.append(
            GleambookMessages_Docloader(msg_bucket, num_items / 10,
                                        updates_from, "update"))
        executors.append(
            GleambookMessages_Docloader(msg_bucket, num_items / 10,
                                        deletes_from, "delete"))

        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        ########################################################################################################################
        self.log.info("Step 36: Verify the docs count.")
        self.validate_items_count()

        ########################################################################################################################
        self.log.info("Step 37: Create 10M docs.")
        pool = Executors.newFixedThreadPool(5)
        total_num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 4
        num_items = total_num_items / doc_executors
        for i in xrange(doc_executors):
            executors.append(
                GleambookUser_Docloader(bucket, num_items,
                                        items_start_from + i * num_items))
            executors.append(
                GleambookMessages_Docloader(msg_bucket, num_items,
                                            items_start_from + i * num_items))

        ###################################################### NEED TO BE UPDATED ##################################################################
        self.log.info(
            "Step 38: When 37 is in progress do a CBAS SWAP Rebalance of 2 nodes."
        )
        for node in self.cbas_servers[-1:]:
            rest = RestConnection(node)
            rest.set_data_path(data_path=node.data_path,
                               index_path=node.index_path,
                               cbas_path=node.cbas_path)
        rebalance = self.cluster.async_rebalance(nodes_in_cluster,
                                                 [self.servers[6]],
                                                 [self.servers[5]],
                                                 services=["kv"],
                                                 check_vbucket_shuffling=False)
        nodes_in_cluster += [self.servers[6]]
        nodes_in_cluster.remove(self.servers[5])
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        ########################################################################################################################
        self.log.info("Step 39: Wait for rebalance to complete.")
        rebalance.get_result()
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")
        self.sleep(20)

        updates_from = items_start_from
        deletes_from = items_start_from + total_num_items / 10
        items_start_from += total_num_items

        ########################################################################################################################
        self.log.info("Step 40: Verify the docs count.")
        self.validate_items_count()

        ########################################################################################################################
        self.log.info("Step 41: Delete 1M docs. Update 1M docs.")
        pool = Executors.newFixedThreadPool(5)
        num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 4

        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, updates_from,
                                    "update"))
        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, deletes_from,
                                    "delete"))
        executors.append(
            GleambookMessages_Docloader(msg_bucket, num_items / 10,
                                        updates_from, "update"))
        executors.append(
            GleambookMessages_Docloader(msg_bucket, num_items / 10,
                                        deletes_from, "delete"))

        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        ########################################################################################################################
        self.log.info("Step 42: Verify the docs count.")
        self.validate_items_count()

        ########################################################################################################################
        self.log.info("Step 43: Create 10M docs.")
        pool = Executors.newFixedThreadPool(5)
        total_num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 4
        num_items = total_num_items / doc_executors
        for i in xrange(doc_executors):
            executors.append(
                GleambookUser_Docloader(bucket, num_items,
                                        items_start_from + i * num_items))
            executors.append(
                GleambookMessages_Docloader(msg_bucket, num_items,
                                            items_start_from + i * num_items))

        ###################################################### NEED TO BE UPDATED ##################################################################
        self.log.info("Step 44: When 43 is in progress do a KV Rebalance IN.")
        rest = RestConnection(self.servers[5])
        rest.set_data_path(data_path=self.servers[5].data_path,
                           index_path=self.servers[5].index_path,
                           cbas_path=self.servers[5].cbas_path)
        rebalance = self.cluster.async_rebalance(nodes_in_cluster,
                                                 [self.servers[5]], [],
                                                 services=["kv"])
        nodes_in_cluster += [self.servers[5]]
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        ########################################################################################################################
        self.log.info("Step 45: Wait for rebalance to complete.")
        rebalance.get_result()
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")
        self.sleep(20)

        updates_from = items_start_from
        deletes_from = items_start_from + total_num_items / 10
        items_start_from += total_num_items

        ########################################################################################################################
        self.log.info("Step 46: Verify the docs count.")
        self.validate_items_count()

        ########################################################################################################################
        self.log.info("Step 47: Delete 1M docs. Update 1M docs.")
        pool = Executors.newFixedThreadPool(5)
        num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 4

        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, updates_from,
                                    "update"))
        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, deletes_from,
                                    "delete"))
        executors.append(
            GleambookMessages_Docloader(msg_bucket, num_items / 10,
                                        updates_from, "update"))
        executors.append(
            GleambookMessages_Docloader(msg_bucket, num_items / 10,
                                        deletes_from, "delete"))

        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        ########################################################################################################################
        self.log.info("Step 48: Verify the docs count.")
        self.validate_items_count()

        ########################################################################################################################
        self.log.info("Step 49: Create 10M docs.")
        pool = Executors.newFixedThreadPool(5)
        total_num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 4
        num_items = total_num_items / doc_executors
        for i in xrange(doc_executors):
            executors.append(
                GleambookUser_Docloader(bucket, num_items,
                                        items_start_from + i * num_items))
            executors.append(
                GleambookMessages_Docloader(msg_bucket, num_items,
                                            items_start_from + i * num_items))

        ########################################################################################################################
        self.log.info(
            "Step 50: When 49 is in progress do a KV+CBAS Rebalance OUT.")
        rest = RestConnection(self.servers[6])
        rest.set_data_path(data_path=self.servers[6].data_path,
                           index_path=self.servers[6].index_path,
                           cbas_path=self.servers[6].cbas_path)
        rebalance = self.cluster.async_rebalance(nodes_in_cluster, [],
                                                 [self.servers[6]])
        nodes_in_cluster.remove(self.servers[6])

        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        ########################################################################################################################
        self.log.info("Step 51: Wait for rebalance to complete.")
        rebalance.get_result()
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")
        self.sleep(20)
        updates_from = items_start_from
        deletes_from = items_start_from + total_num_items / 10
        items_start_from += total_num_items

        ########################################################################################################################
        self.log.info("Step 52: Verify the docs count.")
        self.validate_items_count()

        ########################################################################################################################
        self.log.info("Step 53: Delete 1M docs. Update 1M docs.")
        pool = Executors.newFixedThreadPool(5)
        num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 4

        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, updates_from,
                                    "update"))
        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, deletes_from,
                                    "delete"))
        executors.append(
            GleambookMessages_Docloader(msg_bucket, num_items / 10,
                                        updates_from, "update"))
        executors.append(
            GleambookMessages_Docloader(msg_bucket, num_items / 10,
                                        deletes_from, "delete"))

        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        ########################################################################################################################
        self.log.info("Step 54: Verify the docs count.")
        self.validate_items_count()

        ########################################################################################################################
        self.log.info("Step 55: Create 10M docs.")
        pool = Executors.newFixedThreadPool(5)
        total_num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 4
        num_items = total_num_items / doc_executors
        for i in xrange(doc_executors):
            executors.append(
                GleambookUser_Docloader(bucket, num_items,
                                        items_start_from + i * num_items))
            executors.append(
                GleambookMessages_Docloader(msg_bucket, num_items,
                                            items_start_from + i * num_items))

        ########################################################################################################################
        self.log.info(
            "Step 56: When 55 is in progress do a KV+CBAS SWAP Rebalance .")
        rest = RestConnection(self.servers[7])
        rest.set_data_path(data_path=self.servers[7].data_path,
                           index_path=self.servers[7].index_path,
                           cbas_path=self.servers[7].cbas_path)
        rebalance = self.cluster.async_rebalance(nodes_in_cluster,
                                                 [self.servers[7]],
                                                 [self.servers[6]])
        #         rebalance.get_result()
        nodes_in_cluster.remove(self.servers[6])
        nodes_in_cluster += [self.servers[7]]

        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        ########################################################################################################################
        self.log.info("Step 57: Wait for rebalance to complete.")
        rebalance.get_result()
        reached = RestHelper(self.rest).rebalance_reached(wait_step=240)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")
        self.sleep(20)

        updates_from = items_start_from
        deletes_from = items_start_from + total_num_items / 10
        items_start_from += total_num_items

        ########################################################################################################################
        self.log.info("Step 58: Verify the docs count.")
        self.validate_items_count()

        ########################################################################################################################
        self.log.info("Step 59: Delete 1M docs. Update 1M docs.")
        pool = Executors.newFixedThreadPool(5)
        num_items = self.input.param("num_items", 5000)
        executors = []
        num_executors = 5
        doc_executors = 4

        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, updates_from,
                                    "update"))
        executors.append(
            GleambookUser_Docloader(bucket, num_items / 10, deletes_from,
                                    "delete"))
        executors.append(
            GleambookMessages_Docloader(msg_bucket, num_items / 10,
                                        updates_from, "update"))
        executors.append(
            GleambookMessages_Docloader(msg_bucket, num_items / 10,
                                        deletes_from, "delete"))

        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)

        ########################################################################################################################
        self.log.info("Step 60: Verify the docs count.")
        self.validate_items_count()

        bucket.close()
        msg_bucket.close()
        cluster.disconnect()

        print "End Time: %s" % str(
            time.strftime("%H:%M:%S", time.gmtime(time.time())))
Exemplo n.º 20
    def test_volume(self):
        nodes_in_cluster = [self.servers[0]]
        print "Start Time: %s" % str(
            time.strftime("%H:%M:%S", time.gmtime(time.time())))

        #######################################################################
        self.log.info("Step 1: Add a N1QL/Index nodes")
        self.query_node = self.servers[1]
        rest = RestConnection(self.query_node)
        rest.set_data_path(data_path=self.query_node.data_path,
                           index_path=self.query_node.index_path,
                           cbas_path=self.query_node.cbas_path)
        result = self.add_node(self.query_node, rebalance=False)
        self.assertTrue(result, msg="Failed to add N1QL/Index node.")

        self.log.info("Step 2: Add a KV nodes")
        result = self.add_node(self.servers[2],
                               services=["kv"],
                               rebalance=True)
        self.assertTrue(result, msg="Failed to add KV node.")

        nodes_in_cluster = nodes_in_cluster + [
            self.servers[1], self.servers[2]
        ]

        #######################################################################

        self.log.info("Step 3: Create Couchbase buckets.")
        self.create_required_buckets()

        #######################################################################

        env = DefaultCouchbaseEnvironment.builder().mutationTokensEnabled(
            True).computationPoolSize(5).socketConnectTimeout(
                10000000).connectTimeout(10000000).maxRequestLifetime(
                    TimeUnit.SECONDS.toMillis(1200)).build()

        try:
            System.setProperty("com.couchbase.forceIPv4", "false")
            logger = Logger.getLogger("com.couchbase.client")
            logger.setLevel(Level.SEVERE)
            for h in logger.getParent().getHandlers():
                if isinstance(h, ConsoleHandler):
                    h.setLevel(Level.SEVERE)

            cluster = CouchbaseCluster.create(env, self.master.ip)
            cluster.authenticate("Administrator", "password")
            self.bucket = cluster.openBucket("GleambookUsers")
            self.msg_bucket = cluster.openBucket("GleambookMessages")
        except CouchbaseException:
            print "cannot login from user: %s/%s" % (self.username,
                                                     self.password)
            raise

        self.c = cluster
        self.items_start_from = 0
        self.total_num_items = self.input.param("num_items", 5000)
        self.load_data()

        self.sleep(20, "Sleeping after 4th step.")

        self.validate_items_count()

        self.log.info("Step 4: Add node")
        result = self.add_node(self.servers[3], rebalance=False)
        self.assertTrue(result, msg="Failed to add node.")
        self.log.info("Step 5: Loading %s items" % self.total_num_items)
        self.load_data()

        self.log.info("Step 6: Rebalance Cluster")
        rebalance = self.rebalance()
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")
        nodes_in_cluster = nodes_in_cluster + [self.servers[3]]

        self.log.info("Step 7: Start Verification")
        self.validate_items_count()
        self.check_snap_start_corruption()

        #######################################################################
        self.sleep(20)
        self.log.info("Step 8: Delete/Update docs.")
        self.update_data()

        self.log.info("Step 9: Verifying Data")
        self.validate_items_count()
        self.check_snap_start_corruption()

        #######################################################################
        self.log.info("Step 10: Removing node and Rebalance cluster")
        rebalance = self.cluster.async_rebalance(nodes_in_cluster, [],
                                                 [self.servers[3]])
        nodes_in_cluster.remove(self.servers[3])

        self.log.info("Step 11: Loading %s items" % self.total_num_items)
        self.load_data()

        rebalance.get_result()
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")

        self.log.info("Step 12: Verifying Data")
        self.validate_items_count()
        self.check_snap_start_corruption()

        #######################################################################
        self.sleep(20)
        self.log.info("Step 13: Delete/Update docs.")
        self.update_data()

        self.log.info("Step 14: Verifying Data")
        self.validate_items_count()
        self.check_snap_start_corruption()

        #######################################################################
        self.sleep(20)
        self.log.info("Step 15: Add node")
        result = self.add_node(self.servers[3], rebalance=False)
        nodes_in_cluster = nodes_in_cluster + [self.servers[3]]

        self.log.info("Step 16: Loading %s items" % self.total_num_items)
        self.load_data()

        self.log.info("Step 17: Rebalancing Cluster")
        rebalance = self.cluster.async_rebalance(nodes_in_cluster, [],
                                                 [self.servers[2]])

        rebalance.get_result()
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")

        nodes_in_cluster.remove(self.servers[2])

        self.log.info("Step 18: Verifying Data")
        self.validate_items_count()
        self.check_snap_start_corruption()

        #######################################################################
        self.sleep(20)
        self.log.info("Step 19: Delete/Update docs.")
        self.update_data()

        self.log.info("Step 20: Verifying Data")
        self.validate_items_count()
        self.check_snap_start_corruption()

        #######################################################################
        self.sleep(20)
        self.log.info("Step 21: Add node")
        result = self.add_node(self.servers[2], rebalance=False)

        self.log.info("Step 22: Loading %s items" % self.total_num_items)
        self.load_data()

        self.log.info("Step 23: Rebalancing Cluster")
        rebalance = self.rebalance()
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")
        nodes_in_cluster = nodes_in_cluster + [self.servers[2]]

        self.log.info("Step 24: Verifying Data")
        self.validate_items_count()
        self.check_snap_start_corruption()

        #######################################################################
        self.sleep(20)
        self.log.info("Step 25: Delete/Update docs.")
        self.update_data()

        self.log.info("Step 26: Verifying Data")
        self.validate_items_count()
        self.check_snap_start_corruption()

        #######################################################################
        self.sleep(20)

        self.log.info("Step 27: Add node")
        result = self.add_node(self.servers[4], rebalance=False)

        self.log.info("Step 28: Loading %s items" % self.total_num_items)
        self.load_data()

        self.log.info("Step 29: Rebalancing Cluster")
        rebalance = self.rebalance()
        nodes_in_cluster = nodes_in_cluster + [self.servers[4]]
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")

        self.log.info("Step 30: Verifying Data")
        self.validate_items_count()
        self.check_snap_start_corruption()

        #######################################################################
        self.sleep(20)
        self.log.info("Step 31: Delete/Update docs.")
        self.update_data()

        self.log.info("Step 32: Verifying Data")
        self.validate_items_count()
        self.check_snap_start_corruption()

        #######################################################################
        self.sleep(20)
        self.log.info("Step 33: Removing node, Rebalancing Cluster")
        rebalance = self.cluster.async_rebalance(nodes_in_cluster, [],
                                                 [self.servers[3]])
        nodes_in_cluster.remove(self.servers[3])

        self.log.info("Step 34: Loading %s items" % self.total_num_items)
        self.load_data()

        rebalance.get_result()
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")

        self.log.info("Step 35: Verifying Data")
        self.validate_items_count()
        self.check_snap_start_corruption()

        #######################################################################
        self.sleep(20)

        self.log.info("Step 36: Adding 3 nodes")
        otp1 = self.add_node(self.servers[5], rebalance=False)
        otp2 = self.add_node(self.servers[6], rebalance=False)
        otp3 = self.add_node(self.servers[7], rebalance=False)

        self.log.info("Step 37: Loading %s items" % self.total_num_items)
        self.load_data()

        self.log.info("Step 38: Rebalancing Cluster")
        rebalance = self.rebalance()
        nodes_in_cluster = nodes_in_cluster + [
            self.servers[5], self.servers[6], self.servers[7]
        ]
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")

        self.log.info("Step 39: Verifying Data")
        self.validate_items_count()
        self.check_snap_start_corruption()

        #######################################################################
        self.log.info("Step 40: Graceful failover node")
        self.rest.fail_over(otp3.id, graceful=True)
        self.log.info("Step 41: Loading %s items" % self.total_num_items)
        self.load_data()
        self.sleep(10)
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")

        self.log.info("Step 42: Rebalancing Cluster")
        rebalance = self.rebalance()
        nodes_in_cluster.remove(self.servers[7])
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")

        #######################################################################
        self.log.info("Step 43: Adding node and rebalancing")
        otp3 = self.add_node(self.servers[7], rebalance=True)
        nodes_in_cluster = nodes_in_cluster + [self.servers[7]]

        #######################################################################

        self.log.info("Step 44: Graceful failover node")
        self.rest.fail_over(otp3.id, graceful=True)
        self.log.info("Step 41: Loading %s items" % self.total_num_items)
        self.load_data()
        self.sleep(10)
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")

        self.log.info("Step 45: Delta recover node")
        self.rest.set_recovery_type(otp3.id, "delta")

        self.log.info("Step 46: Add node back to cluster")
        self.rest.add_back_node(otp3.id)

        rebalance = self.rebalance()
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")

        self.log.info("Step 47: Graceful failover node")
        self.rest.fail_over(otp2.id, graceful=True)
        self.log.info("Step 48: Loading %s items" % self.total_num_items)
        self.load_data()
        self.sleep(10)
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")

        self.log.info("Step 49: Delta recover node")
        self.rest.set_recovery_type(otp2.id, "full")

        self.log.info("Step 50: Add node back to cluster")
        self.rest.add_back_node(otp2.id)

        rebalance = self.rebalance()
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")

        self.bucket.close()
        self.msg_bucket.close()
        cluster.disconnect()
Exemplo n.º 21
class PerfBase(unittest.TestCase):
    """
    specURL = http://hub.internal.couchbase.org/confluence/display/cbit/Black+Box+Performance+Test+Matrix

    """

    # The setUpBaseX() methods allow subclasses to resequence the setUp() and
    # skip cluster configuration.
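    #
    # A minimal sketch (hypothetical subclass, not part of this suite) of the
    # resequencing idea: call setUpBase0() for logger/REST wiring only and
    # setUpBase1() to derive vbucket_count, skipping set_up_cluster() entirely.
    #
    #     class RestOnlyPerfTest(PerfBase):
    #         def setUp(self):
    #             self.setUpBase0()
    #             self.setUpBase1()
    #
    #         def test_report_vbuckets(self):
    #             self.log.info("vbuckets: %s" % self.vbucket_count)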
    def setUpBase0(self):
        self.log = logger.Logger.get_logger()
        self.input = TestInputSingleton.input
        if self.input.param("log_level", None):
            self.log.setLevel(level=0)
            for hd in self.log.handlers:
                if str(hd.__class__).find('FileHandler') != -1:
                    hd.setLevel(level=logging.DEBUG)
                else:
                    hd.setLevel(level=getattr(
                        logging, self.input.param("log_level", None)))
        self.vbucket_count = PerfDefaults.vbuckets
        self.sc = None
        if self.parami("tear_down_on_setup",
                       PerfDefaults.tear_down_on_setup) == 1:
            self.tearDown()  # Tear down in case previous run had unclean death
        master = self.input.servers[0]
        self.set_up_rest(master)

    def setUpBase1(self):
        if max(self.parami('num_buckets', 1), self.parami(
                'xdcr_num_buckets', 1)) > 1:
            bucket = 'bucket-0'
        else:
            bucket = self.param('bucket', 'default')
        vBuckets = self.rest.get_vbuckets(bucket)
        self.vbucket_count = len(vBuckets) if vBuckets else 0

    def setUp(self):
        self.setUpBase0()

        mc_threads = self.parami("mc_threads", PerfDefaults.mc_threads)
        if mc_threads != PerfDefaults.mc_threads:
            for node in self.input.servers:
                self.set_mc_threads(node, mc_threads)

        erlang_schedulers = self.param("erlang_schedulers",
                                       PerfDefaults.erlang_schedulers)
        if erlang_schedulers:
            ClusterOperationHelper.set_erlang_schedulers(
                self.input.servers, erlang_schedulers)
        master = self.input.servers[0]

        self.is_multi_node = False
        self.data_path = master.data_path

        # Number of items loaded by load() method.
        # Does not include or count any items that came from set_up_dgm().
        #
        self.num_items_loaded = 0

        if self.input.clusters:
            for cluster in self.input.clusters.values():
                master = cluster[0]
                self.set_up_rest(master)
                self.set_up_cluster(master)
        else:
            master = self.input.servers[0]
            self.set_up_cluster(master)

        # Rebalance
        if self.input.clusters:
            for cluster in self.input.clusters.values():
                num_nodes = self.parami("num_nodes_before", len(cluster))
                self.rebalance_nodes(num_nodes, cluster)
        else:
            num_nodes = self.parami("num_nodes", 10)
            self.rebalance_nodes(num_nodes)

        if self.input.clusters:
            for cluster in self.input.clusters.values():
                master = cluster[0]
                self.set_up_rest(master)
                self.set_up_buckets()
        else:
            self.set_up_buckets()

        self.set_up_proxy()

        if self.input.clusters:
            for cluster in self.input.clusters.values():
                master = cluster[0]
                self.set_up_rest(master)
                self.reconfigure()
        else:
            self.reconfigure()

        if self.parami("dgm", getattr(self, "dgm", 1)):
            self.set_up_dgm()

        time.sleep(10)
        self.setUpBase1()

        if self.input.clusters:
            for cluster in self.input.clusters.values():
                self.wait_until_warmed_up(cluster[0])
        else:
            self.wait_until_warmed_up()
        ClusterOperationHelper.flush_os_caches(self.input.servers)

    def set_up_rest(self, master):
        self.rest = RestConnection(master)
        self.rest_helper = RestHelper(self.rest)

    def set_up_cluster(self, master):
        """Initialize cluster"""
        self.log.info("setting up cluster")

        self.rest.init_cluster(master.rest_username, master.rest_password)

        memory_quota = self.parami('mem_quota', PerfDefaults.mem_quota)
        self.rest.init_cluster_memoryQuota(master.rest_username,
                                           master.rest_password,
                                           memoryQuota=memory_quota)

    def _get_bucket_names(self, num_buckets):
        """
        Get a list of bucket names
        """
        if num_buckets > 1:
            buckets = ['bucket-{0}'.format(i) for i in range(num_buckets)]
        else:
            buckets = [self.param('bucket', 'default')]

        return buckets
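    # For illustration (assumed values): _get_bucket_names(3) yields
    # ['bucket-0', 'bucket-1', 'bucket-2'], while _get_bucket_names(1) falls
    # back to the single 'bucket' test parameter (default 'default').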

    def get_bucket_conf(self):
        """ retrieve bucket configurations"""

        num_buckets = max(self.parami('num_buckets', 1),
                          self.parami('xdcr_num_buckets', 1))
        self.buckets = self._get_bucket_names(num_buckets)

    def set_up_buckets(self):
        """Set up data bucket(s)"""

        self.log.info("setting up buckets")

        self.get_bucket_conf()

        for bucket in self.buckets:
            bucket_ram_quota = self.parami('mem_quota', PerfDefaults.mem_quota)
            bucket_threads_num = self.parami('threads_number',
                                             PerfDefaults.threads_number)
            bucket_ram_quota /= max(self.parami('num_buckets', 1),
                                    self.parami('xdcr_num_buckets', 1))
            replicas = self.parami('replicas', getattr(self, 'replicas', 1))
            index_replicas = self.parami('index_replicas', 0)

            self.rest.create_bucket(bucket=bucket,
                                    ramQuotaMB=bucket_ram_quota,
                                    replicaNumber=replicas,
                                    authType='sasl',
                                    threadsNumber=bucket_threads_num,
                                    replica_index=index_replicas)

            status = self.rest_helper.vbucket_map_ready(bucket, 60)
            self.assertTrue(status, msg='vbucket_map not ready .. timed out')
            status = self.rest_helper.bucket_exists(bucket)
            self.assertTrue(status,
                            msg='unable to create {0} bucket'.format(bucket))

    def reconfigure(self):
        """Customize basic Couchbase setup"""
        self.log.info("customizing setup")

        self.set_loglevel()
        self.customize_xdcr_settings()
        self.set_autocompaction()
        self.set_exp_pager_stime()
        self.set_rebalance_options()

    def set_rebalance_options(self):
        # rebalanceMovesBeforeCompaction
        rmbc = self.parami('rebalance_moves_before_compaction', 0)
        if rmbc:
            cmd = 'ns_config:set(rebalance_moves_before_compaction, {0}).'\
                .format(rmbc)
            self.rest.diag_eval(cmd)
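        # For example, rebalance_moves_before_compaction=64 makes the call
        # above send the diag/eval snippet
        #   ns_config:set(rebalance_moves_before_compaction, 64).
        # to ns_server.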

    def set_exp_pager_stime(self):
        exp_pager_stime = self.param('exp_pager_stime',
                                     PerfDefaults.exp_pager_stime)
        if exp_pager_stime != PerfDefaults.exp_pager_stime:
            self.set_ep_param('flush_param', 'exp_pager_stime',
                              exp_pager_stime)

    def set_loglevel(self):
        """Set custom loglevel"""

        loglevel = self.param('loglevel', None)
        if loglevel:
            self.rest.set_global_loglevel(loglevel)

    def set_mc_threads(self, node, mc_threads):
        """Change number of memcached threads"""
        rest = RestConnection(node)
        rest.set_mc_threads(mc_threads)
        self.log.info("num of memcached threads = {0}".format(mc_threads))

    def customize_xdcr_settings(self):
        """Set custom XDCR environment variables"""
        max_concurrent_reps_per_doc = self.param('max_concurrent_reps_per_doc',
                                                 None)
        xdcr_doc_batch_size_kb = self.param('xdcr_doc_batch_size_kb', None)
        xdcr_checkpoint_interval = self.param('xdcr_checkpoint_interval', None)
        xdcr_latency_optimization = self.param('xdcr_latency_optimization',
                                               None)

        if max_concurrent_reps_per_doc:
            param = 'xdcrMaxConcurrentReps'
            value = max_concurrent_reps_per_doc
        elif xdcr_doc_batch_size_kb:
            param = 'xdcrDocBatchSizeKb'
            value = xdcr_doc_batch_size_kb
        elif xdcr_checkpoint_interval:
            param = 'xdcrCheckpointInterval'
            value = xdcr_checkpoint_interval
        else:
            return

        self.log.info("changing {0} to {1}".format(param, value))

        for servers in self.input.clusters.values():
            rest_conn = RestConnection(servers[0])
            replications = rest_conn.get_replications()
            for repl in replications:
                src_bucket = repl.get_src_bucket()
                dst_bucket = repl.get_dest_bucket()
                rest_conn.set_xdcr_param(src_bucket.name, dst_bucket.name,
                                         param, value)

    def set_ep_compaction(self, comp_ratio):
        """Set up ep_engine side compaction ratio"""
        for server in self.input.servers:
            shell = RemoteMachineShellConnection(server)
            cmd = "/opt/couchbase/bin/cbepctl localhost:11210 "\
                  "set flush_param db_frag_threshold {0}".format(comp_ratio)
            self._exec_and_log(shell, cmd)
            shell.disconnect()

    def set_autocompaction(self, disable_view_compaction=False):
        """Set custom auto-compaction settings"""

        try:
            # Parallel database and view compaction
            parallel_compaction = self.param("parallel_compaction",
                                             PerfDefaults.parallel_compaction)
            # Database fragmentation threshold
            db_compaction = self.parami("db_compaction",
                                        PerfDefaults.db_compaction)
            self.log.info("database compaction = {0}".format(db_compaction))

            # ep_engine fragmentation threshold
            ep_compaction = self.parami("ep_compaction",
                                        PerfDefaults.ep_compaction)
            if ep_compaction != PerfDefaults.ep_compaction:
                self.set_ep_compaction(ep_compaction)
                self.log.info(
                    "ep_engine compaction = {0}".format(ep_compaction))

            # View fragmentation threshold
            if disable_view_compaction:
                view_compaction = 100
            else:
                view_compaction = self.parami("view_compaction",
                                              PerfDefaults.view_compaction)
            # Set custom auto-compaction settings
            self.rest.set_auto_compaction(
                parallelDBAndVC=parallel_compaction,
                dbFragmentThresholdPercentage=db_compaction,
                viewFragmntThresholdPercentage=view_compaction)
        except Exception as e:
            # It's very hard to determine what exception it can raise.
            # Therefore we have to use general handler.
            self.log.error(
                "Error while changing compaction settings: {0}".format(e))

    def set_ep_param(self, type, param, value):
        """
        Set ep-engine specific param, using cbepctl

        type: parameter type, e.g.: flush_param, tap_param, etc.
        """
        bucket = Bucket(name=self.buckets[0], authType="sasl", saslPassword="")
        for server in self.input.servers:
            shell = RemoteMachineShellConnection(server)
            shell.execute_cbepctl(bucket, "", "set %s" % type, param, value)
            shell.disconnect()

    def tearDown(self):
        if self.parami("tear_down", 0) == 1:
            self.log.info("routine skipped")
            return

        self.log.info("routine starts")

        if self.parami("tear_down_proxy", 1) == 1:
            self.tear_down_proxy()
        else:
            self.log.info("proxy tearDown skipped")

        if self.sc is not None:
            self.sc.stop()
            self.sc = None

        if self.parami("tear_down_bucket", 0) == 1:
            self.tear_down_buckets()
        else:
            self.log.info("bucket tearDown skipped")

        if self.parami("tear_down_cluster", 1) == 1:
            self.tear_down_cluster()
        else:
            self.log.info("cluster tearDown skipped")

        self.log.info("routine finished")

    def tear_down_buckets(self):
        self.log.info("tearing down bucket")
        BucketOperationHelper.delete_all_buckets_or_assert(
            self.input.servers, self)
        self.log.info("bucket teared down")

    def tear_down_cluster(self):
        self.log.info("tearing down cluster")
        ClusterOperationHelper.cleanup_cluster(self.input.servers)
        ClusterOperationHelper.wait_for_ns_servers_or_assert(
            self.input.servers, self)
        self.log.info("Cluster teared down")

    def set_up_proxy(self, bucket=None):
        """Set up and start Moxi"""

        if self.input.moxis:
            self.log.info("setting up proxy")

            bucket = bucket or self.param('bucket', 'default')

            shell = RemoteMachineShellConnection(self.input.moxis[0])
            shell.start_moxi(self.input.servers[0].ip, bucket,
                             self.input.moxis[0].port)
            shell.disconnect()

    def tear_down_proxy(self):
        if len(self.input.moxis) > 0:
            shell = RemoteMachineShellConnection(self.input.moxis[0])
            shell.stop_moxi()
            shell.disconnect()

    # Returns "host:port" of moxi to hit.
    def target_host_port(self, bucket='default', use_direct=False):
        rv = self.param('moxi', None)
        if use_direct:
            return "%s:%s" % (self.input.servers[0].ip, '11210')
        if rv:
            return rv
        if len(self.input.moxis) > 0:
            return "%s:%s" % (self.input.moxis[0].ip, self.input.moxis[0].port)
        return "%s:%s" % (self.input.servers[0].ip,
                          self.rest.get_bucket(bucket).nodes[0].moxi)
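    # Illustrative behaviour (hypothetical addresses): use_direct=True returns
    # "<first-server-ip>:11210"; a 'moxi' test parameter is returned verbatim;
    # otherwise the first moxi host, or the bucket's per-node moxi port
    # reported by REST, is used.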

    def protocol_parse(self, protocol_in, use_direct=False):
        if protocol_in.find('://') >= 0:
            if protocol_in.find("couchbase:") >= 0:
                protocol = "couchbase"
            else:
                protocol = \
                    '-'.join(((["membase"] +
                    protocol_in.split("://"))[-2] + "-binary").split('-')[0:2])
            host_port = ('@' + protocol_in.split("://")[-1]).split('@')[-1]
            user, pswd = (('@' + protocol_in.split("://")[-1]).split('@')[-2] +
                          ":").split(':')[0:2]
        else:
            protocol = 'memcached-' + protocol_in
            host_port = self.target_host_port(use_direct=use_direct)
            user = self.param("rest_username", "Administrator")
            pswd = self.param("rest_password", "password")
        return protocol, host_port, user, pswd
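    # Rough example of the parsing above (made-up credentials and host):
    #   "membase-binary://Administrator:password@10.1.2.3:8091"
    # yields ("membase-binary", "10.1.2.3:8091", "Administrator", "password"),
    # while a bare "binary" input becomes "memcached-binary" against the
    # host:port chosen by target_host_port().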

    def mk_protocol(self, host, port='8091', prefix='membase-binary'):
        return self.param('protocol', prefix + '://' + host + ':' + port)

    def get_backups(self, protocol):
        """ Get backup server lists for memcached-binary """
        port = protocol.split(":")[-1]
        return map(lambda server: "%s:%s" % (server.ip, port),
                   self.input.servers[1:])
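    # Sketch (made-up topology): with protocol "membase-binary://10.1.2.3:11210"
    # and remaining servers 10.1.2.4 and 10.1.2.5, this returns
    # ["10.1.2.4:11210", "10.1.2.5:11210"] -- every server but the first,
    # paired with the port taken from the protocol string.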

    def restartProxy(self, bucket=None):
        self.tear_down_proxy()
        self.set_up_proxy(bucket)

    def set_up_dgm(self):
        """Download fragmented, DGM dataset onto each cluster node, if not
        already locally available.

        The number of vbuckets and database schema must match the
        target cluster.

        Shutdown all cluster nodes.

        Do a cluster-restore.

        Restart all cluster nodes."""

        bucket = self.param("bucket", "default")
        ClusterOperationHelper.stop_cluster(self.input.servers)
        for server in self.input.servers:
            remote = RemoteMachineShellConnection(server)
            #TODO: Better way to pass num_nodes and db_size?
            self.get_data_files(remote, bucket, 1, 10)
            remote.disconnect()
        ClusterOperationHelper.start_cluster(self.input.servers)

    def get_data_files(self, remote, bucket, num_nodes, db_size):
        base = 'https://s3.amazonaws.com/database-analysis'
        dir = '/tmp/'
        if remote.is_couchbase_installed():
            dir = dir + '/couchbase/{0}-{1}-{2}/'.format(
                num_nodes, 256, db_size)
            output, error = remote.execute_command('mkdir -p {0}'.format(dir))
            remote.log_command_output(output, error)
            file = '{0}_cb.tar.gz'.format(bucket)
            base_url = base + '/couchbase/{0}-{1}-{2}/{3}'.format(
                num_nodes, 256, db_size, file)
        else:
            dir = dir + '/membase/{0}-{1}-{2}/'.format(num_nodes, 1024,
                                                       db_size)
            output, error = remote.execute_command('mkdir -p {0}'.format(dir))
            remote.log_command_output(output, error)
            file = '{0}_mb.tar.gz'.format(bucket)
            base_url = base + '/membase/{0}-{1}-{2}/{3}'.format(
                num_nodes, 1024, db_size, file)

        info = remote.extract_remote_info()
        wget_command = 'wget'
        if info.type.lower() == 'windows':
            wget_command = \
                "cd {0} ;cmd /c 'c:\\automation\\wget.exe --no-check-certificate"\
                .format(dir)

        # Check whether the file already exists on the remote server;
        # if not, download the gzipped archive (it is extracted below)
        exist = remote.file_exists(dir, file)
        if not exist:
            additional_quote = ""
            if info.type.lower() == 'windows':
                additional_quote = "'"
            command = "{0} -v -O {1}{2} {3} {4} ".format(
                wget_command, dir, file, base_url, additional_quote)
            output, error = remote.execute_command(command)
            remote.log_command_output(output, error)

        if remote.is_couchbase_installed():
            if info.type.lower() == 'windows':
                destination_folder = testconstants.WIN_COUCHBASE_DATA_PATH
            else:
                destination_folder = testconstants.COUCHBASE_DATA_PATH
        else:
            if info.type.lower() == 'windows':
                destination_folder = testconstants.WIN_MEMBASE_DATA_PATH
            else:
                destination_folder = testconstants.MEMBASE_DATA_PATH
        if self.data_path:
            destination_folder = self.data_path
        untar_command = 'cd {1}; tar -xzf {0}'.format(dir + file,
                                                      destination_folder)
        output, error = remote.execute_command(untar_command)
        remote.log_command_output(output, error)

    def _exec_and_log(self, shell, cmd):
        """helper method to execute a command and log output"""
        if not cmd or not shell:
            return

        output, error = shell.execute_command(cmd)
        shell.log_command_output(output, error)

    def _build_tar_name(self,
                        bucket,
                        version="unknown_version",
                        file_base=None):
        """build tar file name.

        {file_base}-{version}-{bucket}.tar.gz
        """
        if not file_base:
            file_base = os.path.splitext(
                os.path.basename(
                    self.param("conf_file", PerfDefaults.conf_file)))[0]
        return "{0}-{1}-{2}.tar.gz".format(file_base, version, bucket)

    def _save_snapshot(self, server, bucket, file_base=None):
        """Save data files to a snapshot"""

        src_data_path = os.path.dirname(server.data_path
                                        or testconstants.COUCHBASE_DATA_PATH)
        dest_data_path = "{0}-snapshots".format(src_data_path)

        self.log.info(
            "server={0}, src_data_path={1}, dest_data_path={2}".format(
                server.ip, src_data_path, dest_data_path))

        shell = RemoteMachineShellConnection(server)

        build_name, short_version, full_version = \
            shell.find_build_version("/opt/couchbase/", "VERSION.txt", "cb")

        dest_file = self._build_tar_name(bucket, full_version, file_base)

        self._exec_and_log(shell, "mkdir -p {0}".format(dest_data_path))

        # save as a gzip file; if the file already exists, overwrite it
        # TODO: multiple buckets
        zip_cmd = "cd {0}; tar -cvzf {1}/{2} {3} {3}-data _*"\
            .format(src_data_path, dest_data_path, dest_file, bucket)
        self._exec_and_log(shell, zip_cmd)

        shell.disconnect()
        return True

    def _load_snapshot(self, server, bucket, file_base=None, overwrite=True):
        """Load data files from a snapshot"""

        dest_data_path = os.path.dirname(server.data_path
                                         or testconstants.COUCHBASE_DATA_PATH)
        src_data_path = "{0}-snapshots".format(dest_data_path)

        self.log.info(
            "server={0}, src_data_path={1}, dest_data_path={2}".format(
                server.ip, src_data_path, dest_data_path))

        shell = RemoteMachineShellConnection(server)

        build_name, short_version, full_version = \
            shell.find_build_version("/opt/couchbase/", "VERSION.txt", "cb")

        src_file = self._build_tar_name(bucket, full_version, file_base)

        if not shell.file_exists(src_data_path, src_file):
            self.log.error("file '{0}/{1}' does not exist".format(
                src_data_path, src_file))
            shell.disconnect()
            return False

        if not overwrite:
            self._save_snapshot(server, bucket, "{0}.tar.gz".format(
                time.strftime(PerfDefaults.strftime)))  # TODO: filename

        rm_cmd = "rm -rf {0}/{1} {0}/{1}-data {0}/_*".format(
            dest_data_path, bucket)
        self._exec_and_log(shell, rm_cmd)

        unzip_cmd = "cd {0}; tar -xvzf {1}/{2}".format(dest_data_path,
                                                       src_data_path, src_file)
        self._exec_and_log(shell, unzip_cmd)

        shell.disconnect()
        return True

    def save_snapshots(self, file_base, bucket):
        """Save snapshots on all servers"""
        if not self.input.servers or not bucket:
            self.log.error("invalid server list or bucket name")
            return False

        ClusterOperationHelper.stop_cluster(self.input.servers)

        for server in self.input.servers:
            self._save_snapshot(server, bucket, file_base)

        ClusterOperationHelper.start_cluster(self.input.servers)

        return True

    def load_snapshots(self, file_base, bucket):
        """Load snapshots on all servers"""
        if not self.input.servers or not bucket:
            self.log.error("invalid server list or bucket name")
            return False

        ClusterOperationHelper.stop_cluster(self.input.servers)

        for server in self.input.servers:
            if not self._load_snapshot(server, bucket, file_base):
                ClusterOperationHelper.start_cluster(self.input.servers)
                return False

        ClusterOperationHelper.start_cluster(self.input.servers)

        return True
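
    # Minimal usage sketch (illustrative, not part of the original suite):
    # persist a loaded data set once, then restore it before later runs.
    #   self.save_snapshots("eperf", "default")   # stop cluster, tar data files, restart
    #   ...
    #   self.load_snapshots("eperf", "default")   # stop cluster, untar snapshot, restart
    # The file_base and bucket values here are hypothetical.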

    def spec(self, reference):
        self.spec_reference = self.param("spec", reference)

    def mk_stats(self, verbosity):
        return StatsCollector(verbosity)

    def _get_src_version(self):
        """get testrunner version"""
        try:
            # check_output raises CalledProcessError on a non-zero exit code,
            # unlike the original Popen call, which never raised it
            result = subprocess.check_output(['git', 'rev-parse', 'HEAD'])
        except (OSError, subprocess.CalledProcessError) as e:
            self.log.error("unable to get src code version : {0}".format(e))
            return "unknown version"
        return result.decode().rstrip()[:7]
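
    # For reference: _get_src_version() returns the first 7 characters of the
    # current git SHA (e.g. "a1b2c3d", an illustrative value), which
    # start_stats() records as test_params['testrunner'].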

    def start_stats(self,
                    stats_spec,
                    servers=None,
                    process_names=('memcached', 'beam.smp'),
                    test_params=None,
                    client_id='',
                    collect_server_stats=True,
                    ddoc=None):
        if self.parami('stats', 1) == 0:
            return None

        servers = servers or self.input.servers
        clusters = None
        if hasattr(self, "get_region"):
            if self.parami("access_phase", 0):
                clusters = self.input.clusters
                if self.get_region() == "west":
                    clusters[0], clusters[1] = clusters[1], clusters[0]
        sc = self.mk_stats(False)
        bucket = self.param("bucket", "default")
        sc.start(servers,
                 bucket,
                 process_names,
                 stats_spec,
                 client_id,
                 collect_server_stats=collect_server_stats,
                 ddoc=ddoc,
                 clusters=clusters)
        test_params['testrunner'] = self._get_src_version()
        self.test_params = test_params
        self.sc = sc
        return self.sc

    def end_stats(self, sc, total_stats=None, stats_spec=None):
        if sc is None:
            return
        if stats_spec is None:
            stats_spec = self.spec_reference
        if total_stats:
            sc.total_stats(total_stats)
        self.log.info("stopping stats collector")
        sc.stop()
        self.log.info("stats collector is stopped")
        sc.export(stats_spec, self.test_params)

    def load(self,
             num_items,
             min_value_size=None,
             kind='binary',
             protocol='binary',
             ratio_sets=1.0,
             ratio_hot_sets=0.0,
             ratio_hot_gets=0.0,
             ratio_expirations=0.0,
             expiration=None,
             prefix="",
             doc_cache=1,
             use_direct=True,
             report=0,
             start_at=-1,
             collect_server_stats=True,
             is_eperf=False,
             hot_shift=0):
        cfg = {
            'max-items':
            num_items,
            'max-creates':
            num_items,
            'max-ops-per-sec':
            self.parami("load_mcsoda_max_ops_sec",
                        PerfDefaults.mcsoda_max_ops_sec),
            'min-value-size':
            min_value_size or self.parami("min_value_size", 1024),
            'ratio-sets':
            self.paramf("load_ratio_sets", ratio_sets),
            'ratio-misses':
            self.paramf("load_ratio_misses", 0.0),
            'ratio-creates':
            self.paramf("load_ratio_creates", 1.0),
            'ratio-deletes':
            self.paramf("load_ratio_deletes", 0.0),
            'ratio-hot':
            0.0,
            'ratio-hot-sets':
            ratio_hot_sets,
            'ratio-hot-gets':
            ratio_hot_gets,
            'ratio-expirations':
            ratio_expirations,
            'expiration':
            expiration or 0,
            'exit-after-creates':
            1,
            'json':
            int(kind == 'json'),
            'batch':
            self.parami("batch", PerfDefaults.batch),
            'vbuckets':
            self.vbucket_count,
            'doc-cache':
            doc_cache,
            'prefix':
            prefix,
            'report':
            report,
            'hot-shift':
            hot_shift,
            'cluster_name':
            self.param("cluster_name", "")
        }
        cur = {}
        if start_at >= 0:
            cur['cur-items'] = start_at
            cur['cur-gets'] = start_at
            cur['cur-sets'] = start_at
            cur['cur-ops'] = cur['cur-gets'] + cur['cur-sets']
            cur['cur-creates'] = start_at
            cfg['max-creates'] = start_at + num_items
            cfg['max-items'] = cfg['max-creates']

        cfg_params = cfg.copy()
        cfg_params['test_time'] = time.time()
        cfg_params['test_name'] = self.id()

        # phase: 'load' or 'reload'
        phase = "load"
        if self.parami("hot_load_phase", 0) == 1:
            # all gets
            if self.parami("hot_load_get", PerfDefaults.hot_load_get) == 1:
                cfg['ratio-sets'] = 0
                cfg['exit-after-creates'] = 0
                cfg['exit-after-gets'] = 1
                cfg['max-gets'] = start_at + num_items
            phase = "reload"

        sc = None  # stats collector; set only for eperf runs below
        if is_eperf:
            collect_server_stats = self.parami("prefix", 0) == 0
            client_id = self.parami("prefix", 0)
            sc = self.start_stats(
                "{0}.{1}".format(self.spec_reference,
                                 phase),  # stats spec e.x: testname.load
                test_params=cfg_params,
                client_id=client_id,
                collect_server_stats=collect_server_stats)

        # For black-box, multi-node tests, always use membase-binary
        if self.is_multi_node:
            protocol = self.mk_protocol(host=self.input.servers[0].ip,
                                        port=self.input.servers[0].port)

        protocol, host_port, user, pswd = \
            self.protocol_parse(protocol, use_direct=use_direct)

        if not user.strip():
            if "11211" in host_port:
                user = self.param("bucket", "default")
            else:
                user = self.input.servers[0].rest_username
        if not pswd.strip():
            if not "11211" in host_port:
                pswd = self.input.servers[0].rest_password

        self.log.info("mcsoda %s %s %s %s" % (protocol, host_port, user, pswd))
        self.log.info("mcsoda cfg:\n" + pprint.pformat(cfg))
        self.log.info("mcsoda cur:\n" + pprint.pformat(cfg))

        cur, start_time, end_time = \
            self.mcsoda_run(cfg, cur, protocol, host_port, user, pswd,
                            stats_collector=sc, heartbeat=self.parami("mcsoda_heartbeat", 0),
                            why="load", bucket=self.param("bucket", "default"))
        self.num_items_loaded = num_items
        ops = {
            'tot-sets': cur.get('cur-sets', 0),
            'tot-gets': cur.get('cur-gets', 0),
            'tot-items': cur.get('cur-items', 0),
            'tot-creates': cur.get('cur-creates', 0),
            'tot-misses': cur.get('cur-misses', 0),
            "start-time": start_time,
            "end-time": end_time
        }

        if is_eperf:
            if self.parami("load_wait_until_drained", 1) == 1:
                self.wait_until_drained()
            if self.parami("load_wait_until_repl",
                           PerfDefaults.load_wait_until_repl) == 1:
                self.wait_until_repl()
            self.end_stats(sc, ops, "{0}.{1}".format(self.spec_reference,
                                                     phase))

        return ops, start_time, end_time

    def mcsoda_run(self,
                   cfg,
                   cur,
                   protocol,
                   host_port,
                   user,
                   pswd,
                   stats_collector=None,
                   stores=None,
                   ctl=None,
                   heartbeat=0,
                   why="",
                   bucket="default",
                   backups=None):
        return mcsoda.run(cfg,
                          cur,
                          protocol,
                          host_port,
                          user,
                          pswd,
                          stats_collector=stats_collector,
                          stores=stores,
                          ctl=ctl,
                          heartbeat=heartbeat,
                          why=why,
                          bucket=bucket,
                          backups=backups)

    def rebalance_nodes(self, num_nodes, cluster=None):
        """Rebalance cluster(s) if more than 1 node provided"""
        if len(self.input.servers) == 1 or num_nodes == 1:
            self.log.warn("running on single node cluster")
            return
        else:
            self.log.info(
                "rebalancing nodes - num_nodes = {0}".format(num_nodes))

        if not cluster:
            cluster = self.input.servers
        status, _ = RebalanceHelper.rebalance_in(cluster,
                                                 num_nodes - 1,
                                                 do_shuffle=False)
        self.assertTrue(status)

    def delayed_rebalance_worker(self,
                                 servers,
                                 num_nodes,
                                 delay_seconds,
                                 sc,
                                 max_retries=PerfDefaults.reb_max_retries,
                                 reb_mode=PerfDefaults.REB_MODE.IN):
        time.sleep(delay_seconds)
        gmt_now = time.strftime(PerfDefaults.strftime, time.gmtime())
        self.log.info("rebalance started at {0} GMT".format(gmt_now))

        if not sc:
            self.log.error("invalid stats collector")
            return
        status = False
        retries = 0
        while not status and retries <= max_retries:
            start_time = time.time()
            if reb_mode == PerfDefaults.REB_MODE.OUT:
                status, nodes = RebalanceHelper.rebalance_out(
                    servers, num_nodes)
            elif reb_mode == PerfDefaults.REB_MODE.SWAP:
                status, nodes = RebalanceHelper.rebalance_swap(
                    servers, num_nodes)
            else:
                status, nodes = RebalanceHelper.rebalance_in(
                    servers, num_nodes - 1, do_check=(not retries))
            end_time = time.time()
            self.log.info("status: {0}, nodes: {1}, retries: {2}".format(
                status, nodes, retries))
            if not status:
                retries += 1
                time.sleep(delay_seconds)
        sc.reb_stats(start_time, end_time - start_time)
        if self.parami("master_events", PerfDefaults.master_events):
            filename = "master_events.log"
            with open(filename, "w") as f:
                f.write(self.rest.diag_master_events()[1])

    def delayed_rebalance(self,
                          num_nodes,
                          delay_seconds=10,
                          max_retries=PerfDefaults.reb_max_retries,
                          reb_mode=0,
                          sync=False):
        self.log.info("delayed_rebalance")
        if sync:
            PerfBase.delayed_rebalance_worker(self, self.input.servers,
                                              num_nodes, delay_seconds,
                                              self.sc, max_retries, reb_mode)
        else:
            t = threading.Thread(target=PerfBase.delayed_rebalance_worker,
                                 args=(self, self.input.servers, num_nodes,
                                       delay_seconds, self.sc, max_retries,
                                       reb_mode))
            t.daemon = True
            t.start()

    @staticmethod
    def set_auto_compaction(server, parallel_compaction, percent_threshold):
        rest = RestConnection(server)
        rest.set_auto_compaction(
            parallel_compaction,
            dbFragmentThresholdPercentage=percent_threshold,
            viewFragmntThresholdPercentage=percent_threshold)

    @staticmethod
    def delayed_compaction_worker(servers, parallel_compaction,
                                  percent_threshold, delay_seconds):
        time.sleep(delay_seconds)
        PerfBase.set_auto_compaction(servers[0], parallel_compaction,
                                     percent_threshold)

    def delayed_compaction(self,
                           parallel_compaction="false",
                           percent_threshold=0.01,
                           delay_seconds=10):
        t = threading.Thread(target=PerfBase.delayed_compaction_worker,
                             args=(self.input.servers, parallel_compaction,
                                   percent_threshold, delay_seconds))
        t.daemon = True
        t.start()

    def loop(self,
             num_ops=None,
             num_items=None,
             max_items=None,
             max_creates=None,
             min_value_size=None,
             exit_after_creates=0,
             kind='binary',
             protocol='binary',
             clients=1,
             ratio_misses=0.0,
             ratio_sets=0.0,
             ratio_creates=0.0,
             ratio_deletes=0.0,
             ratio_hot=0.2,
             ratio_hot_sets=0.95,
             ratio_hot_gets=0.95,
             ratio_expirations=0.0,
             expiration=None,
             test_name=None,
             prefix="",
             doc_cache=1,
             use_direct=True,
             collect_server_stats=True,
             start_at=-1,
             report=0,
             ctl=None,
             hot_shift=0,
             is_eperf=False,
             ratio_queries=0,
             queries=0,
             ddoc=None):
        num_items = num_items or self.num_items_loaded

        hot_stack_size = \
            self.parami('hot_stack_size', PerfDefaults.hot_stack_size) or \
            (num_items * ratio_hot)

        cfg = {
            'max-items':
            max_items or num_items,
            'max-creates':
            max_creates or 0,
            'max-ops-per-sec':
            self.parami("mcsoda_max_ops_sec", PerfDefaults.mcsoda_max_ops_sec),
            'min-value-size':
            min_value_size or self.parami("min_value_size", 1024),
            'exit-after-creates':
            exit_after_creates,
            'ratio-sets':
            ratio_sets,
            'ratio-misses':
            ratio_misses,
            'ratio-creates':
            ratio_creates,
            'ratio-deletes':
            ratio_deletes,
            'ratio-hot':
            ratio_hot,
            'ratio-hot-sets':
            ratio_hot_sets,
            'ratio-hot-gets':
            ratio_hot_gets,
            'ratio-expirations':
            ratio_expirations,
            'ratio-queries':
            ratio_queries,
            'expiration':
            expiration or 0,
            'threads':
            clients,
            'json':
            int(kind == 'json'),
            'batch':
            self.parami("batch", PerfDefaults.batch),
            'vbuckets':
            self.vbucket_count,
            'doc-cache':
            doc_cache,
            'prefix':
            prefix,
            'queries':
            queries,
            'report':
            report,
            'hot-shift':
            hot_shift,
            'hot-stack':
            self.parami("hot_stack", PerfDefaults.hot_stack),
            'hot-stack-size':
            hot_stack_size,
            'hot-stack-rotate':
            self.parami("hot_stack_rotate", PerfDefaults.hot_stack_rotate),
            'cluster_name':
            self.param("cluster_name", ""),
            'observe':
            self.param("observe", PerfDefaults.observe),
            'obs-backoff':
            self.paramf('obs_backoff', PerfDefaults.obs_backoff),
            'obs-max-backoff':
            self.paramf('obs_max_backoff', PerfDefaults.obs_max_backoff),
            'obs-persist-count':
            self.parami('obs_persist_count', PerfDefaults.obs_persist_count),
            'obs-repl-count':
            self.parami('obs_repl_count', PerfDefaults.obs_repl_count),
            'woq-pattern':
            self.parami('woq_pattern', PerfDefaults.woq_pattern),
            'woq-verbose':
            self.parami('woq_verbose', PerfDefaults.woq_verbose),
            'cor-pattern':
            self.parami('cor_pattern', PerfDefaults.cor_pattern),
            'cor-persist':
            self.parami('cor_persist', PerfDefaults.cor_persist),
            'time':
            self.parami('time', 0),
            'cbm':
            self.parami('cbm', PerfDefaults.cbm),
            'cbm-host':
            self.param('cbm_host', PerfDefaults.cbm_host),
            'cbm-port':
            self.parami('cbm_port', PerfDefaults.cbm_port)
        }

        cfg_params = cfg.copy()
        cfg_params['test_time'] = time.time()
        cfg_params['test_name'] = test_name
        client_id = ''
        stores = None

        if is_eperf:
            client_id = self.parami("prefix", 0)
        sc = None
        if self.parami("collect_stats", 1):
            sc = self.start_stats(self.spec_reference + ".loop",
                                  test_params=cfg_params,
                                  client_id=client_id,
                                  collect_server_stats=collect_server_stats,
                                  ddoc=ddoc)

        self.cur = {'cur-items': num_items}
        if start_at >= 0:
            self.cur['cur-gets'] = start_at
        if num_ops is None:
            num_ops = num_items
        if isinstance(num_ops, int):
            cfg['max-ops'] = num_ops
        else:
            # Here, we num_ops looks like "time to run" tuple of...
            # ('seconds', integer_num_of_seconds_to_run)
            cfg['time'] = num_ops[1]

        # For black-box, multi-node tests, always use membase-binary
        if self.is_multi_node:
            protocol = self.mk_protocol(host=self.input.servers[0].ip,
                                        port=self.input.servers[0].port)

        backups = self.get_backups(protocol)
        self.log.info("mcsoda protocol %s" % protocol)
        protocol, host_port, user, pswd = \
            self.protocol_parse(protocol, use_direct=use_direct)

        if not user.strip():
            if "11211" in host_port:
                user = self.param("bucket", "default")
            else:
                user = self.input.servers[0].rest_username
        if not pswd.strip():
            if not "11211" in host_port:
                pswd = self.input.servers[0].rest_password

        self.log.info("mcsoda %s %s %s %s" % (protocol, host_port, user, pswd))
        self.log.info("mcsoda cfg:\n" + pprint.pformat(cfg))
        self.log.info("mcsoda cur:\n" + pprint.pformat(cfg))
        self.log.info("mcsoda backups: %s" % backups)

        # For query tests always use StoreCouchbase
        if protocol == "couchbase":
            stores = [StoreCouchbase()]

        self.cur, start_time, end_time = \
            self.mcsoda_run(cfg, self.cur, protocol, host_port, user, pswd,
                            stats_collector=sc, ctl=ctl, stores=stores,
                            heartbeat=self.parami("mcsoda_heartbeat", 0),
                            why="loop", bucket=self.param("bucket", "default"),
                            backups=backups)

        ops = {
            'tot-sets': self.cur.get('cur-sets', 0),
            'tot-gets': self.cur.get('cur-gets', 0),
            'tot-items': self.cur.get('cur-items', 0),
            'tot-creates': self.cur.get('cur-creates', 0),
            'tot-misses': self.cur.get('cur-misses', 0),
            "start-time": start_time,
            "end-time": end_time
        }

        if self.parami("loop_wait_until_drained",
                       PerfDefaults.loop_wait_until_drained):
            self.wait_until_drained()

        if self.parami("loop_wait_until_repl",
                       PerfDefaults.loop_wait_until_repl):
            self.wait_until_repl()

        if self.parami("collect_stats", 1) and \
                not self.parami("reb_no_fg", PerfDefaults.reb_no_fg):
            self.end_stats(sc, ops, self.spec_reference + ".loop")

        why = self.params("why", "main")
        prefix = self.parami("prefix", 0)
        self.log.info("finished")

        return ops, start_time, end_time
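
    # Illustrative calls (hypothetical parameter values): loop() can be bounded
    # either by an operation count or by wall-clock time, e.g.
    #   self.loop(num_ops=1000000, num_items=self.num_items_loaded)
    #   self.loop(num_ops=('seconds', 600), num_items=self.num_items_loaded)
    # In the second form only num_ops[1] is used, as cfg['time'].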

    def wait_until_drained(self):
        self.log.info("draining disk write queue")

        master = self.input.servers[0]
        bucket = self.param("bucket", "default")
        ready = RebalanceHelper.wait_for_persistence(master, bucket)
        self.assertTrue(ready, "not all items persisted. see logs")

        self.log.info("disk write queue has been drained")
        return time.time()

    def wait_until_repl(self):
        self.log.info("waiting for replication")

        master = self.input.servers[0]
        bucket = self.param("bucket", "default")

        RebalanceHelper.wait_for_stats_on_all(
            master,
            bucket,
            'vb_replica_queue_size',
            0,
            fn=RebalanceHelper.wait_for_stats_no_timeout)

        RebalanceHelper.wait_for_stats_on_all(
            master,
            bucket,
            'ep_tap_replica_queue_itemondisk',
            0,
            fn=RebalanceHelper.wait_for_stats_no_timeout)

        RebalanceHelper.wait_for_stats_on_all(
            master,
            bucket,
            'ep_tap_rebalance_queue_backfillremaining',
            0,
            fn=RebalanceHelper.wait_for_stats_no_timeout)

        RebalanceHelper.wait_for_stats_on_all(
            master,
            bucket,
            'ep_tap_replica_qlen',
            0,
            fn=RebalanceHelper.wait_for_stats_no_timeout)

        self.log.info("replication is done")

    def warmup(self, collect_stats=True, flush_os_cache=False):
        """
        Restart cluster and wait for it to warm up.
        In current version, affect the master node only.
        """
        if not self.input.servers:
            self.log.error("empty server list")
            return

        if collect_stats:
            client_id = self.parami("prefix", 0)
            test_params = {
                'test_time': time.time(),
                'test_name': self.id(),
                'json': 0
            }
            sc = self.start_stats(self.spec_reference + ".warmup",
                                  test_params=test_params,
                                  client_id=client_id)

        self.log.info("preparing to warmup cluster ...")

        server = self.input.servers[0]
        shell = RemoteMachineShellConnection(server)

        start_time = time.time()

        self.log.info("stopping couchbase ... ({0})".format(server.ip))
        shell.stop_couchbase()
        self.log.info("couchbase stopped ({0})".format(server.ip))

        if flush_os_cache:
            self.log.info("flushing os cache ...")
            shell.flush_os_caches()

        shell.start_couchbase()
        self.log.info("couchbase restarted ({0})".format(server.ip))

        self.wait_until_warmed_up()
        self.log.info("warmup finished")

        end_time = time.time()
        ops = {
            'tot-sets': 0,
            'tot-gets': 0,
            'tot-items': 0,
            'tot-creates': 0,
            'tot-misses': 0,
            "start-time": start_time,
            "end-time": end_time
        }

        if collect_stats:
            self.end_stats(sc, ops, self.spec_reference + ".warmup")

    def wait_until_warmed_up(self, master=None):
        if not master:
            master = self.input.servers[0]

        bucket = self.param("bucket", "default")

        fn = RebalanceHelper.wait_for_mc_stats_no_timeout
        for bucket in self.buckets:
            RebalanceHelper.wait_for_stats_on_all(master,
                                                  bucket,
                                                  'ep_warmup_thread',
                                                  'complete',
                                                  fn=fn)

    def set_param(self, name, val):

        input = getattr(self, "input", TestInputSingleton.input)
        input.test_params[name] = str(val)

        return True

    def wait_for_task_completion(self, task='indexer'):
        """Wait for ns_server task to finish"""
        t0 = time.time()
        self.log.info("Waiting 30 seconds before {0} monitoring".format(task))
        time.sleep(30)

        while True:
            tasks = self.rest.ns_server_tasks()
            if tasks:
                try:
                    progress = [
                        t['progress'] for t in tasks if t['type'] == task
                    ]
                except TypeError:
                    self.log.error(tasks)
                else:
                    if progress:
                        self.log.info("{0} progress: {1}".format(
                            task, progress))
                        time.sleep(10)
                    else:
                        break

        t1 = time.time()
        self.log.info("Time taken to perform task: {0} sec".format(t1 - t0))

    def param(self, name, default_value):
        input = getattr(self, "input", TestInputSingleton.input)
        return input.test_params.get(name, default_value)

    def parami(self, name, default_int):
        return int(self.param(name, default_int))

    def paramf(self, name, default_float):
        return float(self.param(name, default_float))

    def params(self, name, default_str):
        return str(self.param(name, default_str))
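
    # Usage sketch for the param helpers above (hypothetical keys and defaults):
    #   items = self.parami("items", 100000)       # int test param
    #   ratio = self.paramf("ratio_sets", 0.5)     # float test param
    #   name = self.params("bucket", "default")    # str test param
    # Each falls back to TestInputSingleton.input when self.input is not set.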
Exemplo n.º 22
0
 def set_up_rest(self, master):
     self.rest = RestConnection(master)
     self.rest_helper = RestHelper(self.rest)
Exemplo n.º 23
0
class FTSServerGroups(FTSBaseTest, NewUpgradeBaseTest):
    def setUp(self):
        super(FTSServerGroups, self).setUp()
        self.rest = RestConnection(self._cb_cluster.get_master_node())
        self.helper = RestHelper(self.rest)
        self.default_group_name = "Group 1"
        self.fts_query = {"match": "emp", "field": "type"}

        self._cleanup_server_groups()

    def tearDown(self):
        super(FTSServerGroups, self).tearDown()

    def test_mixed_cluster(self):
        self.initial_version = self._input.param('kv_build', '6.6.3-9700')

        self.product = self._input.param('product', 'couchbase-server')
        self.initial_vbuckets = 1024

        self.debug_logs = False
        self.init_nodes = True
        self.initial_build_type = None
        self.use_hostnames = False

        kv_nodes = self._cb_cluster.get_kv_nodes()
        fts_nodes = self._cb_cluster.get_fts_nodes()
        self._install(servers=kv_nodes)

        self.initial_version = self._input.param('upgrade_version',
                                                 '7.1.0-1092')

        self._install(servers=fts_nodes)

        super(FTSServerGroups, self).setUp()
        self.test_nodes_ejection()

    def test_nodes_ejection(self):
        eject_nodes_structure = self._input.param("eject_nodes", None)
        eject_type = self._input.param("eject_type", None)
        initial_query_zones = self._input.param("query_zone_before_eject",
                                                None).split("|")
        post_eject_query_zones = self._input.param("query_zone_after_eject",
                                                   None).split("|")

        self.build_cluster()
        self.load_data()
        idx = self.build_index()

        fts_nodes = []
        for initial_query_zone in initial_query_zones:
            fts_nodes.extend(
                self.get_zone_healthy_fts_nodes(zone=initial_query_zone))

        initial_hits = self.query_node(index=idx, node=fts_nodes[0])

        for node in fts_nodes[1:]:
            hits = self.query_node(index=idx, node=node)
            self.assertEqual(
                initial_hits, hits,
                "Difference in search results before node eject detected.")

        ejected_nodes = self.eject_nodes(
            eject_nodes_structure=eject_nodes_structure, eject_type=eject_type)

        post_eject_query_nodes = []
        for post_eject_query_zone in post_eject_query_zones:
            fts_nodes = self.get_zone_healthy_fts_nodes(
                zone=post_eject_query_zone)
            post_eject_query_nodes.extend(fts_nodes)

        self._maybe_rebalance()

        try:
            for healthy_fts_node in post_eject_query_nodes:
                post_eject_hits, _, _, _ = idx.execute_query(
                    self.fts_query, node=healthy_fts_node)
                self.assertEqual(
                    initial_hits, post_eject_hits,
                    "Hits are different after server groups modification!")
        finally:
            if eject_type == "shutdown":
                for ejected_node in ejected_nodes:
                    remote = RemoteMachineShellConnection(ejected_node)
                    remote.start_couchbase()

    def test_index_modification(self):
        mod_type = self._input.param("mod_type", None)
        self.build_cluster()
        self.load_data()
        idx = self.build_index()

        self.update_index(index=idx, mod_type=mod_type)

        self._maybe_rebalance()

        fts_nodes = self._cb_cluster.get_fts_nodes()
        etalon_hits = self.query_node(index=idx, node=fts_nodes[0])
        for node in fts_nodes[1:]:
            hits = self.query_node(index=idx, node=node)
            self.assertEqual(
                etalon_hits, hits,
                "Found differences in fts request results between nodes after index modification"
            )

    def test_replicas_distribution(self):
        final_replicas = self._input.param("final_replicas", 0)
        self.build_cluster()
        self.load_data()
        idx = self.build_index()

        idx.update_num_replicas(final_replicas)

        self.wait_for_indexing_complete(item_count=1000)
        self._maybe_rebalance()

        index_replica = idx.get_num_replicas()
        zones_with_replica = self.calculate_zones_with_replica(index=idx)
        self.assertEqual(
            index_replica + 1, zones_with_replica,
            f"Found incorrect replicas distribution: index replicas: {index_replica}"
            f", zones with replica count: {zones_with_replica}")

    def test_partitions_distribution(self):
        index_partitions = int(self._input.param("partitions", 1))

        self.build_cluster()
        self.load_data()
        idx = self.build_index()
        self.wait_for_indexing_complete(item_count=1000)

        self._maybe_rebalance()

        for zone in self.rest.get_zone_names():
            zone_fts_nodes = self.get_zone_healthy_fts_nodes(zone=zone)
            if len(zone_fts_nodes) > 0:
                zone_partitions_count = 0
                for node in zone_fts_nodes:
                    rest_client = RestConnection(node)
                    _, num_pindexes = rest_client.get_fts_stats(
                        index_name=idx.name,
                        bucket_name=idx.source_bucket.name,
                        stat_name="num_pindexes_actual")
                    zone_partitions_count = zone_partitions_count + num_pindexes
                self.assertEqual(
                    zone_partitions_count, index_partitions,
                    "Actual initial partitions distribution differs from expected."
                )

    def test_server_groups_modification(self):
        index_partitions = int(self._input.param("partitions", 1))
        final_replicas = self._input.param("final_replicas", None)
        self.build_cluster()
        available_nodes = self.rebuild_cluster_to_initial_state()
        self.load_data()
        idx = self.build_index()
        self.wait_for_indexing_complete(item_count=1000)
        self.modify_server_groups(available_nodes=available_nodes)
        if final_replicas:
            idx.update_num_replicas(final_replicas)
        self.wait_for_indexing_complete(item_count=1000)

        self._maybe_rebalance()

        for zone in self.rest.get_zone_names():
            zone_fts_nodes = self.get_zone_healthy_fts_nodes(zone=zone)
            if len(zone_fts_nodes) > 0:
                zone_partitions_count = 0
                for node in zone_fts_nodes:
                    rest_client = RestConnection(node)
                    _, num_pindexes = rest_client.get_fts_stats(
                        index_name=idx.name,
                        bucket_name=idx.source_bucket.name,
                        stat_name="num_pindexes_actual")
                    zone_partitions_count = zone_partitions_count + num_pindexes
                self.assertEqual(
                    zone_partitions_count, index_partitions,
                    "Actual post server groups update partitions distribution differs from expected."
                )

        fts_nodes = self._cb_cluster.get_fts_nodes()

        self._maybe_rebalance()

        initial_hits = self.query_node(index=idx, node=fts_nodes[0])
        for node in fts_nodes[1:]:
            hits = self.query_node(index=idx, node=node)
            self.assertEqual(
                initial_hits, hits,
                "Difference in search results after server groups modification is detected."
            )

    def test_creation_order(self):
        index_partitions = int(self._input.param("partitions", 1))
        self.load_data()
        ordering = self._input.param("creation_order", None)

        if 'groups_first' == ordering:
            self.build_cluster()
            idx = self.build_index()
            self.wait_for_indexing_complete(item_count=1000)
        elif 'index_in_between' == ordering:
            self.build_cluster()
            available_nodes = self.rebuild_cluster_to_initial_state()
            idx = self.build_index()
            self.wait_for_indexing_complete(item_count=1000)
            self.modify_server_groups(available_nodes=available_nodes)
        else:
            idx = self.build_index()
            self.wait_for_indexing_complete(item_count=1000)
            self.build_cluster()
            self.wait_for_indexing_complete(item_count=1000)

        self._maybe_rebalance()

        for zone in self.rest.get_zone_names():
            zone_fts_nodes = self.get_zone_healthy_fts_nodes(zone=zone)
            if len(zone_fts_nodes) > 0:
                zone_partitions_count = 0
                for node in zone_fts_nodes:
                    rest_client = RestConnection(node)
                    _, num_pindexes = rest_client.get_fts_stats(
                        index_name=idx.name,
                        bucket_name=idx.source_bucket.name,
                        stat_name="num_pindexes_actual")
                    zone_partitions_count = zone_partitions_count + num_pindexes
                self.assertEqual(
                    zone_partitions_count, index_partitions,
                    "Actual initial partitions distribution differs from expected."
                )

        fts_nodes = self._cb_cluster.get_fts_nodes()
        initial_hits = self.query_node(index=idx, node=fts_nodes[0])
        for node in fts_nodes[1:]:
            hits = self.query_node(index=idx, node=node)
            self.assertEqual(
                initial_hits, hits,
                "Difference in search results after server groups modification is detected."
            )

    def test_best_effort_distribution(self):
        eject_nodes_structure = self._input.param("eject_nodes", None)
        eject_type = self._input.param("eject_type", None)

        self.build_cluster()
        self.load_data()
        idx = self.build_index()
        self.wait_for_indexing_complete(item_count=1000)

        fts_nodes = self._cb_cluster.get_fts_nodes()
        initial_hits = self.query_node(index=idx, node=fts_nodes[0])

        self.eject_nodes(eject_nodes_structure=eject_nodes_structure,
                         eject_type=eject_type)

        self.wait_for_indexing_complete(item_count=1000)

        self._maybe_rebalance()

        fts_nodes = self._cb_cluster.get_fts_nodes()
        for node in fts_nodes:
            hits = self.query_node(index=idx, node=node)
            self.assertEqual(
                initial_hits, hits,
                "Difference in search results after server group failover is detected."
            )

    def test_best_effort_distribution_max_group(self):
        eject_nodes_structure = self._input.param("eject_nodes", None)
        eject_type = self._input.param("eject_type", None)
        initial_server_groups = {}
        sg_structure = self._input.param("server_groups", None)
        server_groups = sg_structure.split("|")

        for server_group in server_groups:
            group_name = server_group.split("-")[0]
            group_structure = server_group.split("-")[1]
            initial_server_groups[group_name] = group_structure

        self.build_cluster()
        self.load_data()
        idx = self.build_index()
        self.wait_for_indexing_complete(item_count=1000)

        fts_nodes = self._cb_cluster.get_fts_nodes()
        initial_hits = self.query_node(index=idx, node=fts_nodes[0])

        self.eject_nodes(eject_nodes_structure=eject_nodes_structure,
                         eject_type=eject_type)

        self.wait_for_indexing_complete(item_count=1000)

        self._maybe_rebalance()

        # server group having maximum number of partitions
        max_server_group = self.find_max_server_group(idx=idx)

        # Leave only the maximal server group alive
        for zone in initial_server_groups:
            if zone != max_server_group:
                self.eject_nodes(
                    eject_nodes_structure=
                    f"{zone}-{initial_server_groups[zone].replace('D:','')}",
                    eject_type="failover")

        max_group_fts_nodes = self.get_zone_healthy_fts_nodes(max_server_group)

        # find the fts node in the maximal server group holding the fewest index partitions
        min_fts_node = None
        min_num_partitions = 1000
        for i in range(len(max_group_fts_nodes)):
            curr_partitions = self.get_num_partitions_distribution(
                index=idx, node=max_group_fts_nodes[i])
            if curr_partitions < min_num_partitions:
                min_fts_node = max_group_fts_nodes[i]
                min_num_partitions = curr_partitions

        # test that fts node individually by failing over all other fts nodes in the maximal server group
        for i in range(len(max_group_fts_nodes)):
            if max_group_fts_nodes[i].ip != min_fts_node.ip:
                self._cb_cluster.failover(graceful=False,
                                          node=max_group_fts_nodes[i])

        self._maybe_rebalance()

        min_fts_node_hits = self.query_node(index=idx, node=min_fts_node)
        self.assertEqual(initial_hits, min_fts_node_hits,
                         "Best effort distribution test failed.")

    def test_best_effort_distribution_negative(self):
        eject_nodes_structure = self._input.param("eject_nodes", None)
        eject_type = self._input.param("eject_type", None)
        self.build_cluster()
        self.load_data()
        idx = self.build_index()
        self.wait_for_indexing_complete(item_count=1000)

        self.eject_nodes(eject_nodes_structure=eject_nodes_structure,
                         eject_type=eject_type)

        self.wait_for_indexing_complete(item_count=1000)

        self._maybe_rebalance()

        fts_nodes = self._cb_cluster.get_fts_nodes()
        for node in fts_nodes:
            hits = self.query_node(index=idx, node=node)
            self.assertGreater(
                hits, 0,
                "Partial search results were not returned by alive fts node.")

    def test_replicas_distribution_negative(self):
        self.build_cluster()
        self.load_data()
        self._maybe_rebalance()

        try:
            idx = self.build_index()
            self.wait_for_indexing_complete(item_count=1000)
            self.fail(
                "Was able to create index having 2 replicas for a cluster containing just 2 fts nodes but 3 server groups."
            )
        except Exception as e:
            self.assertTrue(
                "cluster needs 3 search nodes to support the requested replica count of 2"
                in str(e),
                "Unexpected error message while trying to create index with incorrect number of replicas."
            )

    def test_group_autofailover(self):
        eject_nodes_structure = self._input.param("eject_nodes", None)
        eject_type = self._input.param("eject_type", None)
        self.build_cluster()
        self.load_data()
        idx = self.build_index()
        self.wait_for_indexing_complete(item_count=1000)
        self.rest.update_autofailover_settings(True,
                                               60,
                                               enableServerGroup=True)

        ejected_nodes = self.eject_nodes(
            eject_nodes_structure=eject_nodes_structure, eject_type=eject_type)

        try:
            self.sleep(
                120, "Waiting for server group auto failover to be started.")
            initial_hits = self.query_node(
                index=idx, node=self._cb_cluster.get_fts_nodes()[0])

            for zone in self.rest.get_zone_names():
                fts_nodes = self.get_zone_healthy_fts_nodes(zone=zone)
                for node in fts_nodes:
                    hits = self.query_node(index=idx, node=node)
                    self.assertEqual(
                        initial_hits, hits,
                        "Difference in search results after server group auto-failover is detected."
                    )

        finally:
            for ejected_node in ejected_nodes:
                remote = RemoteMachineShellConnection(ejected_node)
                remote.start_couchbase()

    def find_max_server_group(self, idx=None):
        max_partitions_count = 0
        max_group = None
        for zone in self.rest.get_zone_names():
            zone_fts_nodes = self.get_zone_healthy_fts_nodes(zone=zone)
            if len(zone_fts_nodes) > 0:
                zone_partitions_count = 0
                for node in zone_fts_nodes:
                    rest_client = RestConnection(node)
                    _, num_pindexes = rest_client.get_fts_stats(
                        index_name=idx.name,
                        bucket_name=idx.source_bucket.name,
                        stat_name="num_pindexes_actual")
                    zone_partitions_count = zone_partitions_count + num_pindexes
                if zone_partitions_count > max_partitions_count:
                    max_partitions_count = zone_partitions_count
                    max_group = zone
        return max_group

    def modify_server_groups(self, available_nodes=None):
        eject_nodes_structure = self._input.param("eject_nodes", None)
        eject_type = self._input.param("eject_type", None)

        operation = self._input.param("operation", None)
        if 'add_group' == operation:
            add_groups = self._input.param("add_server_group", None).split("|")
            for add_group in add_groups:
                group_name = add_group.split("-")[0]
                group_nodes = add_group.split("-")[1].split(":")
                self.rest.add_zone(group_name)
                nodes_to_move = []
                for node in group_nodes:
                    node_to_shuffle = available_nodes.pop(0)
                    nodes_to_move.append(node_to_shuffle.ip)
                    if 'D' == node:
                        self._cb_cluster.rebalance_in_node(
                            nodes_in=[node_to_shuffle],
                            services=['kv'],
                            sleep_before_rebalance=0)
                    elif 'F' == node:
                        self._cb_cluster.rebalance_in_node(
                            nodes_in=[node_to_shuffle],
                            services=['fts'],
                            sleep_before_rebalance=0)
                    else:
                        self.fail(f"Unsupported node type found {node}!")
                self.rest.shuffle_nodes_in_zones(
                    moved_nodes=nodes_to_move,
                    source_zone=self.default_group_name,
                    target_zone=group_name)
        elif 'remove_group' == operation:
            self.eject_nodes(eject_nodes_structure=eject_nodes_structure,
                             eject_type=eject_type)
        elif 'add_nodes' == operation:
            extend_groups = self._input.param("groups_additions",
                                              None).split("|")
            for extended_group in extend_groups:
                group_name = extended_group.split("-")[0]
                nodes = extended_group.split("-")[1]
                nodes_to_move = []
                for node in nodes:
                    node_to_shuffle = available_nodes.pop(0)
                    if 'D' == node:
                        self._cb_cluster.rebalance_in_node(
                            nodes_in=[node_to_shuffle],
                            services=['kv'],
                            sleep_before_rebalance=0)
                    elif 'F' == node:
                        self._cb_cluster.rebalance_in_node(
                            nodes_in=[node_to_shuffle],
                            services=['fts'],
                            sleep_before_rebalance=0)
                    nodes_to_move.append(node_to_shuffle.ip)
                    self.rest.shuffle_nodes_in_zones(
                        moved_nodes=nodes_to_move,
                        source_zone=self.default_group_name,
                        target_zone=group_name)
        elif 'swap_nodes' == operation:
            server_group1_fts_node = None
            server_group2_fts_node = None
            server_group1_nodes = self.rest.get_nodes_in_zone('sg1')
            server_group2_nodes = self.rest.get_nodes_in_zone('sg2')
            for key in server_group1_nodes:
                for fts_node in self._cb_cluster.get_fts_nodes():
                    if fts_node.ip == key:
                        server_group1_fts_node = fts_node
                        break
            for key in server_group2_nodes:
                for fts_node in self._cb_cluster.get_fts_nodes():
                    if fts_node.ip == key:
                        server_group2_fts_node = fts_node
                        break
            self.rest.shuffle_nodes_in_zones(
                moved_nodes=[server_group1_fts_node.ip],
                source_zone='sg1',
                target_zone='sg2')
            self.rest.shuffle_nodes_in_zones(
                moved_nodes=[server_group2_fts_node.ip],
                source_zone='sg2',
                target_zone='sg1')
        elif 'rename' == operation:
            self.rest.rename_zone('sg1', 'sg1_1')
            self.rest.rename_zone('sg2', 'sg1_2')

    def get_num_partitions_distribution(self, index=None, node=None):
        rest_client = RestConnection(node)
        _, num_pindexes = rest_client.get_fts_stats(
            index_name=index.name,
            bucket_name=index.source_bucket.name,
            stat_name="num_pindexes_actual")
        return num_pindexes

    def calculate_zones_with_replica(self, index=None):
        zones_list = self.rest.get_all_zones_info()
        zones_with_replica = 0
        for zone in zones_list['groups']:
            replica_found = False
            nodes = zone['nodes']
            for node in nodes:
                if replica_found:
                    break
                if 'fts' in node['services']:
                    hostname = node['hostname'][0:node['hostname'].find(":")]
                    for fts_node in self._cb_cluster.get_fts_nodes():
                        if fts_node.ip == hostname:
                            rest_client = RestConnection(fts_node)
                            _, num_pindexes = rest_client.get_fts_stats(
                                index_name=index.name,
                                bucket_name=index.source_bucket.name,
                                stat_name="num_pindexes_actual")
                            if num_pindexes > 0:
                                replica_found = True
                                zones_with_replica += 1
                                break

        return zones_with_replica

    def update_index(self, index=None, mod_type=None):
        if mod_type == 'custom_mapping':
            index.index_definition['params']['doc_config'] = {}
            doc_config = {}
            doc_config['mode'] = 'type_field'
            doc_config['type_field'] = 'dept'
            index.index_definition['params']['doc_config'] = doc_config

            index.add_type_mapping_to_index_definition(type="filler",
                                                       analyzer="standard")
            index.index_definition['params']['mapping'] = {
                "default_analyzer": "standard",
                "default_datetime_parser": "dateTimeOptional",
                "default_field": "_all",
                "default_mapping": {
                    "dynamic": False,
                    "enabled": False
                },
                "default_type": "_default",
                "docvalues_dynamic": True,
                "index_dynamic": True,
                "store_dynamic": False,
                "type_field": "_type",
                "types": {
                    "Sales": {
                        "default_analyzer": "standard",
                        "dynamic": True,
                        "enabled": True,
                    }
                }
            }
            index.index_definition['uuid'] = index.get_uuid()
            index.update()
        elif "delete" == mod_type:
            self._cb_cluster.delete_fts_index(index.name)
        self.wait_for_indexing_complete(item_count=1000)

    def rebuild_cluster_to_initial_state(self):
        cleanup_nodes = self.rest.get_nodes_in_zone(self.default_group_name)
        nodes_to_remove = []
        for key in cleanup_nodes.keys():
            node = self._cb_cluster.get_node(key, str(8091))
            nodes_to_remove.append(node)
            self._cb_cluster.rebalance_out_node(node=node,
                                                sleep_before_rebalance=0)
        return nodes_to_remove

    def build_cluster(self):
        sg_structure = self._input.param("server_groups", None)
        server_groups = sg_structure.split("|")
        available_kv_nodes = self._cb_cluster.get_kv_nodes()
        available_fts_nodes = self._cb_cluster.get_fts_nodes()

        for server_group in server_groups:
            group_name = server_group.split("-")[0]
            group_nodes = server_group.split("-")[1].split(":")
            self.rest.add_zone(group_name)
            self.rest.get_all_zones_info()
            nodes_to_move = []

            for node in group_nodes:
                if 'D' == node:
                    if len(available_kv_nodes) == 0:
                        self.fail("Cannot find any available kv node!")
                    nodes_to_move.append(available_kv_nodes.pop(0).ip)

                elif 'F' == node:
                    if len(available_fts_nodes) == 0:
                        self.fail("Cannot find any available fts node!")
                    nodes_to_move.append(available_fts_nodes.pop(0).ip)
                else:
                    self.fail(f"Unsupported node type found {node}!")
            self.rest.shuffle_nodes_in_zones(
                moved_nodes=nodes_to_move,
                source_zone=self.default_group_name,
                target_zone=group_name)
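
    # Illustrative "server_groups" value (hypothetical): "sg1-D:F|sg2-D:F"
    # describes two server groups, each taking one kv ('D') and one fts ('F')
    # node out of the default group; build_cluster() splits on '|', '-' and ':'
    # and moves the matching nodes with shuffle_nodes_in_zones().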

    def build_index(self):
        replicas = self._input.param("replicas", 0)
        partitions = self._input.param("partitions", 1)

        collection_index, _type, index_scope, index_collections = self.define_index_parameters_collection_related(
        )
        idx = self.create_index(
            bucket=self._cb_cluster.get_bucket_by_name('default'),
            index_name="fts_idx",
            collection_index=collection_index,
            _type=_type,
            scope=index_scope,
            collections=index_collections)
        idx.update_index_partitions(partitions)
        idx.update_num_replicas(replicas)
        self.wait_for_indexing_complete(item_count=1000)
        return idx

    def get_zone_healthy_fts_nodes(self, zone=None):
        zone_nodes = self.rest.get_nodes_in_zone(zone)
        healthy_fts_nodes = []
        for key in zone_nodes.keys():
            node = zone_nodes[key]
            if node["status"] == 'healthy' and 'fts' in node['services']:
                for fts_node in self._cb_cluster.get_fts_nodes():
                    if key == fts_node.ip:
                        healthy_fts_nodes.append(fts_node)
        return healthy_fts_nodes

    def query_node(self, index=None, node=None):
        hits, _, _, _ = index.execute_query(self.fts_query, node=node)
        return hits

    def eject_nodes(self, eject_nodes_structure=None, eject_type=None):
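        # "eject_nodes_structure" follows the same grammar as "server_groups",
        # e.g. "Group1-D:F|Group2-F" picks one KV and one FTS node from Group1
        # and one FTS node from Group2; "eject_type" is one of "remove",
        # "failover", "shutdown" or "shutdown_no_rebalance" (handled below).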
        eject_server_groups = eject_nodes_structure.split("|")
        eject_nodes = []

        for eject_server_group in eject_server_groups:
            group_name = eject_server_group.split("-")[0]
            node_types = eject_server_group.split("-")[1]
            target_zone_nodes = self.rest.get_nodes_in_zone(group_name)
            node_type_arr = node_types.split(":")
            for node_type in node_type_arr:
                if 'D' == node_type:
                    for kv_node in self._cb_cluster.get_kv_nodes():
                        if kv_node.ip in target_zone_nodes.keys():
                            if kv_node not in eject_nodes:
                                eject_nodes.append(kv_node)
                                break
                elif 'F' == node_type:
                    for fts_node in self._cb_cluster.get_fts_nodes():
                        if fts_node.ip in target_zone_nodes.keys():
                            if fts_node not in eject_nodes:
                                eject_nodes.append(fts_node)
                                break
                else:
                    self.fail("Unsupported node type found in nodes to eject.")

        for node in eject_nodes:
            if "remove" == eject_type:
                self._cb_cluster.rebalance_out_node(node=node)
            elif "failover" == eject_type:
                self._cb_cluster.failover(graceful=False, node=node)
                #self._cb_cluster.rebalance_failover_nodes()
            elif "shutdown" == eject_type:
                remote = RemoteMachineShellConnection(node)
                remote.stop_couchbase()
                self._cb_cluster.failover(graceful=False, node=node)
                self._cb_cluster.rebalance_failover_nodes()
            elif "shutdown_no_rebalance" == eject_type:
                remote = RemoteMachineShellConnection(node)
                remote.stop_couchbase()

        return eject_nodes

    def create_server_group(self, group_name=None):
        self.rest.add_zone(group_name)

    def _cleanup_server_groups(self):
        curr_server_groups = self.rest.get_zone_names()
        for g in curr_server_groups.keys():
            if g != self.default_group_name:
                nodes = self.rest.get_nodes_in_zone(g)
                if nodes:
                    nodes_to_move = []
                    for key in nodes.keys():
                        nodes_to_move.append(key)
                    self.rest.shuffle_nodes_in_zones(
                        moved_nodes=nodes_to_move,
                        source_zone=g,
                        target_zone=self.default_group_name)
                self.rest.delete_zone(g)

    def _maybe_rebalance(self):
        if not self.helper.is_cluster_rebalanced():
            self._cb_cluster.rebalance_failover_nodes()
Exemplo n.º 24
0
    def set_up_rest(self, master):
        self.rest = RestConnection(master)
        self.rest_helper = RestHelper(self.rest)
Exemplo n.º 25
0
class PerfBase(unittest.TestCase):

    """
    specURL = http://hub.internal.couchbase.org/confluence/display/cbit/Black+Box+Performance+Test+Matrix

    """

    # The setUpBaseX() methods allow subclasses to resequence the setUp() and
    # skip cluster configuration.
    def setUpBase0(self):
        self.log = logger.Logger.get_logger()
        self.input = TestInputSingleton.input
        self.vbucket_count = PerfDefaults.vbuckets
        self.sc = None
        if self.parami("tear_down_on_setup",
                       PerfDefaults.tear_down_on_setup) == 1:
            self.tearDown()  # Tear down in case previous run had unclean death
        master = self.input.servers[0]
        self.set_up_rest(master)

    def setUpBase1(self):
        if max(self.parami('num_buckets', 1),
               self.parami('xdcr_num_buckets', 1)) > 1:
            bucket = 'bucket-0'
        else:
            bucket = self.param('bucket', 'default')
        vBuckets = self.rest.get_vbuckets(bucket)
        self.vbucket_count = len(vBuckets) if vBuckets else 0
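    # Hypothetical sketch of how a subclass might use the setUpBaseX() hooks
    # to resequence setUp() and skip the cluster configuration done below:
    #
    #     class MyPerfTest(PerfBase):
    #         def setUp(self):
    #             self.setUpBase0()          # rest connection + optional tearDown
    #             self.set_up_cluster(self.input.servers[0])
    #             self.set_up_buckets()
    #             self.setUpBase1()          # discover vbucket count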

    def setUp(self):
        self.setUpBase0()

        mc_threads = self.parami("mc_threads", PerfDefaults.mc_threads)
        if mc_threads != PerfDefaults.mc_threads:
            for node in self.input.servers:
                self.set_mc_threads(node, mc_threads)

        erlang_schedulers = self.param("erlang_schedulers",
                                       PerfDefaults.erlang_schedulers)
        if erlang_schedulers:
            ClusterOperationHelper.set_erlang_schedulers(self.input.servers,
                                                         erlang_schedulers)
        master = self.input.servers[0]

        self.is_multi_node = False
        self.data_path = master.data_path

        # Number of items loaded by load() method.
        # Does not include or count any items that came from set_up_dgm().
        #
        self.num_items_loaded = 0

        if self.input.clusters:
            for cluster in self.input.clusters.values():
                master = cluster[0]
                self.set_up_rest(master)
                self.set_up_cluster(master)
        else:
            master = self.input.servers[0]
            self.set_up_cluster(master)

        # Rebalance
        num_nodes = self.parami("num_nodes", 10)
        self.rebalance_nodes(num_nodes)

        if self.input.clusters:
            for cluster in self.input.clusters.values():
                master = cluster[0]
                self.set_up_rest(master)
                self.set_up_buckets()
        else:
            self.set_up_buckets()

        self.set_up_proxy()

        if self.input.clusters:
            for cluster in self.input.clusters.values():
                master = cluster[0]
                self.set_up_rest(master)
                self.reconfigure()
        else:
            self.reconfigure()

        if self.parami("dgm", getattr(self, "dgm", 1)):
            self.set_up_dgm()

        time.sleep(10)
        self.setUpBase1()

        if self.input.clusters:
            for cluster in self.input.clusters.values():
                self.wait_until_warmed_up(cluster[0])
        else:
            self.wait_until_warmed_up()
        ClusterOperationHelper.flush_os_caches(self.input.servers)

    def set_up_rest(self, master):
        self.rest = RestConnection(master)
        self.rest_helper = RestHelper(self.rest)

    def set_up_cluster(self, master):
        """Initialize cluster"""
        self.log.info("setting up cluster")

        self.rest.init_cluster(master.rest_username, master.rest_password)

        memory_quota = self.parami('mem_quota', PerfDefaults.mem_quota)
        self.rest.init_cluster_memoryQuota(master.rest_username,
                                           master.rest_password,
                                           memoryQuota=memory_quota)

    def _get_bucket_names(self, num_buckets):
        """
        Get a list of bucket names
        """
        if num_buckets > 1:
            buckets = ['bucket-{0}'.format(i) for i in range(num_buckets)]
        else:
            buckets = [self.param('bucket', 'default')]

        return buckets

    def get_bucket_conf(self):
        """ retrieve bucket configurations"""

        num_buckets = max(self.parami('num_buckets', 1),
                          self.parami('xdcr_num_buckets', 1))
        self.buckets = self._get_bucket_names(num_buckets)

    def set_up_buckets(self):
        """Set up data bucket(s)"""

        self.log.info("setting up buckets")

        self.get_bucket_conf()

        for bucket in self.buckets:
            bucket_ram_quota = self.parami('mem_quota', PerfDefaults.mem_quota)
            bucket_ram_quota //= max(self.parami('num_buckets', 1),
                                     self.parami('xdcr_num_buckets', 1))
            replicas = self.parami('replicas', getattr(self, 'replicas', 1))
            index_replicas = self.parami('index_replicas', 1)

            self.rest.create_bucket(bucket=bucket, ramQuotaMB=bucket_ram_quota,
                                    replicaNumber=replicas, authType='sasl',
                                    replica_index=index_replicas)

            status = self.rest_helper.vbucket_map_ready(bucket, 60)
            self.assertTrue(status, msg='vbucket_map not ready .. timed out')
            status = self.rest_helper.bucket_exists(bucket)
            self.assertTrue(status,
                            msg='unable to create {0} bucket'.format(bucket))

    def reconfigure(self):
        """Customize basic Couchbase setup"""
        self.log.info("customizing setup")

        self.set_loglevel()
        self.customize_xdcr_settings()
        self.set_autocompaction()
        self.set_exp_pager_stime()
        self.set_rebalance_options()

    def set_rebalance_options(self):
        # rebalanceMovesBeforeCompaction
        rmbc = self.parami('rebalance_moves_before_compaction', 0)
        if rmbc:
            cmd = 'ns_config:set(rebalance_moves_before_compaction, {0}).'\
                .format(rmbc)
            self.rest.diag_eval(cmd)

    def set_exp_pager_stime(self):
        exp_pager_stime = self.param('exp_pager_stime',
                                     PerfDefaults.exp_pager_stime)
        if exp_pager_stime != PerfDefaults.exp_pager_stime:
            self.set_ep_param('flush_param', 'exp_pager_stime', exp_pager_stime)

    def set_loglevel(self):
        """Set custom loglevel"""

        loglevel = self.param('loglevel', None)
        if loglevel:
            self.rest.set_global_loglevel(loglevel)

    def set_mc_threads(self, node, mc_threads):
        """Change number of memcached threads"""
        rest = RestConnection(node)
        rest.set_mc_threads(mc_threads)
        self.log.info("num of memcached threads = {0}".format(mc_threads))

    def customize_xdcr_settings(self):
        """Set custom XDCR environment variables"""
        max_concurrent_reps_per_doc = self.param('max_concurrent_reps_per_doc',
                                                 None)
        xdcr_doc_batch_size_kb = self.param('xdcr_doc_batch_size_kb', None)
        xdcr_checkpoint_interval = self.param('xdcr_checkpoint_interval', None)

        if max_concurrent_reps_per_doc:
            env_var = 'MAX_CONCURRENT_REPS_PER_DOC'
            value = max_concurrent_reps_per_doc
        elif xdcr_doc_batch_size_kb:
            env_var = 'XDCR_DOC_BATCH_SIZE_KB'
            value = xdcr_doc_batch_size_kb
        elif xdcr_checkpoint_interval:
            env_var = 'XDCR_CHECKPOINT_INTERVAL'
            value = xdcr_checkpoint_interval
        else:
            return

        self.log.info("changing {0} to {1}".format(env_var, value))

        for server in self.input.servers:
            rc = RemoteMachineShellConnection(server)
            rc.set_environment_variable(env_var, value)

    def set_ep_compaction(self, comp_ratio):
        """Set up ep_engine side compaction ratio"""
        for server in self.input.servers:
            shell = RemoteMachineShellConnection(server)
            cmd = "/opt/couchbase/bin/cbepctl localhost:11210 "\
                  "set flush_param db_frag_threshold {0}".format(comp_ratio)
            self._exec_and_log(shell, cmd)
            shell.disconnect()

    def set_autocompaction(self, disable_view_compaction=False):
        """Set custom auto-compaction settings"""

        try:
            # Parallel database and view compaction
            parallel_compaction = self.param("parallel_compaction",
                                             PerfDefaults.parallel_compaction)
            # Database fragmentation threshold
            db_compaction = self.parami("db_compaction",
                                        PerfDefaults.db_compaction)
            self.log.info("database compaction = {0}".format(db_compaction))

            # ep_engine fragmentation threshold
            ep_compaction = self.parami("ep_compaction",
                                        PerfDefaults.ep_compaction)
            if ep_compaction != PerfDefaults.ep_compaction:
                self.set_ep_compaction(ep_compaction)
                self.log.info("ep_engine compaction = {0}".format(ep_compaction))

            # View fragmentation threshold
            if disable_view_compaction:
                view_compaction = 100
            else:
                view_compaction = self.parami("view_compaction",
                                              PerfDefaults.view_compaction)
            # Set custom auto-compaction settings
            self.rest.set_auto_compaction(parallelDBAndVC=parallel_compaction,
                                          dbFragmentThresholdPercentage=db_compaction,
                                          viewFragmntThresholdPercentage=view_compaction)
        except Exception as e:
            # It's hard to predict which exceptions these calls may raise,
            # so we fall back to a general handler.
            self.log.error("Error while changing compaction settings: {0}"
                           .format(e))

    def set_ep_param(self, type, param, value):
        """
        Set ep-engine specific param, using cbepctl

        type: parameter type, e.g. flush_param, tap_param, etc.
        """
        bucket = Bucket(name=self.buckets[0], authType="sasl", saslPassword="")
        for server in self.input.servers:
            shell = RemoteMachineShellConnection(server)
            shell.execute_cbepctl(bucket,
                                  "", "set %s" % type, param, value)
            shell.disconnect()

    def tearDown(self):
        if self.parami("tear_down", 0) == 1:
            self.log.info("routine skipped")
            return

        self.log.info("routine starts")

        if self.parami("tear_down_proxy", 1) == 1:
            self.tear_down_proxy()
        else:
            self.log.info("proxy tearDown skipped")

        if self.sc is not None:
            self.sc.stop()
            self.sc = None

        if self.parami("tear_down_bucket", 0) == 1:
            self.tear_down_buckets()
        else:
            self.log.info("bucket tearDown skipped")

        if self.parami("tear_down_cluster", 1) == 1:
            self.tear_down_cluster()
        else:
            self.log.info("cluster tearDown skipped")

        self.log.info("routine finished")

    def tear_down_buckets(self):
        self.log.info("tearing down bucket")
        BucketOperationHelper.delete_all_buckets_or_assert(self.input.servers,
                                                           self)
        self.log.info("bucket teared down")

    def tear_down_cluster(self):
        self.log.info("tearing down cluster")
        ClusterOperationHelper.cleanup_cluster(self.input.servers)
        ClusterOperationHelper.wait_for_ns_servers_or_assert(self.input.servers,
                                                             self)
        self.log.info("Cluster teared down")

    def set_up_proxy(self, bucket=None):
        """Set up and start Moxi"""

        if self.input.moxis:
            self.log.info("setting up proxy")

            bucket = bucket or self.param('bucket', 'default')

            shell = RemoteMachineShellConnection(self.input.moxis[0])
            shell.start_moxi(self.input.servers[0].ip, bucket,
                             self.input.moxis[0].port)
            shell.disconnect()

    def tear_down_proxy(self):
        if len(self.input.moxis) > 0:
            shell = RemoteMachineShellConnection(self.input.moxis[0])
            shell.stop_moxi()
            shell.disconnect()

    # Returns "host:port" of moxi to hit.
    def target_host_port(self, bucket='default', use_direct=False):
        rv = self.param('moxi', None)
        if use_direct:
            return "%s:%s" % (self.input.servers[0].ip,
                              '11210')
        if rv:
            return rv
        if len(self.input.moxis) > 0:
            return "%s:%s" % (self.input.moxis[0].ip,
                              self.input.moxis[0].port)
        return "%s:%s" % (self.input.servers[0].ip,
                          self.rest.get_bucket(bucket).nodes[0].moxi)

    def protocol_parse(self, protocol_in, use_direct=False):
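        # Splits a protocol spec into (protocol, host:port, user, password).
        # e.g. "membase-binary://user:pass@10.1.1.1:8091"
        #        -> ("membase-binary", "10.1.1.1:8091", "user", "pass")
        # A bare value such as "binary" becomes "memcached-binary" and the
        # target host:port / REST credentials are filled in from the config.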
        if protocol_in.find('://') >= 0:
            if protocol_in.find("couchbase:") >= 0:
                protocol = "couchbase"
            else:
                protocol = \
                    '-'.join(((["membase"] +
                    protocol_in.split("://"))[-2] + "-binary").split('-')[0:2])
            host_port = ('@' + protocol_in.split("://")[-1]).split('@')[-1]
            user, pswd = (('@' +
                           protocol_in.split("://")[-1]).split('@')[-2] +
                           ":").split(':')[0:2]
        else:
            protocol = 'memcached-' + protocol_in
            host_port = self.target_host_port(use_direct=use_direct)
            user = self.param("rest_username", "Administrator")
            pswd = self.param("rest_password", "password")
        return protocol, host_port, user, pswd

    def mk_protocol(self, host, port='8091', prefix='membase-binary'):
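        # e.g. mk_protocol("10.1.1.1") -> "membase-binary://10.1.1.1:8091",
        # unless the 'protocol' test param overrides it.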
        return self.param('protocol',
                          prefix + '://' + host + ':' + port)

    def get_backups(self, protocol):
        """ Get backup server lists for memcached-binary """
        port = protocol.split(":")[-1]
        return ["%s:%s" % (server.ip, port)
                for server in self.input.servers[1:]]

    def restartProxy(self, bucket=None):
        self.tear_down_proxy()
        self.set_up_proxy(bucket)

    def set_up_dgm(self):
        """Download fragmented, DGM dataset onto each cluster node, if not
        already locally available.

        The number of vbuckets and database schema must match the
        target cluster.

        Shutdown all cluster nodes.

        Do a cluster-restore.

        Restart all cluster nodes."""

        bucket = self.param("bucket", "default")
        ClusterOperationHelper.stop_cluster(self.input.servers)
        for server in self.input.servers:
            remote = RemoteMachineShellConnection(server)
            #TODO: Better way to pass num_nodes and db_size?
            self.get_data_files(remote, bucket, 1, 10)
            remote.disconnect()
        ClusterOperationHelper.start_cluster(self.input.servers)

    def get_data_files(self, remote, bucket, num_nodes, db_size):
        base = 'https://s3.amazonaws.com/database-analysis'
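        # With the arguments used by set_up_dgm() (num_nodes=1, db_size=10,
        # bucket='default') the download URL resolves to, for example:
        #   <base>/couchbase/1-256-10/default_cb.tar.gz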
        dir = '/tmp/'
        if remote.is_couchbase_installed():
            dir = dir + '/couchbase/{0}-{1}-{2}/'.format(num_nodes, 256,
                                                         db_size)
            output, error = remote.execute_command('mkdir -p {0}'.format(dir))
            remote.log_command_output(output, error)
            file = '{0}_cb.tar.gz'.format(bucket)
            base_url = base + '/couchbase/{0}-{1}-{2}/{3}'.format(num_nodes,
                                                                  256, db_size,
                                                                  file)
        else:
            dir = dir + '/membase/{0}-{1}-{2}/'.format(num_nodes, 1024,
                                                       db_size)
            output, error = remote.execute_command('mkdir -p {0}'.format(dir))
            remote.log_command_output(output, error)
            file = '{0}_mb.tar.gz'.format(bucket)
            base_url = base + '/membase/{0}-{1}-{2}/{3}'.format(num_nodes,
                                                                1024, db_size,
                                                                file)


        info = remote.extract_remote_info()
        wget_command = 'wget'
        if info.type.lower() == 'windows':
            wget_command = \
                "cd {0} ;cmd /c 'c:\\automation\\wget.exe --no-check-certificate"\
                .format(dir)

        # Check if the file exists on the remote server else download the gzipped version
        # Extract if necessary
        exist = remote.file_exists(dir, file)
        if not exist:
            additional_quote = ""
            if info.type.lower() == 'windows':
                additional_quote = "'"
            command = "{0} -v -O {1}{2} {3} {4} ".format(wget_command, dir,
                                                         file, base_url,
                                                         additional_quote)
            output, error = remote.execute_command(command)
            remote.log_command_output(output, error)

        if remote.is_couchbase_installed():
            if info.type.lower() == 'windows':
                destination_folder = testconstants.WIN_COUCHBASE_DATA_PATH
            else:
                destination_folder = testconstants.COUCHBASE_DATA_PATH
        else:
            if info.type.lower() == 'windows':
                destination_folder = testconstants.WIN_MEMBASE_DATA_PATH
            else:
                destination_folder = testconstants.MEMBASE_DATA_PATH
        if self.data_path:
            destination_folder = self.data_path
        untar_command = 'cd {1}; tar -xzf {0}'.format(dir + file,
                                                      destination_folder)
        output, error = remote.execute_command(untar_command)
        remote.log_command_output(output, error)

    def _exec_and_log(self, shell, cmd):
        """helper method to execute a command and log output"""
        if not cmd or not shell:
            return

        output, error = shell.execute_command(cmd)
        shell.log_command_output(output, error)

    def _build_tar_name(self, bucket, version="unknown_version",
                        file_base=None):
        """build tar file name.

        {file_base}-{version}-{bucket}.tar.gz
        """
        if not file_base:
            file_base = os.path.splitext(
                os.path.basename(self.param("conf_file",
                                 PerfDefaults.conf_file)))[0]
        return "{0}-{1}-{2}.tar.gz".format(file_base, version, bucket)

    def _save_snapshot(self, server, bucket, file_base=None):
        """Save data files to a snapshot"""

        src_data_path = os.path.dirname(server.data_path or
                                        testconstants.COUCHBASE_DATA_PATH)
        dest_data_path = "{0}-snapshots".format(src_data_path)

        self.log.info("server={0}, src_data_path={1}, dest_data_path={2}"
                      .format(server.ip, src_data_path, dest_data_path))

        shell = RemoteMachineShellConnection(server)

        build_name, short_version, full_version = \
            shell.find_build_version("/opt/couchbase/", "VERSION.txt", "cb")

        dest_file = self._build_tar_name(bucket, full_version, file_base)

        self._exec_and_log(shell, "mkdir -p {0}".format(dest_data_path))

        # save as a gzip file; if the file exists, overwrite it
        # TODO: multiple buckets
        zip_cmd = "cd {0}; tar -cvzf {1}/{2} {3} {3}-data _*"\
            .format(src_data_path, dest_data_path, dest_file, bucket)
        self._exec_and_log(shell, zip_cmd)

        shell.disconnect()
        return True

    def _load_snapshot(self, server, bucket, file_base=None, overwrite=True):
        """Load data files from a snapshot"""

        dest_data_path = os.path.dirname(server.data_path or
                                         testconstants.COUCHBASE_DATA_PATH)
        src_data_path = "{0}-snapshots".format(dest_data_path)

        self.log.info("server={0}, src_data_path={1}, dest_data_path={2}"
                      .format(server.ip, src_data_path, dest_data_path))

        shell = RemoteMachineShellConnection(server)

        build_name, short_version, full_version = \
            shell.find_build_version("/opt/couchbase/", "VERSION.txt", "cb")

        src_file = self._build_tar_name(bucket, full_version, file_base)

        if not shell.file_exists(src_data_path, src_file):
            self.log.error("file '{0}/{1}' does not exist"
                           .format(src_data_path, src_file))
            shell.disconnect()
            return False

        if not overwrite:
            self._save_snapshot(server, bucket,
                                "{0}.tar.gz".format(
                                    time.strftime(PerfDefaults.strftime)))  # TODO: filename

        rm_cmd = "rm -rf {0}/{1} {0}/{1}-data {0}/_*".format(dest_data_path,
                                                             bucket)
        self._exec_and_log(shell, rm_cmd)

        unzip_cmd = "cd {0}; tar -xvzf {1}/{2}".format(dest_data_path,
                                                       src_data_path, src_file)
        self._exec_and_log(shell, unzip_cmd)

        shell.disconnect()
        return True

    def save_snapshots(self, file_base, bucket):
        """Save snapshots on all servers"""
        if not self.input.servers or not bucket:
            self.log.error("invalid server list or bucket name")
            return False

        ClusterOperationHelper.stop_cluster(self.input.servers)

        for server in self.input.servers:
            self._save_snapshot(server, bucket, file_base)

        ClusterOperationHelper.start_cluster(self.input.servers)

        return True

    def load_snapshots(self, file_base, bucket):
        """Load snapshots on all servers"""
        if not self.input.servers or not bucket:
            self.log.error("invalid server list or bucket name")
            return False

        ClusterOperationHelper.stop_cluster(self.input.servers)

        for server in self.input.servers:
            if not self._load_snapshot(server, bucket, file_base):
                ClusterOperationHelper.start_cluster(self.input.servers)
                return False

        ClusterOperationHelper.start_cluster(self.input.servers)

        return True

    def spec(self, reference):
        self.spec_reference = self.param("spec", reference)

    def mk_stats(self, verbosity):
        return StatsCollector(verbosity)

    def _get_src_version(self):
        """get testrunner version"""
        try:
            result = subprocess.Popen(['git', 'rev-parse', 'HEAD'],
                                      stdout=subprocess.PIPE).communicate()[0]
        except subprocess.CalledProcessError as e:
            self.log.error("unable to get src code version : {0}".format(e))
            return "unknown version"
        return result.decode().rstrip()[:7]

    def start_stats(self, stats_spec, servers=None,
                    process_names=('memcached', 'beam.smp'), test_params=None,
                    client_id='', collect_server_stats=True, ddoc=None):
        if self.parami('stats', 1) == 0:
            return None

        servers = servers or self.input.servers
        clusters = None
        if hasattr(self, "get_region"):
            if self.parami("access_phase", 0):
                clusters = self.input.clusters
                if self.get_region() == "west":
                    clusters[0], clusters[1] = clusters[1], clusters[0]
        sc = self.mk_stats(False)
        bucket = self.param("bucket", "default")
        sc.start(servers, bucket, process_names, stats_spec, client_id,
                 collect_server_stats=collect_server_stats, ddoc=ddoc,
                 clusters=clusters)
        test_params['testrunner'] = self._get_src_version()
        self.test_params = test_params
        self.sc = sc
        return self.sc

    def end_stats(self, sc, total_stats=None, stats_spec=None):
        if sc is None:
            return
        if stats_spec is None:
            stats_spec = self.spec_reference
        if total_stats:
            sc.total_stats(total_stats)
        self.log.info("stopping stats collector")
        sc.stop()
        self.log.info("stats collector is stopped")
        sc.export(stats_spec, self.test_params)

    def load(self, num_items, min_value_size=None,
             kind='binary',
             protocol='binary',
             ratio_sets=1.0,
             ratio_hot_sets=0.0,
             ratio_hot_gets=0.0,
             ratio_expirations=0.0,
             expiration=None,
             prefix="",
             doc_cache=1,
             use_direct=True,
             report=0,
             start_at= -1,
             collect_server_stats=True,
             is_eperf=False,
             hot_shift=0):
        cfg = {'max-items': num_items,
               'max-creates': num_items,
               'max-ops-per-sec': self.parami("load_mcsoda_max_ops_sec",
                                              PerfDefaults.mcsoda_max_ops_sec),
               'min-value-size': min_value_size or self.parami("min_value_size",
                                                               1024),
               'ratio-sets': self.paramf("load_ratio_sets", ratio_sets),
               'ratio-misses': self.paramf("load_ratio_misses", 0.0),
               'ratio-creates': self.paramf("load_ratio_creates", 1.0),
               'ratio-deletes': self.paramf("load_ratio_deletes", 0.0),
               'ratio-hot': 0.0,
               'ratio-hot-sets': ratio_hot_sets,
               'ratio-hot-gets': ratio_hot_gets,
               'ratio-expirations': ratio_expirations,
               'expiration': expiration or 0,
               'exit-after-creates': 1,
               'json': int(kind == 'json'),
               'batch': self.parami("batch", PerfDefaults.batch),
               'vbuckets': self.vbucket_count,
               'doc-cache': doc_cache,
               'prefix': prefix,
               'report': report,
               'hot-shift': hot_shift,
               'cluster_name': self.param("cluster_name", "")}
        cur = {}
        if start_at >= 0:
            cur['cur-items'] = start_at
            cur['cur-gets'] = start_at
            cur['cur-sets'] = start_at
            cur['cur-ops'] = cur['cur-gets'] + cur['cur-sets']
            cur['cur-creates'] = start_at
            cfg['max-creates'] = start_at + num_items
            cfg['max-items'] = cfg['max-creates']

        cfg_params = cfg.copy()
        cfg_params['test_time'] = time.time()
        cfg_params['test_name'] = self.id()

        # phase: 'load' or 'reload'
        phase = "load"
        if self.parami("hot_load_phase", 0) == 1:
            # all gets
            if self.parami("hot_load_get", PerfDefaults.hot_load_get) == 1:
                cfg['ratio-sets'] = 0
                cfg['exit-after-creates'] = 0
                cfg['exit-after-gets'] = 1
                cfg['max-gets'] = start_at + num_items
            phase = "reload"

        if is_eperf:
            collect_server_stats = self.parami("prefix", 0) == 0
            client_id = self.parami("prefix", 0)
            sc = self.start_stats("{0}.{1}".format(self.spec_reference, phase), # stats spec e.x: testname.load
                                  test_params=cfg_params, client_id=client_id,
                                  collect_server_stats=collect_server_stats)

        # For Black box, multi node tests
        # always use membase-binary
        if self.is_multi_node:
            protocol = self.mk_protocol(host=self.input.servers[0].ip,
                                        port=self.input.servers[0].port)

        protocol, host_port, user, pswd = \
            self.protocol_parse(protocol, use_direct=use_direct)

        if not user.strip():
            if "11211" in host_port:
                user = self.param("bucket", "default")
            else:
                user = self.input.servers[0].rest_username
        if not pswd.strip():
            if not "11211" in host_port:
                pswd = self.input.servers[0].rest_password

        self.log.info("mcsoda %s %s %s %s" %
                      (protocol, host_port, user, pswd))
        self.log.info("mcsoda cfg:\n" + pprint.pformat(cfg))
        self.log.info("mcsoda cur:\n" + pprint.pformat(cfg))

        cur, start_time, end_time = \
            self.mcsoda_run(cfg, cur, protocol, host_port, user, pswd,
                            heartbeat=self.parami("mcsoda_heartbeat", 0),
                            why="load", bucket=self.param("bucket", "default"))
        self.num_items_loaded = num_items
        ops = {'tot-sets': cur.get('cur-sets', 0),
               'tot-gets': cur.get('cur-gets', 0),
               'tot-items': cur.get('cur-items', 0),
               'tot-creates': cur.get('cur-creates', 0),
               'tot-misses': cur.get('cur-misses', 0),
               "start-time": start_time,
               "end-time": end_time}

        if is_eperf:
            if self.parami("load_wait_until_drained", 1) == 1:
                self.wait_until_drained()
            if self.parami("load_wait_until_repl",
                PerfDefaults.load_wait_until_repl) == 1:
                self.wait_until_repl()
            self.end_stats(sc, ops, "{0}.{1}".format(self.spec_reference,
                                                     phase))

        return ops, start_time, end_time

    def mcsoda_run(self, cfg, cur, protocol, host_port, user, pswd,
                   stats_collector=None, stores=None, ctl=None,
                   heartbeat=0, why="", bucket="default", backups=None):
        return mcsoda.run(cfg, cur, protocol, host_port, user, pswd,
                          stats_collector=stats_collector,
                          stores=stores,
                          ctl=ctl,
                          heartbeat=heartbeat,
                          why=why,
                          bucket=bucket,
                          backups=backups)

    def rebalance_nodes(self, num_nodes):
        """Rebalance cluster(s) if more than 1 node provided"""

        if len(self.input.servers) == 1 or num_nodes == 1:
            self.log.warn("running on single node cluster")
            return
        else:
            self.log.info("rebalancing nodes - num_nodes = {0}"
                          .format(num_nodes))

        if self.input.clusters:
            for cluster in self.input.clusters.values():
                status, _ = RebalanceHelper.rebalance_in(cluster,
                                                         num_nodes - 1,
                                                         do_shuffle=False)
                self.assertTrue(status)
        else:
            status, _ = RebalanceHelper.rebalance_in(self.input.servers,
                                                     num_nodes - 1,
                                                     do_shuffle=False)
            self.assertTrue(status)

    def delayed_rebalance_worker(self, servers, num_nodes, delay_seconds, sc,
                                 max_retries=PerfDefaults.reb_max_retries,
                                 reb_mode=PerfDefaults.REB_MODE.IN):
        time.sleep(delay_seconds)
        gmt_now = time.strftime(PerfDefaults.strftime, time.gmtime())
        self.log.info("rebalance started")

        if not sc:
            self.log.error("invalid stats collector")
            return
        status = False
        retries = 0
        while not status and retries <= max_retries:
            start_time = time.time()
            if reb_mode == PerfDefaults.REB_MODE.OUT:
                status, nodes = RebalanceHelper.rebalance_out(servers, num_nodes)
            elif reb_mode == PerfDefaults.REB_MODE.SWAP:
                status, nodes = RebalanceHelper.rebalance_swap(servers, num_nodes)
            else:
                status, nodes = RebalanceHelper.rebalance_in(servers,
                                        num_nodes - 1, do_check=(not retries))
            end_time = time.time()
            self.log.info("status: {0}, nodes: {1}, retries: {2}"
                          .format(status, nodes, retries))
            if not status:
                retries += 1
                time.sleep(delay_seconds)
        sc.reb_stats(start_time, end_time - start_time)
        if self.parami("master_events", PerfDefaults.master_events):
            filename = "master_events.log"
            with open(filename, "w") as f:
                f.write(self.rest.diag_master_events()[1])

    def delayed_rebalance(self, num_nodes, delay_seconds=10,
                          max_retries=PerfDefaults.reb_max_retries,
                          reb_mode=0, sync=False):
        self.log.info("delayed_rebalance")
        if sync:
            PerfBase.delayed_rebalance_worker(self, self.input.servers,
                num_nodes, delay_seconds, self.sc, max_retries, reb_mode)
        else:
            t = threading.Thread(target=PerfBase.delayed_rebalance_worker,
                                 args=(self, self.input.servers, num_nodes,
                                 delay_seconds, self.sc, max_retries, reb_mode))
            t.daemon = True
            t.start()

    @staticmethod
    def set_auto_compaction(server, parallel_compaction, percent_threshold):
        rest = RestConnection(server)
        rest.set_auto_compaction(parallel_compaction,
                                 dbFragmentThresholdPercentage=percent_threshold,
                                 viewFragmntThresholdPercentage=percent_threshold)

    @staticmethod
    def delayed_compaction_worker(servers, parallel_compaction,
                                  percent_threshold, delay_seconds):
        time.sleep(delay_seconds)
        PerfBase.set_auto_compaction(servers[0], parallel_compaction,
                                     percent_threshold)

    def delayed_compaction(self, parallel_compaction="false",
                           percent_threshold=0.01,
                           delay_seconds=10):
        t = threading.Thread(target=PerfBase.delayed_compaction_worker,
                             args=(self.input.servers,
                                   parallel_compaction,
                                   percent_threshold,
                                   delay_seconds))
        t.daemon = True
        t.start()

    def loop(self, num_ops=None,
             num_items=None,
             max_items=None,
             max_creates=None,
             min_value_size=None,
             exit_after_creates=0,
             kind='binary',
             protocol='binary',
             clients=1,
             ratio_misses=0.0,
             ratio_sets=0.0, ratio_creates=0.0, ratio_deletes=0.0,
             ratio_hot=0.2, ratio_hot_sets=0.95, ratio_hot_gets=0.95,
             ratio_expirations=0.0,
             expiration=None,
             test_name=None,
             prefix="",
             doc_cache=1,
             use_direct=True,
             collect_server_stats=True,
             start_at= -1,
             report=0,
             ctl=None,
             hot_shift=0,
             is_eperf=False,
             ratio_queries=0,
             queries=0,
             ddoc=None):
        num_items = num_items or self.num_items_loaded

        hot_stack_size = \
            self.parami('hot_stack_size', PerfDefaults.hot_stack_size) or \
            (num_items * ratio_hot)

        cfg = {'max-items': max_items or num_items,
               'max-creates': max_creates or 0,
               'max-ops-per-sec': self.parami("mcsoda_max_ops_sec",
                                              PerfDefaults.mcsoda_max_ops_sec),
               'min-value-size': min_value_size or self.parami("min_value_size",
                                                               1024),
               'exit-after-creates': exit_after_creates,
               'ratio-sets': ratio_sets,
               'ratio-misses': ratio_misses,
               'ratio-creates': ratio_creates,
               'ratio-deletes': ratio_deletes,
               'ratio-hot': ratio_hot,
               'ratio-hot-sets': ratio_hot_sets,
               'ratio-hot-gets': ratio_hot_gets,
               'ratio-expirations': ratio_expirations,
               'ratio-queries': ratio_queries,
               'expiration': expiration or 0,
               'threads': clients,
               'json': int(kind == 'json'),
               'batch': self.parami("batch", PerfDefaults.batch),
               'vbuckets': self.vbucket_count,
               'doc-cache': doc_cache,
               'prefix': prefix,
               'queries': queries,
               'report': report,
               'hot-shift': hot_shift,
               'hot-stack': self.parami("hot_stack", PerfDefaults.hot_stack),
               'hot-stack-size': hot_stack_size,
               'hot-stack-rotate': self.parami("hot_stack_rotate",
                                               PerfDefaults.hot_stack_rotate),
               'cluster_name': self.param("cluster_name", ""),
               'observe': self.param("observe", PerfDefaults.observe),
               'obs-backoff': self.paramf('obs_backoff',
                                          PerfDefaults.obs_backoff),
               'obs-max-backoff': self.paramf('obs_max_backoff',
                                              PerfDefaults.obs_max_backoff),
               'obs-persist-count': self.parami('obs_persist_count',
                                                PerfDefaults.obs_persist_count),
               'obs-repl-count': self.parami('obs_repl_count',
                                             PerfDefaults.obs_repl_count),
               'woq-pattern': self.parami('woq_pattern',
                                         PerfDefaults.woq_pattern),
               'woq-verbose': self.parami('woq_verbose',
                                         PerfDefaults.woq_verbose),
               'cor-pattern': self.parami('cor_pattern',
                                         PerfDefaults.cor_pattern),
               'cor-persist': self.parami('cor_persist',
                                         PerfDefaults.cor_persist),
               'time': self.parami('time', 0),
               'cbm': self.parami('cbm', PerfDefaults.cbm),
               'cbm-host': self.param('cbm_host', PerfDefaults.cbm_host),
               'cbm-port': self.parami('cbm_port', PerfDefaults.cbm_port)}

        cfg_params = cfg.copy()
        cfg_params['test_time'] = time.time()
        cfg_params['test_name'] = test_name
        client_id = ''
        stores = None

        if is_eperf:
            client_id = self.parami("prefix", 0)
        sc = None
        if self.parami("collect_stats", 1):
            sc = self.start_stats(self.spec_reference + ".loop",
                                  test_params=cfg_params, client_id=client_id,
                                  collect_server_stats=collect_server_stats,
                                  ddoc=ddoc)

        self.cur = {'cur-items': num_items}
        if start_at >= 0:
            self.cur['cur-gets'] = start_at
        if num_ops is None:
            num_ops = num_items
        if isinstance(num_ops, int):
            cfg['max-ops'] = num_ops
        else:
            # Here, num_ops looks like a "time to run" tuple of...
            # ('seconds', integer_num_of_seconds_to_run)
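            # e.g. num_ops=('seconds', 600) runs the workload for 600 seconds
            # instead of stopping after a fixed number of operations.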
            cfg['time'] = num_ops[1]

        # For Black box, multi node tests
        # always use membase-binary
        if self.is_multi_node:
            protocol = self.mk_protocol(host=self.input.servers[0].ip,
                                        port=self.input.servers[0].port)

        backups = self.get_backups(protocol)
        self.log.info("mcsoda protocol %s" % protocol)
        protocol, host_port, user, pswd = \
            self.protocol_parse(protocol, use_direct=use_direct)

        if not user.strip():
            if "11211" in host_port:
                user = self.param("bucket", "default")
            else:
                user = self.input.servers[0].rest_username
        if not pswd.strip():
            if not "11211" in host_port:
                pswd = self.input.servers[0].rest_password

        self.log.info("mcsoda %s %s %s %s" %
                      (protocol, host_port, user, pswd))
        self.log.info("mcsoda cfg:\n" + pprint.pformat(cfg))
        self.log.info("mcsoda cur:\n" + pprint.pformat(cfg))
        self.log.info("mcsoda backups: %s" % backups)

        # For query tests always use StoreCouchbase
        if protocol == "couchbase":
            stores = [StoreCouchbase()]

        self.cur, start_time, end_time = \
            self.mcsoda_run(cfg, self.cur, protocol, host_port, user, pswd,
                            stats_collector=sc, ctl=ctl, stores=stores,
                            heartbeat=self.parami("mcsoda_heartbeat", 0),
                            why="loop", bucket=self.param("bucket", "default"),
                            backups=backups)

        ops = {'tot-sets': self.cur.get('cur-sets', 0),
               'tot-gets': self.cur.get('cur-gets', 0),
               'tot-items': self.cur.get('cur-items', 0),
               'tot-creates': self.cur.get('cur-creates', 0),
               'tot-misses': self.cur.get('cur-misses', 0),
               "start-time": start_time,
               "end-time": end_time}

        if self.parami("loop_wait_until_drained",
                       PerfDefaults.loop_wait_until_drained):
            self.wait_until_drained()

        if self.parami("loop_wait_until_repl",
                       PerfDefaults.loop_wait_until_repl):
            self.wait_until_repl()

        if self.parami("collect_stats", 1) and \
                not self.parami("reb_no_fg", PerfDefaults.reb_no_fg):
            self.end_stats(sc, ops, self.spec_reference + ".loop")

        why = self.params("why", "main")
        prefix = self.parami("prefix", 0)
        self.log.info("finished")

        return ops, start_time, end_time

    def wait_until_drained(self):
        self.log.info("draining disk write queue")

        master = self.input.servers[0]
        bucket = self.param("bucket", "default")

        RebalanceHelper.wait_for_stats_on_all(master, bucket,
                                              'ep_queue_size', 0,
                                              fn=RebalanceHelper.wait_for_stats_no_timeout)
        RebalanceHelper.wait_for_stats_on_all(master, bucket,
                                              'ep_flusher_todo', 0,
                                              fn=RebalanceHelper.wait_for_stats_no_timeout)

        self.log.info("disk write queue has been drained")

        return time.time()

    def wait_until_repl(self):
        self.log.info("waiting for replication")

        master = self.input.servers[0]
        bucket = self.param("bucket", "default")

        RebalanceHelper.wait_for_stats_on_all(master, bucket,
            'vb_replica_queue_size', 0,
            fn=RebalanceHelper.wait_for_stats_no_timeout)

        RebalanceHelper.wait_for_stats_on_all(master, bucket,
            'ep_tap_replica_queue_itemondisk', 0,
            fn=RebalanceHelper.wait_for_stats_no_timeout)

        RebalanceHelper.wait_for_stats_on_all(master, bucket,
            'ep_tap_rebalance_queue_backfillremaining', 0,
            fn=RebalanceHelper.wait_for_stats_no_timeout)

        RebalanceHelper.wait_for_stats_on_all(master, bucket,
            'ep_tap_replica_qlen', 0,
            fn=RebalanceHelper.wait_for_stats_no_timeout)

        self.log.info("replication is done")

    def warmup(self, collect_stats=True, flush_os_cache=False):
        """
        Restart cluster and wait for it to warm up.
        In the current version, this affects the master node only.
        """
        if not self.input.servers:
            self.log.error("empty server list")
            return

        if collect_stats:
            client_id = self.parami("prefix", 0)
            test_params = {'test_time': time.time(),
                           'test_name': self.id(),
                           'json': 0}
            sc = self.start_stats(self.spec_reference + ".warmup",
                                  test_params=test_params,
                                  client_id=client_id)

        self.log.info("preparing to warmup cluster ...")

        server = self.input.servers[0]
        shell = RemoteMachineShellConnection(server)

        start_time = time.time()

        self.log.info("stopping couchbase ... ({0})".format(server.ip))
        shell.stop_couchbase()
        self.log.info("couchbase stopped ({0})".format(server.ip))

        if flush_os_cache:
            self.log.info("flushing os cache ...")
            shell.flush_os_caches()

        shell.start_couchbase()
        self.log.info("couchbase restarted ({0})".format(server.ip))

        self.wait_until_warmed_up()
        self.log.info("warmup finished")

        end_time = time.time()
        ops = {'tot-sets': 0,
               'tot-gets': 0,
               'tot-items': 0,
               'tot-creates': 0,
               'tot-misses': 0,
               "start-time": start_time,
               "end-time": end_time}

        if collect_stats:
            self.end_stats(sc, ops, self.spec_reference + ".warmup")

    def wait_until_warmed_up(self, master=None):
        if not master:
            master = self.input.servers[0]

        bucket = self.param("bucket", "default")

        fn = RebalanceHelper.wait_for_mc_stats_no_timeout
        for bucket in self.buckets:
            RebalanceHelper.wait_for_stats_on_all(master, bucket,
                                                  'ep_warmup_thread',
                                                  'complete', fn=fn)

    def set_param(self, name, val):
        input = getattr(self, "input", TestInputSingleton.input)
        input.test_params[name] = str(val)

        return True

    def wait_for_task_completion(self, task='indexer'):
        """Wait for ns_server task to finish"""
        t0 = time.time()
        self.log.info("Waiting 30 seconds before {0} monitoring".format(task))
        time.sleep(30)

        while True:
            tasks = self.rest.ns_server_tasks()
            if tasks:
                try:
                    progress = [t['progress'] for t in tasks if t['type'] == task]
                except TypeError:
                    self.log.error(tasks)
                else:
                    if progress:
                        self.log.info("{0} progress: {1}".format(task, progress))
                        time.sleep(10)
                    else:
                        break

        t1 = time.time()
        self.log.info("Time taken to perform task: {0} sec".format(t1 - t0))

    def param(self, name, default_value):
        input = getattr(self, "input", TestInputSingleton.input)
        return input.test_params.get(name, default_value)
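    # The parami()/paramf()/params() helpers below are thin wrappers around
    # param() that coerce the raw test parameter to int, float or str,
    # e.g. self.parami("num_nodes", 10) -> 10 when "num_nodes" is unset.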

    def parami(self, name, default_int):
        return int(self.param(name, default_int))

    def paramf(self, name, default_float):
        return float(self.param(name, default_float))

    def params(self, name, default_str):
        return str(self.param(name, default_str))
Exemplo n.º 26
0
    def test_analytics_volume(self):
        queries = ['SELECT VALUE u FROM `GleambookUsers_ds` u WHERE u.user_since >= "2010-09-13T16-48-15" AND u.user_since < "2010-10-13T16-48-15" AND (SOME e IN u.employment SATISFIES e.end_date IS UNKNOWN) LIMIT 100;',
           'SELECT VALUE u FROM `GleambookUsers_ds` u WHERE u.user_since >= "2010-11-13T16-48-15" AND u.user_since < "2010-12-13T16-48-15" limit 1;',
           ]
        nodes_in_cluster= [self.servers[0],self.cbas_node]
        print "Start Time: %s"%str(time.strftime("%H:%M:%S", time.gmtime(time.time())))
        
        ########################################################################################################################
        self.log.info("Step 1: Start the test with 2 KV and 2 CBAS nodes")

        self.log.info("Add a N1QL/Index nodes")
        self.query_node = self.servers[1]
        rest = RestConnection(self.query_node)
        rest.set_data_path(data_path=self.query_node.data_path,index_path=self.query_node.index_path,cbas_path=self.query_node.cbas_path)
        result = self.add_node(self.query_node, rebalance=False)
        self.assertTrue(result, msg="Failed to add N1QL/Index node.")
        self.log.info("Add a KV nodes")
        rest = RestConnection(self.kv_servers[1])
        rest.set_data_path(data_path=self.kv_servers[1].data_path,index_path=self.kv_servers[1].index_path,cbas_path=self.kv_servers[1].cbas_path)
        result = self.add_node(self.kv_servers[1], services=["kv"], rebalance=False)
        self.assertTrue(result, msg="Failed to add KV node.")

        self.log.info("Add one more KV node")
        rest = RestConnection(self.kv_servers[3])
        rest.set_data_path(data_path=self.kv_servers[3].data_path,index_path=self.kv_servers[3].index_path,cbas_path=self.kv_servers[3].cbas_path)
        result = self.add_node(self.kv_servers[3], services=["kv"], rebalance=False)
        self.assertTrue(result, msg="Failed to add KV node.")

        self.log.info("Add one more KV node")
        rest = RestConnection(self.kv_servers[4])
        rest.set_data_path(data_path=self.kv_servers[4].data_path,index_path=self.kv_servers[4].index_path,cbas_path=self.kv_servers[4].cbas_path)
        result = self.add_node(self.kv_servers[4], services=["kv"], rebalance=False)
        self.assertTrue(result, msg="Failed to add KV node.")
                 
        self.log.info("Add a CBAS nodes")
        result = self.add_node(self.cbas_servers[0], services=["cbas"], rebalance=True)
        self.assertTrue(result, msg="Failed to add CBAS node.")
         
        nodes_in_cluster = nodes_in_cluster + [self.query_node, self.kv_servers[1], self.kv_servers[3], self.kv_servers[4], self.cbas_servers[0]]
        ########################################################################################################################
        self.log.info("Step 2: Create Couchbase buckets.")
        self.create_required_buckets()
        
        ########################################################################################################################
        self.log.info("Step 3: Create 10M docs average of 1k docs for 8 couchbase buckets.")
        
        GleambookUsers = buck(name="GleambookUsers", authType=None, saslPassword=None,
                            num_replicas=self.num_replicas,
                            bucket_size=self.bucket_size,
                            eviction_policy='noEviction', lww=self.lww)
        
        items_start_from = 0
        total_num_items = self.input.param("num_items",1000000)
        num_query = self.input.param("num_query",240)

        self.use_replica_to = False
        self.rate_limit = self.input.param('rate_limit', '100000')
        load_thread = Thread(target=self.load_buckets_with_high_ops,
                                 name="high_ops_load",
                                 args=(self.master, GleambookUsers, total_num_items,50,4, items_start_from,2, 0))
        self.log.info('starting the load thread...')
        load_thread.start()
        load_thread.join()
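        # Note: the thread is joined immediately after start(), so this
        # initial load effectively runs synchronously before moving on.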
        
        updates_from = items_start_from
        deletes_from = items_start_from + total_num_items/10
        items_start_from += total_num_items
        ########################################################################################################################
        self.log.info("Step 4: Create 8 analytics buckets and 8 datasets and connect.")
        load_thread = Thread(target=self.load_buckets_with_high_ops,
                                 name="high_ops_load",
                                 args=(self.master, GleambookUsers, total_num_items,50,4, items_start_from,2, 0))
        self.log.info('starting the load thread...')
        load_thread.start()
        
        items_start_from += total_num_items
        self.setup_cbas()
        load_thread.join()
                 
        ########################################################################################################################
        self.log.info("Step 5: Wait for ingestion to complete.")
        self.sleep(10,"Wait for the ingestion to complete")
         
        ########################################################################################################################
        self.log.info("Step 6: Verify the items count.")
        self.validate_items_count()
        
        ########################################################################################################################
        self.log.info("Step 7: Disconnect CBAS bucket and create secondary indexes.")
        self.disconnect_cbas_buckets()
        self.create_cbas_indexes()
         
        ########################################################################################################################
        self.log.info("Step 8: Delete 1M docs. Update 1M docs.")
        pool = Executors.newFixedThreadPool(5)
        num_items = self.input.param("num_items",5000)
        executors=[]
        query_executors = 1
        num_executors = query_executors

        upsert_thread = Thread(target=self.load_buckets_with_high_ops,
                                 name="high_ops_delete",
                                 args=(self.master, GleambookUsers, num_items/10,10000,4, updates_from,1, 0))
        delete_thread = Thread(target=self.delete_buckets_with_high_ops,
                                 name="high_ops_delete",
                                 args=(self.master, GleambookUsers, num_items/10, self.rate_limit, 10000, 2, deletes_from,1))
        delete_thread.start()
        upsert_thread.start()
        
        for i in xrange(query_executors):
            executors.append(QueryRunner(random.choice(queries),num_query,self.cbas_util))
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)
        
        delete_thread.join()
        upsert_thread.join()
        
        ########################################################################################################################
        self.log.info("Step 9: Connect cbas buckets.")
        self.connect_cbas_buckets()
        self.sleep(10,"Wait for the ingestion to complete")
         
        ########################################################################################################################
        self.log.info("Step 10: Verify the items count.")
        self.validate_items_count()
         
        ########################################################################################################################
        self.log.info("Step 12: When 11 is in progress do a KV Rebalance in of 1 nodes.")
        rest = RestConnection(self.kv_servers[2])
        rest.set_data_path(data_path=self.kv_servers[2].data_path,index_path=self.kv_servers[2].index_path,cbas_path=self.kv_servers[2].cbas_path)
        rebalance = self.cluster.async_rebalance(nodes_in_cluster, [self.kv_servers[2]], [])
        nodes_in_cluster += [self.kv_servers[2]]
        ########################################################################################################################
        self.log.info("Step 11: Create 10M docs.")
        pool = Executors.newFixedThreadPool(5)
        executors = []
        total_num_items = self.input.param("num_items",1000000)
        num_query = self.input.param("num_query",240)

        self.use_replica_to = False
        self.rate_limit = self.input.param('rate_limit', '100000')
        load_thread = Thread(target=self.load_buckets_with_high_ops,
                                 name="high_ops_load",
                                 args=(self.master, GleambookUsers, total_num_items,50,4, items_start_from,2, 0))
        self.log.info('starting the load thread...')
        load_thread.start()

        for i in xrange(query_executors):
            executors.append(QueryRunner(random.choice(queries),num_query,self.cbas_util))
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)
        load_thread.join()
        
        updates_from = items_start_from
        deletes_from = items_start_from + total_num_items/10
        items_start_from += total_num_items
                 
        ########################################################################################################################
        self.log.info("Step 13: Wait for rebalance to complete.")
        
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")
        self.sleep(20)
         
        ########################################################################################################################
        self.log.info("Step 14: Verify the items count.")
        self.validate_items_count()
         
        ########################################################################################################################
        self.log.info("Step 15: Delete 1M docs. Update 1M docs.")
        pool = Executors.newFixedThreadPool(5)
        num_items = self.input.param("num_items",5000)
        executors=[]
        query_executors = 1
        num_executors = query_executors

        upsert_thread = Thread(target=self.load_buckets_with_high_ops,
                                 name="high_ops_delete",
                                 args=(self.master, GleambookUsers, num_items/10,10000,4, updates_from,1, 0))
        delete_thread = Thread(target=self.delete_buckets_with_high_ops,
                                 name="high_ops_delete",
                                 args=(self.master, GleambookUsers, num_items/10, self.rate_limit, 10000, 2, deletes_from,1))
        delete_thread.start()
        upsert_thread.start()
        
        for i in xrange(query_executors):
            executors.append(QueryRunner(random.choice(queries),num_query,self.cbas_util))
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)
        
        delete_thread.join()
        upsert_thread.join()
        ########################################################################################################################
        self.log.info("Step 16: Verify Results that 1M docs gets deleted from analytics datasets.")
        self.validate_items_count()
         
        ########################################################################################################################
        self.log.info("Step 17: Disconnect CBAS buckets.")
        self.disconnect_cbas_buckets()
         
        ########################################################################################################################
        self.log.info("Step 18: Create 10M docs.")
        pool = Executors.newFixedThreadPool(5)
        executors = []
        total_num_items = self.input.param("num_items",1000000)
        num_query = self.input.param("num_query",240)

        self.use_replica_to = False
        self.rate_limit = self.input.param('rate_limit', '100000')
        load_thread = Thread(target=self.load_buckets_with_high_ops,
                                 name="high_ops_load",
                                 args=(self.master, GleambookUsers, total_num_items,50,4, items_start_from,2, 0))
        self.log.info('starting the load thread...')
        load_thread.start()

        for i in xrange(query_executors):
            executors.append(QueryRunner(random.choice(queries),num_query,self.cbas_util))
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)
        load_thread.join()
 
        updates_from = items_start_from
        deletes_from = items_start_from + total_num_items/10
        items_start_from += total_num_items
         
        ########################################################################################################################
        self.log.info("Step 19: Multiple Connect/Disconnect CBAS buckets during ingestion in step 18.")
        self.connect_cbas_buckets()
        self.sleep(5)
        self.disconnect_cbas_buckets()
        self.connect_cbas_buckets()
        self.sleep(5)
        self.disconnect_cbas_buckets()
        self.connect_cbas_buckets()
        self.sleep(5)
        self.disconnect_cbas_buckets()
        self.connect_cbas_buckets()
        ########################################################################################################################
        self.log.info("Step 20: Verify the docs count.")
        self.validate_items_count()
         
        ########################################################################################################################
        self.log.info("Step 21: Run 500 complex queries concurrently and verify the results.")
        pool = Executors.newFixedThreadPool(5)
        num_query = self.input.param("num_query",500)
        executors=[]
        num_executors = 5
        query_executors = num_executors
        for i in xrange(query_executors):
            executors.append(QueryRunner(random.choice(queries),num_query,self.cbas_util))
         
        self.log.info("Step 22: When 21 is in progress do a KV Rebalance out of 2 nodes.")
        rebalance = self.cluster.async_rebalance(nodes_in_cluster, [], self.kv_servers[1:2])
        nodes_in_cluster = [node for node in nodes_in_cluster if node not in self.kv_servers[1:2]]
         
        futures = pool.invokeAll(executors)
        self.log.info("Step 23: Wait for rebalance.")
        
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")
        self.sleep(20)
         
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)
         
        ########################################################################################################################
        self.log.info("Step 24: Create 10M docs.")
        pool = Executors.newFixedThreadPool(5)
        executors=[]
        num_executors = 2
        query_executors = num_executors

        total_num_items = self.input.param("num_items",1000000)
        num_query = self.input.param("num_query",240)
        
        self.use_replica_to = False
        self.rate_limit = self.input.param('rate_limit', '100000')
        load_thread = Thread(target=self.load_buckets_with_high_ops,
                                 name="high_ops_load",
                                 args=(self.master, GleambookUsers, total_num_items,50,4, items_start_from,2, 0))
        self.log.info('starting the load thread...')
        load_thread.start()
        for i in xrange(query_executors):
            executors.append(QueryRunner(random.choice(queries),num_query,self.cbas_util))
         
        self.log.info("Step 26: Run 500 complex queries concurrently and verify the results.")
        executors.append(QueryRunner(random.choice(queries),500,self.cbas_util))
         
         
        ##################################################### NEED TO BE UPDATED ##################################################################
        self.log.info("Step 25: When 24 is in progress do a CBAS Rebalance in of 2 nodes.")
        for node in self.cbas_servers[1:]:
            rest = RestConnection(node)
            rest.set_data_path(data_path=node.data_path,index_path=node.index_path,cbas_path=node.cbas_path)
        rebalance = self.cluster.async_rebalance(nodes_in_cluster, self.cbas_servers[1:],[],services=["cbas","cbas"])
        nodes_in_cluster = nodes_in_cluster + self.cbas_servers[1:]
        futures = pool.invokeAll(executors)
        
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)
        load_thread.join()
        
        updates_from = items_start_from
        deletes_from = items_start_from + total_num_items/10
        items_start_from += total_num_items
 
        self.log.info("Step 27: Wait for rebalance to complete.")
        
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")
         
        ########################################################################################################################
        self.log.info("Step 28: Verify the docs count.")
        self.validate_items_count()
         
        ########################################################################################################################
        self.log.info("Step 29: Delete 1M docs. Update 1M docs.")
        pool = Executors.newFixedThreadPool(5)
        num_items = self.input.param("num_items",5000)
        executors=[]
        query_executors = 1
        num_executors = query_executors

        upsert_thread = Thread(target=self.load_buckets_with_high_ops,
                                 name="high_ops_delete",
                                 args=(self.master, GleambookUsers, num_items/10,10000,4, updates_from,1, 0))
        delete_thread = Thread(target=self.delete_buckets_with_high_ops,
                                 name="high_ops_delete",
                                 args=(self.master, GleambookUsers, num_items/10, self.rate_limit, 10000, 2, deletes_from,1))
        delete_thread.start()
        upsert_thread.start()
        
        for i in xrange(query_executors):
            executors.append(QueryRunner(random.choice(queries),num_query,self.cbas_util))
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)
        
        delete_thread.join()
        upsert_thread.join()
        ########################################################################################################################
        self.log.info("Step 30: Verify the docs count.")
        self.validate_items_count()
 
        ########################################################################################################################
        self.log.info("Step 31: Create 10M docs.")
        pool = Executors.newFixedThreadPool(5)
        executors = []
        total_num_items = self.input.param("num_items",1000000)
        num_query = self.input.param("num_query",240)

        self.use_replica_to = False
        self.rate_limit = self.input.param('rate_limit', '100000')
        load_thread = Thread(target=self.load_buckets_with_high_ops,
                                 name="high_ops_load",
                                 args=(self.master, GleambookUsers, total_num_items,50,4, items_start_from,2, 0))
        self.log.info('starting the load thread...')
        load_thread.start()

        for i in xrange(query_executors):
            executors.append(QueryRunner(random.choice(queries),num_query,self.cbas_util))
 
        ###################################################### NEED TO BE UPDATED ##################################################################
        self.log.info("Step 32: When 31 is in progress do a CBAS Rebalance out of 1 nodes.")
        rebalance = self.cluster.async_rebalance(nodes_in_cluster, [], self.cbas_servers[-1:])
        nodes_in_cluster = [node for node in nodes_in_cluster if node not in self.cbas_servers[-1:]]
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)
        load_thread.join()
 
        updates_from = items_start_from
        deletes_from = items_start_from + total_num_items/10
        items_start_from += total_num_items
        #######################################################################################################################
        self.log.info("Step 33: Wait for rebalance to complete.")
        
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")
        self.sleep(20)
 
        ########################################################################################################################
        self.log.info("Step 34: Verify the docs count.")
        self.validate_items_count()
         
        ########################################################################################################################
        self.log.info("Step 35: Delete 1M docs. Update 1M docs.")
        pool = Executors.newFixedThreadPool(5)
        num_items = self.input.param("num_items",5000)
        executors=[]
        query_executors = 1
        num_executors = query_executors

        upsert_thread = Thread(target=self.load_buckets_with_high_ops,
                                 name="high_ops_delete",
                                 args=(self.master, GleambookUsers, num_items/10,10000,4, updates_from,1, 0))
        delete_thread = Thread(target=self.delete_buckets_with_high_ops,
                                 name="high_ops_delete",
                                 args=(self.master, GleambookUsers, num_items/10, self.rate_limit, 10000, 2, deletes_from,1))
        delete_thread.start()
        upsert_thread.start()
        
        for i in xrange(query_executors):
            executors.append(QueryRunner(random.choice(queries),num_query,self.cbas_util))
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)
        
        delete_thread.join()
        upsert_thread.join()
        ########################################################################################################################
        self.log.info("Step 36: Verify the docs count.")
        self.validate_items_count()
         
        ########################################################################################################################
        self.log.info("Step 37: Create 10M docs.")
        pool = Executors.newFixedThreadPool(5)
        executors = []
        total_num_items = self.input.param("num_items",1000000)
        num_query = self.input.param("num_query",240)

        self.use_replica_to = False
        self.rate_limit = self.input.param('rate_limit', '100000')
        load_thread = Thread(target=self.load_buckets_with_high_ops,
                                 name="high_ops_load",
                                 args=(self.master, GleambookUsers, total_num_items,50,4, items_start_from,2, 0))
        self.log.info('starting the load thread...')
        load_thread.start()

        for i in xrange(query_executors):
            executors.append(QueryRunner(random.choice(queries),num_query,self.cbas_util))
         
        ###################################################### NEED TO BE UPDATED ##################################################################
        self.log.info("Step 38: When 37 is in progress do a CBAS CC SWAP Rebalance of 2 nodes.")
        
        for node in self.cbas_servers[-1:]:
            rest = RestConnection(node)
            rest.set_data_path(data_path=node.data_path,index_path=node.index_path,cbas_path=node.cbas_path)
        rebalance = self.cluster.async_rebalance(nodes_in_cluster,self.cbas_servers[-1:], [self.cbas_node],services=["cbas"],check_vbucket_shuffling=False)
        nodes_in_cluster += self.cbas_servers[-1:]
        nodes_in_cluster.remove(self.cbas_node)
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)
        load_thread.join()
        
        ########################################################################################################################
        self.log.info("Step 39: Wait for rebalance to complete.")
        
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")
        self.sleep(20)
         
        updates_from = items_start_from
        deletes_from = items_start_from + total_num_items/10
        items_start_from += total_num_items
         
        ########################################################################################################################
        self.log.info("Step 40: Verify the docs count.")
        self.validate_items_count()
 
        ########################################################################################################################
        self.log.info("Step 41: Delete 1M docs. Update 1M docs.")
        pool = Executors.newFixedThreadPool(5)
        num_items = self.input.param("num_items",5000)
        executors=[]
        query_executors = 1
        num_executors = query_executors

        upsert_thread = Thread(target=self.load_buckets_with_high_ops,
                                 name="high_ops_delete",
                                 args=(self.master, GleambookUsers, num_items/10,10000,4, updates_from,1, 0))
        delete_thread = Thread(target=self.delete_buckets_with_high_ops,
                                 name="high_ops_delete",
                                 args=(self.master, GleambookUsers, num_items/10, self.rate_limit, 10000, 2, deletes_from,1))
        delete_thread.start()
        upsert_thread.start()
        
        for i in xrange(query_executors):
            executors.append(QueryRunner(random.choice(queries),num_query,self.cbas_util))
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)
        
        delete_thread.join()
        upsert_thread.join()
        ########################################################################################################################
        self.log.info("Step 42: Verify the docs count.")
        self.validate_items_count() 
         
        ########################################################################################################################
        self.log.info("Step 43: Create 10M docs.")
        pool = Executors.newFixedThreadPool(5)
        executors = []
        total_num_items = self.input.param("num_items",1000000)
        num_query = self.input.param("num_query",240)

        self.use_replica_to = False
        self.rate_limit = self.input.param('rate_limit', '100000')
        load_thread = Thread(target=self.load_buckets_with_high_ops,
                                 name="high_ops_load",
                                 args=(self.master, GleambookUsers, total_num_items,50,4, items_start_from,2, 0))
        self.log.info('starting the load thread...')
        load_thread.start()

        for i in xrange(query_executors):
            executors.append(QueryRunner(random.choice(queries),num_query,self.cbas_util))
         
        ###################################################### NEED TO BE UPDATED ##################################################################
        self.log.info("Step 44: When 43 is in progress do a KV+CBAS Rebalance IN.")
        rest = RestConnection(self.cbas_node)
        rest.set_data_path(data_path=self.cbas_node.data_path,index_path=self.cbas_node.index_path,cbas_path=self.cbas_node.cbas_path)
        rebalance = self.cluster.async_rebalance(nodes_in_cluster, [self.cbas_node], [],services=["cbas"])
        nodes_in_cluster += [self.cbas_node]
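        # Wait for the CBAS rebalance to finish before starting the KV rebalance;
        # the cluster runs only one rebalance at a time.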
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")
        rest = RestConnection(self.kv_servers[1])
        rest.set_data_path(data_path=self.kv_servers[1].data_path,index_path=self.kv_servers[1].index_path,cbas_path=self.kv_servers[1].cbas_path)
        rebalance = self.cluster.async_rebalance(nodes_in_cluster, [self.kv_servers[1]], [])
        nodes_in_cluster += [self.kv_servers[1]]
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)
        load_thread.join()
        ########################################################################################################################
        self.log.info("Step 45: Wait for rebalance to complete.")
        
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")
        self.sleep(20)
         
        updates_from = items_start_from
        deletes_from = items_start_from + total_num_items/10
        items_start_from += total_num_items        
         
        ########################################################################################################################
        self.log.info("Step 46: Verify the docs count.")
        self.validate_items_count() 
         
        ########################################################################################################################
        self.log.info("Step 47: Delete 1M docs. Update 1M docs.")
        pool = Executors.newFixedThreadPool(5)
        num_items = self.input.param("num_items",5000)
        executors=[]
        query_executors = 1
        num_executors = query_executors

        upsert_thread = Thread(target=self.load_buckets_with_high_ops,
                                 name="high_ops_delete",
                                 args=(self.master, GleambookUsers, num_items/10,10000,4, updates_from,1, 0))
        delete_thread = Thread(target=self.delete_buckets_with_high_ops,
                                 name="high_ops_delete",
                                 args=(self.master, GleambookUsers, num_items/10, self.rate_limit, 10000, 2, deletes_from,1))
        delete_thread.start()
        upsert_thread.start()
        
        for i in xrange(query_executors):
            executors.append(QueryRunner(random.choice(queries),num_query,self.cbas_util))
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)
        
        delete_thread.join()
        upsert_thread.join()
        ########################################################################################################################
        self.log.info("Step 48: Verify the docs count.")
        self.validate_items_count() 
 
        ########################################################################################################################
        self.log.info("Step 49: Create 10M docs.")
        pool = Executors.newFixedThreadPool(5)
        executors = []
        total_num_items = self.input.param("num_items",1000000)
        num_query = self.input.param("num_query",240)

        self.use_replica_to = False
        self.rate_limit = self.input.param('rate_limit', '100000')
        load_thread = Thread(target=self.load_buckets_with_high_ops,
                                 name="high_ops_load",
                                 args=(self.master, GleambookUsers, total_num_items,50,4, items_start_from,2, 0))
        self.log.info('starting the load thread...')
        load_thread.start()

        for i in xrange(query_executors):
            executors.append(QueryRunner(random.choice(queries),num_query,self.cbas_util))
         
        ########################################################################################################################
        self.log.info("Step 50: When 49 is in progress do a CBAS Rebalance OUT.")
        rebalance = self.cluster.async_rebalance(nodes_in_cluster, [], self.cbas_servers[-1:])
        nodes_in_cluster = [node for node in nodes_in_cluster if node not in self.cbas_servers[-1:]]
        
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)
        load_thread.join()
        ########################################################################################################################
        self.log.info("Step 51: Wait for rebalance to complete.")
        
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")
        self.sleep(20)
        updates_from = items_start_from
        deletes_from = items_start_from + total_num_items/10
        items_start_from += total_num_items  
 
        ########################################################################################################################
        self.log.info("Step 52: Verify the docs count.")
        self.validate_items_count() 
 
        ########################################################################################################################
        self.log.info("Step 53: Delete 1M docs. Update 1M docs.")
        pool = Executors.newFixedThreadPool(5)
        num_items = self.input.param("num_items",5000)
        executors=[]
        query_executors = 1
        num_executors = query_executors

        upsert_thread = Thread(target=self.load_buckets_with_high_ops,
                                 name="high_ops_delete",
                                 args=(self.master, GleambookUsers, num_items/10,10000,4, updates_from,1, 0))
        delete_thread = Thread(target=self.delete_buckets_with_high_ops,
                                 name="high_ops_delete",
                                 args=(self.master, GleambookUsers, num_items/10, self.rate_limit, 10000, 2, deletes_from,1))
        delete_thread.start()
        upsert_thread.start()
        
        for i in xrange(query_executors):
            executors.append(QueryRunner(random.choice(queries),num_query,self.cbas_util))
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)
        
        delete_thread.join()
        upsert_thread.join()
        ########################################################################################################################
        self.log.info("Step 54: Verify the docs count.")
        self.validate_items_count() 
         
         
        ########################################################################################################################
        self.log.info("Step 55: Create 10M docs.")
        pool = Executors.newFixedThreadPool(5)
        executors = []
        total_num_items = self.input.param("num_items",1000000)
        num_query = self.input.param("num_query",240)

        self.use_replica_to = False
        self.rate_limit = self.input.param('rate_limit', '100000')
        load_thread = Thread(target=self.load_buckets_with_high_ops,
                                 name="high_ops_load",
                                 args=(self.master, GleambookUsers, total_num_items,50,4, items_start_from,2, 0))
        self.log.info('starting the load thread...')
        load_thread.start()

        for i in xrange(query_executors):
            executors.append(QueryRunner(random.choice(queries),num_query,self.cbas_util))
         
        ########################################################################################################################
        self.log.info("Step 56: When 55 is in progress do a CBAS Rebalance IN.")
        for node in self.cbas_servers[-1:]:
            rest = RestConnection(node)
            rest.set_data_path(data_path=node.data_path,index_path=node.index_path,cbas_path=node.cbas_path)
        rebalance = self.cluster.async_rebalance(nodes_in_cluster, self.cbas_servers[-1:], [], services=["cbas"])
        nodes_in_cluster += self.cbas_servers[-1:]
         
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)
        load_thread.join()
        ########################################################################################################################
        self.log.info("Step 57: Wait for rebalance to complete.")
        
        reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
        self.assertTrue(reached, "rebalance failed, stuck or did not complete")
        self.sleep(20)
         
        updates_from = items_start_from
        deletes_from = items_start_from + total_num_items/10
        items_start_from += total_num_items  
 
        ########################################################################################################################
        self.log.info("Step 58: Verify the docs count.")
        self.validate_items_count() 
         
        ########################################################################################################################
        self.log.info("Step 59: Delete 1M docs. Update 1M docs.")
        pool = Executors.newFixedThreadPool(5)
        num_items = self.input.param("num_items",5000)
        executors=[]
        query_executors = 1
        num_executors = query_executors

        upsert_thread = Thread(target=self.load_buckets_with_high_ops,
                                 name="high_ops_delete",
                                 args=(self.master, GleambookUsers, num_items/10,10000,4, updates_from,1, 0))
        delete_thread = Thread(target=self.delete_buckets_with_high_ops,
                                 name="high_ops_delete",
                                 args=(self.master, GleambookUsers, num_items/10, self.rate_limit, 10000, 2, deletes_from,1))
        delete_thread.start()
        upsert_thread.start()
        
        for i in xrange(query_executors):
            executors.append(QueryRunner(random.choice(queries),num_query,self.cbas_util))
        futures = pool.invokeAll(executors)
        for future in futures:
            print future.get(num_executors, TimeUnit.SECONDS)
        print "Executors completed!!"
        shutdown_and_await_termination(pool, num_executors)
        
        delete_thread.join()
        upsert_thread.join()
        ########################################################################################################################
        self.log.info("Step 60: Verify the docs count.")
        self.validate_items_count() 
                 
 
        print "End Time: %s"%str(time.strftime("%H:%M:%S", time.gmtime(time.time())))