Example #1
    def setUp(self):
        super(basic_ops, self).setUp()

        self.key = 'test_docs'.rjust(self.key_size, '0')

        nodes_init = self.cluster.servers[1:self.nodes_init] \
            if self.nodes_init != 1 else []
        self.task.rebalance([self.cluster.master], nodes_init, [])
        self.cluster.nodes_in_cluster.extend([self.cluster.master] +
                                             nodes_init)
        self.bucket_util.create_default_bucket(
            replica=self.num_replicas,
            compression_mode=self.compression_mode,
            bucket_type=self.bucket_type)
        self.bucket_util.add_rbac_user()

        self.src_bucket = self.bucket_util.get_all_buckets()
        self.durability_helper = DurabilityHelper(
            self.log,
            len(self.cluster.nodes_in_cluster),
            durability=self.durability_level,
            replicate_to=self.replicate_to,
            persist_to=self.persist_to)
        # Reset active_resident_threshold to avoid any further data loading
        # to reach DGM
        self.active_resident_threshold = 0
        self.cluster_util.print_cluster_stats()
        self.bucket_util.print_bucket_stats()
        self.log.info("==========Finished Basic_ops base setup========")
Example #2
 def setUp(self):
     super(UpgradeTests, self).setUp()
     self.durability_helper = DurabilityHelper(
         self.log, len(self.cluster.nodes_in_cluster))
     self.verification_dict = dict()
     self.verification_dict["ops_create"] = self.num_items
     self.verification_dict["ops_delete"] = 0
Example #3
    def setUp(self):
        super(BucketDurabilityBase, self).setUp()

        if len(self.cluster.servers) < self.nodes_init:
            self.fail("Not enough nodes for rebalance")

        # Disable auto-failover to avoid failover of nodes
        status = RestConnection(self.cluster.master) \
            .update_autofailover_settings(False, 120, False)
        self.assertTrue(status, msg="Failure during disabling auto-failover")

        self.durability_helper = DurabilityHelper(
            self.log, len(self.cluster.nodes_in_cluster))
        self.kv_nodes = self.cluster_util.get_kv_nodes(self.cluster)

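        # Number of nodes to disrupt during error simulation
        # (two when more than one replica is configured)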
        self.num_nodes_affected = 1
        if self.num_replicas > 1:
            self.num_nodes_affected = 2

        # Bucket create options representation
        self.bucket_template = dict()
        self.bucket_template[Bucket.name] = "default"
        self.bucket_template[Bucket.ramQuotaMB] = 100
        self.bucket_template[Bucket.replicaNumber] = self.num_replicas
        if self.bucket_type == Bucket.Type.MEMBASE:
            self.bucket_template[Bucket.storageBackend] = self.bucket_storage

        # These two params will be set during each iteration
        self.bucket_template[Bucket.bucketType] = None
        self.bucket_template[Bucket.durabilityMinLevel] = None

        self.bucket_types_to_test = [
            Bucket.Type.MEMBASE, Bucket.Type.EPHEMERAL, Bucket.Type.MEMCACHED
        ]

        self.d_level_order = [
            Bucket.DurabilityLevel.NONE, Bucket.DurabilityLevel.MAJORITY,
            Bucket.DurabilityLevel.MAJORITY_AND_PERSIST_TO_ACTIVE,
            Bucket.DurabilityLevel.PERSIST_TO_MAJORITY
        ]

        # Dict representing the possible levels supported by each bucket type
        self.possible_d_levels = dict()
        self.possible_d_levels[Bucket.Type.MEMBASE] = \
            self.bucket_util.get_supported_durability_levels()
        self.possible_d_levels[Bucket.Type.EPHEMERAL] = [
            Bucket.DurabilityLevel.NONE, Bucket.DurabilityLevel.MAJORITY
        ]
        self.possible_d_levels[Bucket.Type.MEMCACHED] = [
            Bucket.DurabilityLevel.NONE
        ]

        # Dict to store the list of active/replica VBs in each node
        self.vbs_in_node = dict()
        for node in self.cluster_util.get_kv_nodes(self.cluster):
            shell = RemoteMachineShellConnection(node)
            self.vbs_in_node[node] = dict()
            self.vbs_in_node[node]["shell"] = shell
        self.log.info("===== BucketDurabilityBase setup complete =====")
Example #4
 def setUp(self):
     super(AutoFailoverBaseTest, self).setUp()
     self._get_params()
     self.rest = RestConnection(self.orchestrator)
     self.initial_load_gen = self.get_doc_generator(0, self.num_items)
     self.update_load_gen = self.get_doc_generator(0, self.update_items)
     self.delete_load_gen = self.get_doc_generator(self.update_items,
                                                   self.delete_items)
     self.set_up_cluster()
     self.load_all_buckets(self.initial_load_gen, "create", 0)
     self.server_index_to_fail = self.input.param("server_index_to_fail",
                                                  None)
     self.new_replica = self.input.param("new_replica", None)
     self.replica_update_during = self.input.param("replica_update_during",
                                                   None)
     if self.server_index_to_fail is None:
         self.server_to_fail = self._servers_to_fail()
     else:
         self.server_to_fail = [
             self.cluster.servers[self.server_index_to_fail]
         ]
     self.servers_to_add = self.cluster.servers[
         self.nodes_init:self.nodes_init + self.nodes_in]
     self.servers_to_remove = self.cluster.servers[
         self.nodes_init - self.nodes_out:self.nodes_init]
     self.durability_helper = DurabilityHelper(self.log,
                                               len(self.cluster.servers),
                                               self.durability_level)
     self.active_vb_in_failover_nodes = list()
     self.replica_vb_in_failover_nodes = list()
     self.get_vbucket_info_from_failover_nodes()
     self.cluster_util.print_cluster_stats()
     self.bucket_util.print_bucket_stats()
Example #5
    def test_replica_update(self):
        if self.atomicity:
            replica_count = 3
        else:
            replica_count = 4
        if self.nodes_init < 2:
            self.log.error("Test not supported for < 2 node cluster")
            return

        doc_ops = self.input.param("doc_ops", "")
        bucket_helper = BucketHelper(self.cluster.master)

        doc_count = self.num_items
        start_doc_for_insert = self.num_items

        self.is_sync_write_enabled = DurabilityHelper.is_sync_write_enabled(
            self.bucket_durability_level, self.durability_level)
        # Replica increment tests
        doc_count, start_doc_for_insert = self.generic_replica_update(
            doc_count,
            doc_ops,
            bucket_helper,
            range(1, min(replica_count, self.nodes_init)),
            start_doc_for_insert)

        # Replica decrement tests
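        # Steps the replica count back down from one below the last
        # incremented value to 0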
        _, _ = self.generic_replica_update(
            doc_count,
            doc_ops,
            bucket_helper,
            range(min(replica_count, self.nodes_init)-2, -1, -1),
            start_doc_for_insert)
Example #6
    def setUp(self):
        super(CollectionBase, self).setUp()
        self.log_setup_status("CollectionBase", "started")

        self.key = 'test_collection'.rjust(self.key_size, '0')
        self.simulate_error = self.input.param("simulate_error", None)
        self.error_type = self.input.param("error_type", "memory")
        self.doc_ops = self.input.param("doc_ops", None)
        # If True, creates bucket/scope/collections with simpler names
        self.use_simple_names = self.input.param("use_simple_names", True)
        self.spec_name = self.input.param("bucket_spec",
                                          "single_bucket.default")
        self.data_spec_name = self.input.param("data_spec_name",
                                               "initial_load")
        self.remove_default_collection = \
            self.input.param("remove_default_collection", False)

        self.action_phase = self.input.param("action_phase",
                                             "before_default_load")
        self.skip_collections_cleanup = \
            self.input.param("skip_collections_cleanup", False)
        self.validate_docs_count_during_teardown = \
            self.input.param("validate_docs_count_during_teardown", False)
        self.batch_size = self.input.param("batch_size", 200)
        self.process_concurrency = self.input.param("process_concurrency", 1)
        self.retry_get_process_num = \
            self.input.param("retry_get_process_num", 200)
        self.change_magma_quota = self.input.param("change_magma_quota", False)
        self.crud_batch_size = 100
        self.num_nodes_affected = 1
        if self.num_replicas > 1:
            self.num_nodes_affected = 2

        if self.doc_ops:
            self.doc_ops = self.doc_ops.split(';')

        self.durability_helper = DurabilityHelper(
            self.log, len(self.cluster.nodes_in_cluster),
            self.durability_level)

        # Disable auto-failover to avoid failover of nodes
        status = RestConnection(self.cluster.master) \
            .update_autofailover_settings(False, 120)
        self.assertTrue(status, msg="Failure during disabling auto-failover")
        self.bucket_helper_obj = BucketHelper(self.cluster.master)
        self.disk_optimized_thread_settings = \
            self.input.param("disk_optimized_thread_settings", False)
        if self.disk_optimized_thread_settings:
            self.set_num_writer_and_reader_threads(
                num_writer_threads="disk_io_optimized",
                num_reader_threads="disk_io_optimized")

        try:
            self.collection_setup()
        except Java_base_exception as exception:
            self.handle_setup_exception(exception)
        except Exception as exception:
            self.handle_setup_exception(exception)
        self.supported_d_levels = \
            self.bucket_util.get_supported_durability_levels()
        self.log_setup_status("CollectionBase", "complete")
Example #7
 def setUp(self):
     super(RebalanceBaseTest, self).setUp()
     self.doc_ops = self.input.param("doc_ops", "create")
     self.doc_size = self.input.param("doc_size", 10)
     self.key_size = self.input.param("key_size", 0)
     self.zone = self.input.param("zone", 1)
     self.new_replica = self.input.param("new_replica", None)
     self.default_view_name = "default_view"
     self.default_map_func = "function (doc) {\n  emit(doc._id, doc);\n}"
     self.default_view = View(self.default_view_name, self.default_map_func,
                              None)
     self.max_verify = self.input.param("max_verify", None)
     self.std_vbucket_dist = self.input.param("std_vbucket_dist", None)
     self.key = 'test_docs'.rjust(self.key_size, '0')
     nodes_init = self.cluster.servers[
         1:self.nodes_init] if self.nodes_init != 1 else []
     self.task.rebalance([self.cluster.master], nodes_init, [])
     self.cluster.nodes_in_cluster.extend([self.cluster.master] +
                                          nodes_init)
     self.bucket_util.create_default_bucket(replica=self.num_replicas)
     self.bucket_util.add_rbac_user()
     self.sleep(10)
     gen_create = self.get_doc_generator(0, self.num_items)
     self.print_cluster_stat_task = \
         self.cluster_util.async_print_cluster_stats()
     for bucket in self.bucket_util.buckets:
         task = self.task.async_load_gen_docs(
             self.cluster,
             bucket,
             gen_create,
             "create",
             0,
             persist_to=self.persist_to,
             replicate_to=self.replicate_to,
             batch_size=10,
             timeout_secs=self.sdk_timeout,
             process_concurrency=8,
             retries=self.sdk_retries,
             durability=self.durability_level)
         self.task.jython_task_manager.get_task_result(task)
         self.sleep(20)
         current_item = self.bucket_util.get_bucket_current_item_count(
             self.cluster, bucket)
         self.num_items = current_item
         self.log.info("Inserted {} number of items after loadgen".format(
             self.num_items))
     self.gen_load = self.get_doc_generator(0, self.num_items)
     # gen_update is used for mutating half of the uploaded data
     self.gen_update = self.get_doc_generator(0, (self.num_items / 2))
     self.durability_helper = DurabilityHelper(
         self.log,
         len(self.cluster.nodes_in_cluster),
         durability=self.durability_level,
         replicate_to=self.replicate_to,
         persist_to=self.persist_to)
     self.log.info("==========Finished rebalance base setup========")
Example #8
    def setUp(self):
        super(CollectionBase, self).setUp()
        self.log_setup_status("CollectionBase", "started")

        self.MAX_SCOPES = CbServer.max_scopes
        self.MAX_COLLECTIONS = CbServer.max_collections
        self.key = 'test_collection'.rjust(self.key_size, '0')
        self.simulate_error = self.input.param("simulate_error", None)
        self.error_type = self.input.param("error_type", "memory")
        self.doc_ops = self.input.param("doc_ops", None)
        self.spec_name = self.input.param("bucket_spec",
                                          "single_bucket.default")
        self.data_spec_name = self.input.param("data_spec_name",
                                               "initial_load")
        self.remove_default_collection = \
            self.input.param("remove_default_collection", False)

        self.action_phase = self.input.param("action_phase",
                                             "before_default_load")
        self.skip_collections_cleanup = \
            self.input.param("skip_collections_cleanup", False)
        self.validate_docs_count_during_teardown = \
            self.input.param("validate_docs_count_during_teardown", False)
        self.batch_size = self.input.param("batch_size", 200)
        self.vbuckets = self.input.param("vbuckets",
                                         self.cluster_util.vbuckets)
        self.retry_get_process_num = self.input.param("retry_get_process_num",
                                                      25)

        self.crud_batch_size = 100
        self.num_nodes_affected = 1
        if self.num_replicas > 1:
            self.num_nodes_affected = 2

        if self.doc_ops:
            self.doc_ops = self.doc_ops.split(';')

        self.durability_helper = DurabilityHelper(
            self.log, len(self.cluster.nodes_in_cluster),
            self.durability_level)

        # Disable auto-failover to avoid failover of nodes
        status = RestConnection(self.cluster.master) \
            .update_autofailover_settings(False, 120, False)
        self.assertTrue(status, msg="Failure during disabling auto-failover")
        self.bucket_helper_obj = BucketHelper(self.cluster.master)

        try:
            self.collection_setup()
        except Java_base_exception as exception:
            self.handle_setup_exception(exception)
        except Exception as exception:
            self.handle_setup_exception(exception)
        self.supported_d_levels = \
            self.bucket_util.get_supported_durability_levels()
        self.log_setup_status("CollectionBase", "complete")
Example #9
    def test_with_sync_write(self):
        cluster_node = choice(self.kv_nodes)
        target_vb_type, simulate_error = \
            DurabilityHelper.get_vb_and_error_type(self.durability_level)
        doc_gen = doc_generator(
            self.key,
            0,
            2,
            target_vbucket=self.node_data[cluster_node]["%s_vbs" %
                                                        target_vb_type])
        client = self.sdk_client_pool.get_client_for_bucket(
            self.bucket, self.scope_name, self.collection_name)

        key_1, value_1 = doc_gen.next()
        key_2, value_2 = doc_gen.next()

        if self.doc_ops[0] != DocLoading.Bucket.DocOps.CREATE:
            client.crud(DocLoading.Bucket.DocOps.CREATE, key_1, value_1)
        if self.doc_ops[1] != DocLoading.Bucket.DocOps.CREATE:
            client.crud(DocLoading.Bucket.DocOps.CREATE, key_2, value_2)

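        # sync_op issues the CRUD with durability (it gets blocked by the
        # simulated error below), while async_op runs the same type of CRUD
        # without durability on a different key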
        sync_op = Thread(target=self.crud,
                         args=[client, self.doc_ops[0], key_1],
                         kwargs={
                             "value": value_1,
                             "durability": self.durability_level,
                             "expected_thread_val": 1
                         })
        async_op = Thread(target=self.crud,
                          args=[client, self.doc_ops[1], key_2],
                          kwargs={
                              "value": value_2,
                              "expected_thread_val": 0
                          })

        cb_err = CouchbaseError(self.log,
                                self.node_data[cluster_node]["shell"])
        cb_err.create(simulate_error, self.bucket.name)

        # Start doc_ops
        sync_op.start()
        self.sleep(1, "Wait before async operation")
        async_op.start()

        # Wait for ops to complete
        async_op.join()
        cb_err.revert(simulate_error, self.bucket.name)
        sync_op.join()

        self.validate_test_failure()
Example #10
    def test_sync_write_in_progress(self):
        doc_ops = self.input.param("doc_ops", "create;create").split(';')
        shell_conn = dict()
        cbstat_obj = dict()
        error_sim = dict()
        vb_info = dict()
        active_vbs = dict()
        replica_vbs = dict()

        # Override the d_level and pick the error_simulation type for this test
        self.__get_d_level_and_error_to_simulate()

        # Create an SDK client for performing doc_ops locally
        client = SDKClient([self.cluster.master], self.bucket)

        target_nodes = DurabilityHelper.getTargetNodes(self.cluster,
                                                       self.nodes_init,
                                                       self.num_nodes_affected)
        for node in target_nodes:
            shell_conn[node.ip] = RemoteMachineShellConnection(node)
            cbstat_obj[node.ip] = Cbstats(node)
            vb_info["init"] = dict()
            vb_info["init"][node.ip] = cbstat_obj[node.ip].vbucket_seqno(
                self.bucket.name)
            error_sim[node.ip] = CouchbaseError(self.log, shell_conn[node.ip])
            # Fetch the affected nodes' active and replica vbucket numbers
            active_vbs[node.ip] = cbstat_obj[node.ip].vbucket_list(
                self.bucket.name, vbucket_type="active")
            replica_vbs[node.ip] = cbstat_obj[node.ip].vbucket_list(
                self.bucket.name, vbucket_type="replica")

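        # Choose the vbuckets to target: for MAJORITY_AND_PERSIST_TO_ACTIVE
        # use the active vbuckets of the affected nodes, otherwise use only
        # the replica vbuckets common to every affected node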
        if self.durability_level \
                == Bucket.DurabilityLevel.MAJORITY_AND_PERSIST_TO_ACTIVE:
            target_vbs = active_vbs
            target_vbuckets = list()
            for target_node in target_nodes:
                target_vbuckets += target_vbs[target_node.ip]
        else:
            target_vbuckets = replica_vbs[target_nodes[0].ip]
            if len(target_nodes) > 1:
                index = 1
                while index < len(target_nodes):
                    target_vbuckets = list(
                        set(target_vbuckets).intersection(
                            set(replica_vbs[target_nodes[index].ip])))
                    index += 1

        doc_load_spec = dict()
        doc_load_spec["doc_crud"] = dict()
        doc_load_spec["doc_crud"][MetaCrudParams.DocCrud.COMMON_DOC_KEY] \
            = "test_collections"
        doc_load_spec[MetaCrudParams.TARGET_VBUCKETS] = target_vbuckets
        doc_load_spec[MetaCrudParams.COLLECTIONS_CONSIDERED_FOR_CRUD] = 5
        doc_load_spec[MetaCrudParams.SCOPES_CONSIDERED_FOR_CRUD] = "all"
        doc_load_spec[MetaCrudParams.DURABILITY_LEVEL] = self.durability_level
        doc_load_spec[MetaCrudParams.SDK_TIMEOUT] = 60

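        # Enable only the CRUD percentage matching the first doc_op, so the
        # async loader drives just that operation on the target vbuckets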
        if doc_ops[0] == DocLoading.Bucket.DocOps.CREATE:
            doc_load_spec["doc_crud"][
                MetaCrudParams.DocCrud.CREATE_PERCENTAGE_PER_COLLECTION] = 1
        elif doc_ops[0] == DocLoading.Bucket.DocOps.UPDATE:
            doc_load_spec["doc_crud"][
                MetaCrudParams.DocCrud.UPDATE_PERCENTAGE_PER_COLLECTION] = 1
        elif doc_ops[0] == DocLoading.Bucket.DocOps.REPLACE:
            doc_load_spec["doc_crud"][
                MetaCrudParams.DocCrud.REPLACE_PERCENTAGE_PER_COLLECTION] = 1
        elif doc_ops[0] == DocLoading.Bucket.DocOps.DELETE:
            doc_load_spec["doc_crud"][
                MetaCrudParams.DocCrud.DELETE_PERCENTAGE_PER_COLLECTION] = 1

        # Induce error condition for testing
        for node in target_nodes:
            error_sim[node.ip].create(self.simulate_error,
                                      bucket_name=self.bucket.name)
            self.sleep(3, "Wait for error simulation to take effect")

        doc_loading_task = \
            self.bucket_util.run_scenario_from_spec(
                self.task,
                self.cluster,
                self.cluster.buckets,
                doc_load_spec,
                async_load=True)

        self.sleep(5, "Wait for doc ops to reach server")

        for bucket, s_dict in doc_loading_task.loader_spec.items():
            for s_name, c_dict in s_dict["scopes"].items():
                for c_name, c_meta in c_dict["collections"].items():
                    client.select_collection(s_name, c_name)
                    for op_type in c_meta:
                        key, value = c_meta[op_type]["doc_gen"].next()
                        if self.with_non_sync_writes:
                            fail = client.crud(doc_ops[1],
                                               key,
                                               value,
                                               exp=0,
                                               timeout=2,
                                               time_unit="seconds")
                        else:
                            fail = client.crud(
                                doc_ops[1],
                                key,
                                value,
                                exp=0,
                                durability=self.durability_level,
                                timeout=2,
                                time_unit="seconds")

                        expected_exception = \
                            SDKException.AmbiguousTimeoutException
                        retry_reason = \
                            SDKException.RetryReason.KV_SYNC_WRITE_IN_PROGRESS
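                        # If the pending sync_write is a CREATE, the doc is
                        # not yet visible, so DELETE/REPLACE are expected to
                        # fail with DocumentNotFound instead of timing out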
                        if doc_ops[0] == DocLoading.Bucket.DocOps.CREATE \
                                and doc_ops[1] in \
                                [DocLoading.Bucket.DocOps.DELETE,
                                 DocLoading.Bucket.DocOps.REPLACE]:
                            expected_exception = \
                                SDKException.DocumentNotFoundException
                            retry_reason = None

                        # Validate the returned error from the SDK
                        if expected_exception not in str(fail["error"]):
                            self.log_failure("Invalid exception for %s: %s" %
                                             (key, fail["error"]))
                        if retry_reason \
                                and retry_reason not in str(fail["error"]):
                            self.log_failure(
                                "Invalid retry reason for %s: %s" %
                                (key, fail["error"]))

                        # Try reading the value in SyncWrite state
                        fail = client.crud("read", key)
                        if doc_ops[0] == "create":
                            # Expected KeyNotFound in case of CREATE op
                            if fail["status"] is True:
                                self.log_failure(
                                    "%s returned value during SyncWrite %s" %
                                    (key, fail))
                        else:
                            # Expects prev val in case of other operations
                            if fail["status"] is False:
                                self.log_failure(
                                    "Key %s read failed for prev value: %s" %
                                    (key, fail))

        # Revert the introduced error condition
        for node in target_nodes:
            error_sim[node.ip].revert(self.simulate_error,
                                      bucket_name=self.bucket.name)

        # Wait for doc_loading to complete
        self.task_manager.get_task_result(doc_loading_task)
        self.bucket_util.validate_doc_loading_results(doc_loading_task)
        if doc_loading_task.result is False:
            self.log_failure("Doc CRUDs failed")

        # Close the SDK client used for the doc_ops
        client.close()
        self.validate_test_failure()
Example #11
    def setUp(self):
        super(DurabilityTestsBase, self).setUp()

        self.simulate_error = self.input.param("simulate_error", None)
        self.error_type = self.input.param("error_type", "memory")
        self.doc_ops = self.input.param("doc_ops", None)
        self.with_non_sync_writes = self.input.param("with_non_sync_writes",
                                                     False)
        self.skip_init_load = self.input.param("skip_init_load", False)
        self.crud_batch_size = 100
        self.num_nodes_affected = 1
        if self.num_replicas > 1:
            self.num_nodes_affected = 2

        if self.doc_ops:
            self.doc_ops = self.doc_ops.split(';')

        self.durability_helper = DurabilityHelper(
            self.log, len(self.cluster.nodes_in_cluster),
            self.durability_level)

        # Initialize cluster using given nodes
        nodes_init = self.cluster.servers[1:self.nodes_init] \
            if self.nodes_init != 1 else []
        self.task.rebalance([self.cluster.master], nodes_init, [])
        self.cluster.nodes_in_cluster.extend([self.cluster.master]+nodes_init)

        # Disable auto-failover to avoid failover of nodes
        status = RestConnection(self.cluster.master) \
            .update_autofailover_settings(False, 120, False)
        self.assertTrue(status, msg="Failure during disabling auto-failover")

        # Create default bucket and add rbac user
        self.bucket_util.create_default_bucket(
            replica=self.num_replicas, compression_mode=self.compression_mode,
            bucket_type=self.bucket_type, storage=self.bucket_storage,
            eviction_policy=self.bucket_eviction_policy)
        self.bucket_util.add_rbac_user()

        self.cluster_util.print_cluster_stats()
        self.bucket = self.bucket_util.buckets[0]

        # Create sdk_clients for pool
        if self.sdk_client_pool:
            self.log.info("Creating SDK client pool")
            self.sdk_client_pool.create_clients(
                self.bucket,
                self.cluster.nodes_in_cluster,
                req_clients=self.sdk_pool_capacity,
                compression_settings=self.sdk_compression)

        if not self.skip_init_load:
            if self.target_vbucket and type(self.target_vbucket) is not list:
                self.target_vbucket = [self.target_vbucket]

            self.log.info("Creating doc_generator..")
            doc_create = doc_generator(
                self.key,
                0,
                self.num_items,
                key_size=self.key_size,
                doc_size=self.doc_size,
                doc_type=self.doc_type,
                target_vbucket=self.target_vbucket,
                vbuckets=self.cluster_util.vbuckets)

            self.log.info("Loading {0} items into bucket"
                          .format(self.num_items))
            task = self.task.async_load_gen_docs(
                self.cluster, self.bucket, doc_create, "create", 0,
                batch_size=10, process_concurrency=8,
                replicate_to=self.replicate_to, persist_to=self.persist_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                sdk_client_pool=self.sdk_client_pool)
            self.task.jython_task_manager.get_task_result(task)

            # Verify initial doc load count
            self.bucket_util._wait_for_stats_all_buckets()
            self.bucket_util.verify_stats_all_buckets(self.num_items)

        self.bucket_util.print_bucket_stats()
        self.log.info("=== DurabilityBaseTests setup complete ===")
Example #12
class BucketDurabilityBase(BaseTestCase):
    def setUp(self):
        super(BucketDurabilityBase, self).setUp()

        if len(self.cluster.servers) < self.nodes_init:
            self.fail("Not enough nodes for rebalance")

        # Rebalance-in required nodes for testing
        nodes_init = self.cluster.servers[1:self.nodes_init] \
            if self.nodes_init != 1 else []
        self.task.rebalance([self.cluster.master], nodes_init, [])
        self.cluster.nodes_in_cluster.extend([self.cluster.master]+nodes_init)

        # Disable auto-failover to avoid failover of nodes
        status = RestConnection(self.cluster.master) \
            .update_autofailover_settings(False, 120, False)
        self.assertTrue(status, msg="Failure during disabling auto-failover")
        self.bucket_util.add_rbac_user()

        self.durability_helper = DurabilityHelper(
            self.log,
            len(self.cluster.nodes_in_cluster))
        self.kv_nodes = self.cluster_util.get_kv_nodes()

        self.num_nodes_affected = 1
        if self.num_replicas > 1:
            self.num_nodes_affected = 2

        # Bucket create options representation
        self.bucket_template = dict()
        self.bucket_template[Bucket.name] = "default"
        self.bucket_template[Bucket.ramQuotaMB] = 100
        self.bucket_template[Bucket.replicaNumber] = self.num_replicas
        # These two params will be set during each iteration
        self.bucket_template[Bucket.bucketType] = None
        self.bucket_template[Bucket.durabilityMinLevel] = None

        # Print cluster stats
        self.cluster_util.print_cluster_stats()

        self.bucket_types_to_test = [Bucket.Type.MEMBASE,
                                     Bucket.Type.EPHEMERAL,
                                     Bucket.Type.MEMCACHED]

        self.d_level_order = [
            Bucket.DurabilityLevel.NONE,
            Bucket.DurabilityLevel.MAJORITY,
            Bucket.DurabilityLevel.MAJORITY_AND_PERSIST_TO_ACTIVE,
            Bucket.DurabilityLevel.PERSIST_TO_MAJORITY]

        # Dict representing the possible levels supported by each bucket type
        self.possible_d_levels = dict()
        self.possible_d_levels[Bucket.Type.MEMBASE] = \
            self.bucket_util.get_supported_durability_levels()
        self.possible_d_levels[Bucket.Type.EPHEMERAL] = [
            Bucket.DurabilityLevel.NONE,
            Bucket.DurabilityLevel.MAJORITY]
        self.possible_d_levels[Bucket.Type.MEMCACHED] = [
            Bucket.DurabilityLevel.NONE]

        # Dict to store the list of active/replica VBs in each node
        self.vbs_in_node = dict()
        for node in self.cluster_util.get_kv_nodes():
            shell = RemoteMachineShellConnection(node)
            self.vbs_in_node[node] = dict()
            self.vbs_in_node[node]["shell"] = shell
        self.log.info("===== BucketDurabilityBase setup complete =====")

    def tearDown(self):
        # Close all shell_connections opened in setUp()
        for node in self.vbs_in_node:
            self.vbs_in_node[node]["shell"].disconnect()

        super(BucketDurabilityBase, self).tearDown()

        self.summary.display()
        self.validate_test_failure()

    @staticmethod
    def get_cb_stat_verification_dict():
        verification_dict = dict()
        verification_dict["ops_create"] = 0
        verification_dict["ops_update"] = 0
        verification_dict["ops_delete"] = 0
        verification_dict["rollback_item_count"] = 0
        verification_dict["sync_write_aborted_count"] = 0
        verification_dict["sync_write_committed_count"] = 0
        return verification_dict

    def get_vbucket_type_mapping(self, bucket_name):
        for node in self.vbs_in_node.keys():
            cb_stat = Cbstats(self.vbs_in_node[node]["shell"])
            self.vbs_in_node[node]["active"] = \
                cb_stat.vbucket_list(bucket_name, "active")
            self.vbs_in_node[node]["replica"] = \
                cb_stat.vbucket_list(bucket_name, "replica")

    def get_bucket_dict(self, bucket_type, bucket_durability):

        bucket_dict = deepcopy(self.bucket_template)
        bucket_dict[Bucket.bucketType] = bucket_type
        bucket_dict[Bucket.durabilityMinLevel] = \
            BucketDurability[bucket_durability]

        return bucket_dict

    def get_supported_durability_for_bucket(self):
        if self.bucket_type == Bucket.Type.EPHEMERAL:
            return [Bucket.DurabilityLevel.NONE,
                    Bucket.DurabilityLevel.MAJORITY]
        return self.bucket_util.get_supported_durability_levels()

    def validate_durability_with_crud(
            self, bucket, bucket_durability,
            verification_dict,
            doc_start_index=0,
            num_items_to_load=1, op_type="create",
            doc_durability=Bucket.DurabilityLevel.NONE):
        """
        Common API to validate whether the bucket's durability settings
        are applied correctly.

        :param bucket: Bucket object to validate
        :param bucket_durability: Durability set for the bucket
                                  Note: Needed because the string stored in
                                        the bucket object differs from this.
        :param verification_dict: To hold the values for req cbstats to verify
        :param doc_start_index: Starting index to be considered for doc_load
        :param num_items_to_load: Number of items to be loaded to test.
                                  Default is '1'
        :param op_type: Type of CRUD to perform. Default is 'create'
        :param doc_durability: Document durability level to use during CRUD.
                               Default level is 'None'
        :return:
        """
        def get_d_level_used():
            if self.d_level_order.index(bucket_durability) \
                    < self.d_level_order.index(doc_durability):
                return doc_durability
            return bucket_durability

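        # The effective durability for this CRUD is the stronger of the
        # bucket-level and document-level settings (per d_level_order)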
        d_level_to_test = get_d_level_used()
        # Nothing to test for durability_level=None (async_write case)
        if d_level_to_test == Bucket.DurabilityLevel.NONE:
            return

        self.log.info("Performing %s operation to validate d_level %s"
                      % (op_type, d_level_to_test))

        # Error simulation is only done when multiple KV nodes are available;
        # otherwise just perform the CRUD without error_sim
        if len(self.vbs_in_node.keys()) > 1:
            # Pick a random node to perform error sim and load
            random_node = choice(self.vbs_in_node.keys())

            target_vb_type, simulate_error = \
                self.durability_helper.get_vb_and_error_type(d_level_to_test)

            doc_gen = doc_generator(
                self.key, doc_start_index,
                doc_start_index + num_items_to_load,
                target_vbucket=self.vbs_in_node[random_node][target_vb_type])
            error_sim = CouchbaseError(self.log,
                                       self.vbs_in_node[random_node]["shell"])

            doc_load_task = self.task.async_load_gen_docs(
                self.cluster, bucket, doc_gen, op_type,
                exp=self.maxttl,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=doc_durability,
                timeout_secs=32,
                batch_size=1,
                skip_read_on_error=True,
                suppress_error_table=True,
                start_task=False,
                sdk_client_pool=self.sdk_client_pool)

            self.sleep(5, "Wait for sdk_client to get warmed_up")
            # Simulate target error condition
            error_sim.create(simulate_error)
            self.sleep(5, "Wait for error_sim to take effect")

            # Start doc_loading task and wait for it to complete
            self.task_manager.add_new_task(doc_load_task)
            self.task_manager.get_task_result(doc_load_task)

            # Revert the induced error condition
            self.sleep(5, "Wait before reverting error_simulation")
            error_sim.revert(simulate_error)

            # Validate failed doc count and exception type from SDK
            if not doc_load_task.fail.keys():
                self.log_failure("Docs inserted without honoring the "
                                 "bucket durability level")
            for key, result in doc_load_task.fail.items():
                if SDKException.DurabilityAmbiguousException \
                        not in str(result["error"]):
                    self.log_failure("Invalid exception for key %s "
                                     "during %s operation: %s"
                                     % (key, op_type, result["error"]))

            verification_dict["sync_write_aborted_count"] += num_items_to_load
        else:
            doc_gen = doc_generator(self.key, doc_start_index,
                                    doc_start_index+num_items_to_load)

        # Retry the same CRUDs without any error simulation in place
        doc_load_task = self.task.async_load_gen_docs(
            self.cluster, bucket, doc_gen, op_type,
            exp=self.maxttl,
            durability=doc_durability,
            timeout_secs=2,
            batch_size=1,
            sdk_client_pool=self.sdk_client_pool)
        self.task_manager.get_task_result(doc_load_task)
        if doc_load_task.fail:
            self.log_failure("Failures seen during CRUD without "
                             "error simulation. Keys failed: %s"
                             % doc_load_task.fail.keys())
        else:
            verification_dict["ops_%s" % op_type] += \
                num_items_to_load
            verification_dict["sync_write_committed_count"] += \
                num_items_to_load

    def getTargetNodes(self):
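        # Picks 'num_nodes_affected' distinct non-master nodes at random
        # for error simulation (falls back to master on single-node setups)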
        def select_random_node(nodes):
            rand_node_index = randint(1, self.nodes_init-1)
            if self.cluster.nodes_in_cluster[rand_node_index] not in node_list:
                nodes.append(self.cluster.nodes_in_cluster[rand_node_index])

        node_list = list()
        if len(self.cluster.nodes_in_cluster) > 1:
            # Choose random nodes, if the cluster is not a single node cluster
            while len(node_list) != self.num_nodes_affected:
                select_random_node(node_list)
        else:
            node_list.append(self.cluster.master)
        return node_list

    def cb_stat_verify(self, verification_dict):
        failed = self.durability_helper.verify_vbucket_details_stats(
            self.bucket_util.buckets[0],
            self.kv_nodes,
            vbuckets=self.cluster_util.vbuckets,
            expected_val=verification_dict)
        if failed:
            self.log_failure("Cbstat vbucket-details validation failed")
        self.summary.add_step("Cbstat vb-details validation")
Example #13
    def test_index_with_aborts(self):
        """
        1. Create index (2i/view) on default bucket
        2. Load multiple docs such that all sync_writes will be aborted
        3. Verify nothing went into indexing
        4. Load sync_write docs such that they are successful
        5. Validate the mutated docs are taken into indexing
        :return:
        """

        crud_batch_size = 50
        def_bucket = self.cluster.buckets[0]
        kv_nodes = self.cluster_util.get_kv_nodes(self.cluster)
        replica_vbs = dict()
        verification_dict = dict()
        index_item_count = dict()
        expected_num_indexed = dict()
        load_gen = dict()
        load_gen["ADD"] = dict()
        load_gen["SET"] = dict()
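        # Abort patterns where only part of the load is aborted, so some
        # sync_writes still commit and should appear in the index counts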
        partial_aborts = ["initial_aborts", "aborts_at_end"]

        durability_helper = DurabilityHelper(
            self.log,
            len(self.cluster.nodes_in_cluster),
            durability=self.durability_level,
            replicate_to=self.replicate_to,
            persist_to=self.persist_to)

        if self.create_index_during == "before_doc_ops":
            self.create_gsi_indexes(def_bucket)

        curr_items = self.bucket_util.get_bucket_current_item_count(
            self.cluster, def_bucket)
        if self.sync_write_abort_pattern in ["all_aborts", "initial_aborts"]:
            self.bucket_util.flush_bucket(self.cluster, def_bucket)
            self.num_items = 0
        else:
            self.num_items = curr_items

        self.log.info("Disabling auto_failover to avoid node failures")
        status = RestConnection(self.cluster.master) \
            .update_autofailover_settings(False, 120)
        self.assertTrue(status, msg="Failure during disabling auto-failover")

        # Validate vbucket stats
        verification_dict["ops_create"] = self.num_items
        verification_dict["ops_update"] = 0
        # verification_dict["ops_delete"] = 0
        verification_dict["rollback_item_count"] = 0
        verification_dict["sync_write_aborted_count"] = 0
        verification_dict["sync_write_committed_count"] = 0

        index_item_count["#primary"] = self.num_items
        index_item_count["durable_add_aborts"] = 0
        index_item_count["durable_set_aborts"] = 0
        expected_num_indexed["#primary"] = curr_items
        expected_num_indexed["durable_add_aborts"] = 0
        expected_num_indexed["durable_set_aborts"] = 0

        if self.create_index_during == "before_doc_ops":
            self.validate_indexed_doc_count(def_bucket, index_item_count)

        self.log.info("Loading docs such that all sync_writes will be aborted")
        for server in kv_nodes:
            ssh_shell = RemoteMachineShellConnection(server)
            cbstats = Cbstats(server)
            replica_vbs[server] = cbstats.vbucket_list(def_bucket.name,
                                                       "replica")
            load_gen["ADD"][server] = list()
            load_gen["ADD"][server].append(
                doc_generator(self.key,
                              0,
                              crud_batch_size,
                              target_vbucket=replica_vbs[server],
                              mutation_type="ADD"))
            if self.sync_write_abort_pattern in partial_aborts:
                load_gen["ADD"][server].append(
                    doc_generator(self.key,
                                  10000,
                                  crud_batch_size,
                                  target_vbucket=replica_vbs[server],
                                  mutation_type="ADD"))
                verification_dict["ops_create"] += crud_batch_size
                verification_dict["sync_write_committed_count"] += \
                    crud_batch_size
                index_item_count["#primary"] += crud_batch_size
                index_item_count["durable_add_aborts"] += crud_batch_size
                expected_num_indexed["#primary"] += crud_batch_size
                expected_num_indexed["durable_add_aborts"] += crud_batch_size

            task_success = self.bucket_util.load_durable_aborts(
                ssh_shell, load_gen["ADD"][server], self.cluster, def_bucket,
                self.durability_level, DocLoading.Bucket.DocOps.CREATE,
                self.sync_write_abort_pattern)
            if not task_success:
                self.log_failure("Failure during load_abort task")

            verification_dict["sync_write_aborted_count"] += \
                crud_batch_size
            if self.create_index_during == "before_doc_ops":
                self.validate_indexed_doc_count(def_bucket, index_item_count)

            load_gen["SET"][server] = list()
            load_gen["SET"][server].append(
                doc_generator(self.key,
                              0,
                              crud_batch_size,
                              target_vbucket=replica_vbs[server],
                              mutation_type="SET"))
            if self.sync_write_abort_pattern in partial_aborts:
                load_gen["SET"][server].append(
                    doc_generator(self.key,
                                  10000,
                                  crud_batch_size,
                                  target_vbucket=replica_vbs[server],
                                  mutation_type="SET"))
                verification_dict["ops_update"] += crud_batch_size
                verification_dict["sync_write_committed_count"] += \
                    crud_batch_size
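                # Committed SET mutations move these docs from the
                # 'durable_add_aborts' index to 'durable_set_aborts'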
                index_item_count["durable_add_aborts"] -= crud_batch_size
                index_item_count["durable_set_aborts"] += crud_batch_size
                expected_num_indexed["#primary"] += crud_batch_size
                expected_num_indexed["durable_add_aborts"] += crud_batch_size
                expected_num_indexed["durable_set_aborts"] += crud_batch_size

            verification_dict["sync_write_aborted_count"] += \
                crud_batch_size
            task_success = self.bucket_util.load_durable_aborts(
                ssh_shell, load_gen["SET"][server], self.cluster, def_bucket,
                self.durability_level, DocLoading.Bucket.DocOps.UPDATE,
                self.sync_write_abort_pattern)
            if not task_success:
                self.log_failure("Failure during load_abort task")

            ssh_shell.disconnect()

            if self.create_index_during == "before_doc_ops":
                self.validate_indexed_doc_count(def_bucket, index_item_count)
        failed = durability_helper.verify_vbucket_details_stats(
            def_bucket,
            kv_nodes,
            vbuckets=self.cluster.vbuckets,
            expected_val=verification_dict)
        if failed:
            self.log_failure("Cbstat vbucket-details verification failed")
        self.validate_test_failure()

        if self.create_index_during == "after_doc_ops":
            self.create_gsi_indexes(def_bucket)
            self.validate_indexed_doc_count(def_bucket, index_item_count)

        self.log.info("Verify aborts are not indexed")
        self.validate_indexed_count_from_stats(def_bucket,
                                               expected_num_indexed,
                                               index_item_count)

        if not self.use_gsi_for_primary:
            self.log.info("Wait for any indexing activity to complete")
            index_monitor_task = self.cluster_util.async_monitor_active_task(
                self.cluster.master,
                "indexer",
                "_design/ddl_#primary",
                num_iteration=20,
                wait_task=True)[0]
            self.task_manager.get_task_result(index_monitor_task)
            self.assertTrue(index_monitor_task.result,
                            "Indexer task still running on server")

        for server in kv_nodes:
            if self.sync_write_abort_pattern == "initial_aborts":
                load_gen["ADD"][server] = load_gen["ADD"][server][:1]
                load_gen["SET"][server] = load_gen["SET"][server][:1]
            elif self.sync_write_abort_pattern == "aborts_at_end":
                load_gen["ADD"][server] = load_gen["ADD"][server][-1:]
                load_gen["SET"][server] = load_gen["SET"][server][-1:]

        self.log.info("Load sync_write docs such that they are successful")
        for server in kv_nodes:
            for gen_load in load_gen["ADD"][server]:
                task = self.task.async_load_gen_docs(
                    self.cluster,
                    def_bucket,
                    gen_load,
                    "create",
                    0,
                    batch_size=50,
                    process_concurrency=8,
                    replicate_to=self.replicate_to,
                    persist_to=self.persist_to,
                    durability=self.durability_level,
                    timeout_secs=self.sdk_timeout)
                self.task.jython_task_manager.get_task_result(task)

                if len(task.fail.keys()) != 0:
                    self.log_failure("Some failures seen during doc_ops")

                index_item_count["#primary"] += crud_batch_size
                index_item_count["durable_add_aborts"] += crud_batch_size
                expected_num_indexed["#primary"] += crud_batch_size
                expected_num_indexed["durable_add_aborts"] += crud_batch_size
                self.validate_indexed_doc_count(def_bucket, index_item_count)

            for gen_load in load_gen["SET"][server]:
                task = self.task.async_load_gen_docs(
                    self.cluster,
                    def_bucket,
                    gen_load,
                    "update",
                    0,
                    batch_size=50,
                    process_concurrency=8,
                    replicate_to=self.replicate_to,
                    persist_to=self.persist_to,
                    durability=self.durability_level,
                    timeout_secs=self.sdk_timeout)
                self.task.jython_task_manager.get_task_result(task)

                if len(task.fail.keys()) != 0:
                    self.log_failure("Some failures seen during doc_ops")

                index_item_count["durable_add_aborts"] -= crud_batch_size
                index_item_count["durable_set_aborts"] += crud_batch_size
                expected_num_indexed["#primary"] += crud_batch_size
                expected_num_indexed["durable_add_aborts"] += crud_batch_size
                expected_num_indexed["durable_set_aborts"] += crud_batch_size
                self.validate_indexed_doc_count(def_bucket, index_item_count)

        self.log.info("Validate the mutated docs are taken into indexing")
        self.validate_indexed_count_from_stats(def_bucket,
                                               expected_num_indexed,
                                               index_item_count)
        self.validate_test_failure()
Example #14
    def setUp(self):
        super(CrashTest, self).setUp()

        self.doc_ops = self.input.param("doc_ops", None)
        self.process_name = self.input.param("process", None)
        self.service_name = self.input.param("service", "data")
        self.sig_type = self.input.param("sig_type", "SIGKILL").upper()
        self.target_node = self.input.param("target_node", "active")
        self.client_type = self.input.param("client_type", "sdk").lower()
        self.N1qltxn = self.input.param("N1qltxn", False)

        self.pre_warmup_stats = dict()
        self.timeout = 120
        self.new_docs_to_add = 10000

        if self.doc_ops is not None:
            self.doc_ops = self.doc_ops.split(";")

        if not self.atomicity:
            self.durability_helper = DurabilityHelper(
                self.log, self.nodes_init,
                durability=self.durability_level,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to)

        verification_dict = dict()
        verification_dict["ops_create"] = \
            self.cluster.buckets[0].scopes[
                CbServer.default_scope].collections[
                CbServer.default_collection].num_items
        verification_dict["sync_write_aborted_count"] = 0
        verification_dict["rollback_item_count"] = 0
        verification_dict["pending_writes"] = 0
        if self.durability_level:
            verification_dict["sync_write_committed_count"] = \
                verification_dict["ops_create"]

        # Load initial documents into the buckets
        transaction_gen_create = doc_generator(
            "transaction_key", 0, self.num_items,
            key_size=self.key_size,
            doc_size=self.doc_size,
            doc_type=self.doc_type,
            target_vbucket=self.target_vbucket,
            vbuckets=self.cluster_util.vbuckets)
        gen_create = doc_generator(
            self.key, 0, self.num_items,
            key_size=self.key_size,
            doc_size=self.doc_size,
            doc_type=self.doc_type,
            target_vbucket=self.target_vbucket,
            vbuckets=self.cluster_util.vbuckets)
        if self.atomicity:
            transaction_task = self.task.async_load_gen_docs_atomicity(
                self.cluster, self.cluster.buckets,
                transaction_gen_create, DocLoading.Bucket.DocOps.CREATE,
                exp=0,
                batch_size=10,
                process_concurrency=self.process_concurrency,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                update_count=self.update_count,
                transaction_timeout=self.transaction_timeout,
                commit=True,
                sync=self.sync)
            self.task.jython_task_manager.get_task_result(transaction_task)
        for bucket in self.cluster.buckets:
            task = self.task.async_load_gen_docs(
                self.cluster, bucket, gen_create,
                DocLoading.Bucket.DocOps.CREATE, self.maxttl,
                persist_to=self.persist_to,
                replicate_to=self.replicate_to,
                durability=self.durability_level,
                batch_size=10, process_concurrency=8)
            self.task.jython_task_manager.get_task_result(task)
            self.bucket_util._wait_for_stats_all_buckets(self.cluster.buckets)

            self.cluster.buckets[0].scopes[
                CbServer.default_scope].collections[
                CbServer.default_collection].num_items += self.num_items
            verification_dict["ops_create"] += self.num_items
            if self.durability_level:
                verification_dict["sync_write_committed_count"] += \
                    self.num_items
            # durability_helper is created only when atomicity is disabled
            if self.atomicity is False:
                # Verify cbstats vbucket-details
                stats_failed = \
                    self.durability_helper.verify_vbucket_details_stats(
                        bucket, self.cluster_util.get_kv_nodes(),
                        vbuckets=self.cluster_util.vbuckets,
                        expected_val=verification_dict)
                if stats_failed:
                    self.fail("Cbstats verification failed")
                self.bucket_util.verify_stats_all_buckets(
                    self.cluster,
                    self.cluster.buckets[0].scopes[
                        CbServer.default_scope].collections[
                        CbServer.default_collection].num_items)
        self.bucket = self.cluster.buckets[0]
        if self.N1qltxn:
            self.n1ql_server = self.cluster_util.get_nodes_from_services_map(
                                service_type="n1ql",
                                get_all_nodes=True)
            self.n1ql_helper = N1QLHelper(server=self.n1ql_server,
                                          use_rest=True,
                                          buckets=self.cluster.buckets,
                                          log=self.log,
                                          scan_consistency='REQUEST_PLUS',
                                          num_collection=3,
                                          num_buckets=1,
                                          num_savepoints=1,
                                          override_savepoint=False,
                                          num_stmt=10,
                                          load_spec=self.data_spec_name)
            self.bucket_col = self.n1ql_helper.get_collections()
            self.stmts = self.n1ql_helper.get_stmt(self.bucket_col)
            self.stmts = self.n1ql_helper.create_full_stmts(self.stmts)
        self.log.info("==========Finished CrashTest setup========")
Example #15
    def setUp(self):
        super(CrashTest, self).setUp()

        self.doc_ops = self.input.param("doc_ops", None)
        self.process_name = self.input.param("process", None)
        self.service_name = self.input.param("service", "data")
        self.sig_type = self.input.param("sig_type", "SIGKILL").upper()
        self.target_node = self.input.param("target_node", "active")

        self.pre_warmup_stats = {}
        self.timeout = 120
        self.new_docs_to_add = 10000

        if self.doc_ops is not None:
            self.doc_ops = self.doc_ops.split(";")

        nodes_init = self.cluster.servers[1:self.nodes_init] \
            if self.nodes_init != 1 else []
        self.task.rebalance([self.cluster.master], nodes_init, [])
        self.cluster.nodes_in_cluster.extend([self.cluster.master] +
                                             nodes_init)
        if not self.atomicity:
            self.durability_helper = DurabilityHelper(
                self.log,
                self.nodes_init,
                durability=self.durability_level,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to)
        self.bucket_util.create_default_bucket(
            bucket_type=self.bucket_type,
            ram_quota=self.bucket_size,
            replica=self.num_replicas,
            compression_mode="off",
            storage=self.bucket_storage,
            eviction_policy=self.bucket_eviction_policy)
        self.bucket_util.add_rbac_user()

        if self.sdk_client_pool:
            self.log.info("Creating SDK clients for client_pool")
            for bucket in self.bucket_util.buckets:
                self.sdk_client_pool.create_clients(
                    bucket, [self.cluster.master],
                    self.sdk_pool_capacity,
                    compression_settings=self.sdk_compression)

        verification_dict = dict()
        verification_dict["ops_create"] = self.num_items
        verification_dict["sync_write_aborted_count"] = 0
        verification_dict["rollback_item_count"] = 0
        verification_dict["pending_writes"] = 0
        if self.durability_level:
            verification_dict["sync_write_committed_count"] = self.num_items

        # Load initial documents into the buckets
        self.log.info("Loading initial documents")
        gen_create = doc_generator(self.key,
                                   0,
                                   self.num_items,
                                   key_size=self.key_size,
                                   doc_size=self.doc_size,
                                   doc_type=self.doc_type,
                                   target_vbucket=self.target_vbucket,
                                   vbuckets=self.cluster_util.vbuckets)
        if self.atomicity:
            task = self.task.async_load_gen_docs_atomicity(
                self.cluster,
                self.bucket_util.buckets,
                gen_create,
                "create",
                exp=0,
                batch_size=10,
                process_concurrency=self.process_concurrency,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                update_count=self.update_count,
                transaction_timeout=self.transaction_timeout,
                commit=True,
                sync=self.sync)
            self.task.jython_task_manager.get_task_result(task)
        else:
            for bucket in self.bucket_util.buckets:
                task = self.task.async_load_gen_docs(
                    self.cluster,
                    bucket,
                    gen_create,
                    DocLoading.Bucket.DocOps.CREATE,
                    self.maxttl,
                    persist_to=self.persist_to,
                    replicate_to=self.replicate_to,
                    durability=self.durability_level,
                    batch_size=10,
                    process_concurrency=8,
                    sdk_client_pool=self.sdk_client_pool)
                self.task.jython_task_manager.get_task_result(task)

                self.bucket_util._wait_for_stats_all_buckets()
                # Verify cbstats vbucket-details
                stats_failed = \
                    self.durability_helper.verify_vbucket_details_stats(
                        bucket, self.cluster_util.get_kv_nodes(),
                        vbuckets=self.cluster_util.vbuckets,
                        expected_val=verification_dict)

                if stats_failed:
                    self.fail("Cbstats verification failed")

            self.bucket_util.verify_stats_all_buckets(self.num_items)
        self.cluster_util.print_cluster_stats()
        self.bucket_util.print_bucket_stats()
        self.log.info("==========Finished CrashTest setup========")
    def test_with_persistence_issues(self):
        """
        Test to make sure timeouts are handled in durability calls
        and document CRUDs succeed even with disk-related failures

        1. Select nodes from the cluster to simulate the specified error
        2. Perform CRUDs on the target bucket with the given timeout
        3. Use cbstats to verify the operations succeed
        4. Validate that all mutations succeed

        Note: self.sdk_timeout value is in seconds
        """

        # Persistence-bound durability levels are skipped: with disk errors
        # simulated, SyncWrites that must persist to disk cannot complete.
        if self.durability_level in [
                Bucket.DurabilityLevel.MAJORITY_AND_PERSIST_TO_ACTIVE,
                Bucket.DurabilityLevel.PERSIST_TO_MAJORITY
        ]:
            self.log.critical("Test not valid for persistence durability")
            return

        error_sim = dict()
        shell_conn = dict()
        cbstat_obj = dict()
        failover_info = dict()
        vb_info_info = dict()
        active_vbs_in_target_nodes = list()
        failover_info["init"] = dict()
        failover_info["afterCrud"] = dict()
        vb_info_info["init"] = dict()
        vb_info_info["afterCrud"] = dict()

        self.log.info("Selecting nodes to simulate error condition")
        target_nodes = DurabilityHelper.getTargetNodes(self.cluster,
                                                       self.nodes_init,
                                                       self.num_nodes_affected)

        self.log.info("Simulate error condition on %s" % target_nodes)
        for node in target_nodes:
            shell_conn[node.ip] = RemoteMachineShellConnection(node)
            cbstat_obj[node.ip] = Cbstats(shell_conn[node.ip])
            active_vbs_in_target_nodes += cbstat_obj[node.ip].vbucket_list(
                self.bucket.name, "active")
            vb_info_info["init"][node.ip] = cbstat_obj[node.ip].vbucket_seqno(
                self.bucket.name)
            failover_info["init"][node.ip] = \
                cbstat_obj[node.ip].failover_stats(self.bucket.name)

        if self.simulate_error \
                in [DiskError.DISK_FULL, DiskError.DISK_FAILURE]:
            error_sim = DiskError(self.log,
                                  self.task_manager,
                                  self.cluster.master,
                                  target_nodes,
                                  60,
                                  0,
                                  False,
                                  120,
                                  disk_location="/data")
            error_sim.create(action=self.simulate_error)
        else:
            for node in target_nodes:
                # Create shell_connections
                shell_conn[node.ip] = RemoteMachineShellConnection(node)

                # Perform specified action
                error_sim[node.ip] = CouchbaseError(self.log,
                                                    shell_conn[node.ip])
                error_sim[node.ip].create(self.simulate_error,
                                          bucket_name=self.bucket.name)

        # Perform CRUDs while the induced error scenario is active
        load_spec = dict()
        load_spec["doc_crud"] = dict()
        load_spec["doc_crud"][
            MetaCrudParams.DocCrud.CREATE_PERCENTAGE_PER_COLLECTION] = 100
        load_spec["doc_crud"][
            MetaCrudParams.DocCrud.UPDATE_PERCENTAGE_PER_COLLECTION] = 25
        load_spec["doc_crud"][
            MetaCrudParams.DocCrud.DELETE_PERCENTAGE_PER_COLLECTION] = 25
        load_spec["doc_crud"][
            MetaCrudParams.DocCrud.COMMON_DOC_KEY] = "test_collections"

        self.log.info("Perform 'create', 'update', 'delete' mutations")
        doc_loading_task = \
            self.bucket_util.run_scenario_from_spec(
                self.task,
                self.cluster,
                self.bucket_util.buckets,
                load_spec,
                mutation_num=1,
                async_load=True)

        # Perform new scope/collection creation during doc ops in parallel
        self.__perform_collection_crud(mutation_num=2)

        # Wait for doc_loading to complete and validate the doc ops
        self.task_manager.get_task_result(doc_loading_task)
        self.bucket_util.validate_doc_loading_results(doc_loading_task)
        if doc_loading_task.result is False:
            self.log_failure("Doc CRUDs failed with persistence issue")

        if self.simulate_error \
                in [DiskError.DISK_FULL, DiskError.DISK_FAILURE]:
            error_sim.revert(self.simulate_error)
        else:
            # Revert the induced error condition
            for node in target_nodes:
                error_sim[node.ip].revert(self.simulate_error,
                                          bucket_name=self.bucket.name)

                # Disconnect the shell connection
                shell_conn[node.ip].disconnect()
            self.sleep(10, "Wait for node recovery to complete")

        # Doc count validation
        self.bucket_util._wait_for_stats_all_buckets()
        self.bucket_util.validate_docs_per_collections_all_buckets()

        # Fetch latest failover stats and validate the values are updated
        self.log.info("Validating failover and seqno cbstats")
        for node in target_nodes:
            vb_info_info["afterCrud"][node.ip] = \
                cbstat_obj[node.ip].vbucket_seqno(self.bucket.name)
            failover_info["afterCrud"][node.ip] = \
                cbstat_obj[node.ip].failover_stats(self.bucket.name)

            # Failover validation
            val = \
                failover_info["init"][node.ip] \
                == failover_info["afterCrud"][node.ip]
            error_msg = "Failover stats got updated"
            self.assertTrue(val, msg=error_msg)

            # Seq_no validation (High level)
            val = \
                vb_info_info["init"][node.ip] \
                != vb_info_info["afterCrud"][node.ip]
            self.assertTrue(val, msg="vbucket seq_no not updated after CRUDs")

        self.validate_test_failure()

        # Doc count validation
        self.bucket_util._wait_for_stats_all_buckets()
        self.bucket_util.validate_docs_per_collections_all_buckets()
    def test_collection_not_exists(self):
        """
        1. Load docs into required collection
        2. Validate docs based on the targeted collection
        3. Create non-default scope/collection for CRUDs to happen
        4. Perform doc_ops again and perform CRUDs
        5. Drop the target collection and validate the CollectionNotExists
           exception on the client side
        6. Re-create the non-default collection, reload the docs and validate
        """
        def validate_vb_detail_stats():
            failed = durability_helper.verify_vbucket_details_stats(
                self.bucket,
                self.cluster_util.get_kv_nodes(),
                vbuckets=self.cluster_util.vbuckets,
                expected_val=verification_dict)
            if failed:
                self.log_failure("vBucket_details validation failed")
            self.bucket_util.validate_docs_per_collections_all_buckets()

        num_cols_in_bucket = 0
        for scope in self.bucket.scopes.values():
            num_cols_in_bucket += len(scope.collections)

        verification_dict = dict()
        verification_dict["ops_create"] = num_cols_in_bucket * self.num_items
        verification_dict["ops_update"] = 0
        verification_dict["ops_delete"] = 0
        verification_dict["rollback_item_count"] = 0
        verification_dict["sync_write_aborted_count"] = 0
        verification_dict["sync_write_committed_count"] = 0

        durability_helper = DurabilityHelper(self.log,
                                             len(self.cluster.kv_nodes),
                                             durability=self.durability_level)

        drop_scope = self.input.param("drop_scope", False)
        if self.scope_name != CbServer.default_scope:
            self.scope_name = self.bucket_util.get_random_name()
        if self.collection_name != CbServer.default_collection:
            self.collection_name = self.bucket_util.get_random_name()

        # Doc generator used for mutations
        doc_gen = doc_generator("test_col_not_exists", 0, 10)

        # Acquire SDK client for mutations
        client = self.sdk_client_pool.get_client_for_bucket(
            self.bucket, self.scope_name, self.collection_name)

        doc_ttl, _ = \
            SDKExceptionTests.__get_random_doc_ttl_and_durability_level()
        self.log.info(
            "Creating docs with doc_ttl %s into %s:%s:%s" %
            (doc_ttl, self.bucket.name, self.scope_name, self.collection_name))

        while doc_gen.has_next():
            key, value = doc_gen.next()
            result = client.crud("create",
                                 key,
                                 value,
                                 exp=doc_ttl,
                                 durability=self.durability_level,
                                 timeout=30)
            if self.collection_name == CbServer.default_collection:
                if result["status"] is False:
                    self.log_failure("Create doc failed for key: %s" % key)
                else:
                    verification_dict["ops_create"] += 1
                    if self.durability_level:
                        verification_dict["sync_write_committed_count"] += 1
                    self.bucket.scopes[self.scope_name].collections[
                        self.collection_name].num_items += 1
            elif result["status"] is True:
                self.log_failure("Create didn't fail as expected for key: %s" %
                                 key)
            elif SDKException.AmbiguousTimeoutException \
                    not in str(result["error"]) \
                    or SDKException.RetryReason.COLLECTION_NOT_FOUND \
                    not in str(result["error"]):
                self.log_failure("Invalid exception for key %s: %s" %
                                 (key, result["error"]))

        validate_vb_detail_stats()
        # Create required scope/collection for successful CRUD operation
        self.create_scope_collection()

        # Reset the doc_gen iterator for the retry
        doc_gen.reset()
        doc_ttl, _ = \
            SDKExceptionTests.__get_random_doc_ttl_and_durability_level()
        self.log.info(
            "Creating docs with doc_ttl %s into %s:%s:%s" %
            (doc_ttl, self.bucket.name, self.scope_name, self.collection_name))
        op_type = "create"
        if self.collection_name == CbServer.default_collection:
            op_type = "update"

        while doc_gen.has_next():
            key, value = doc_gen.next()
            result = client.crud(op_type,
                                 key,
                                 value,
                                 exp=doc_ttl,
                                 durability=self.durability_level)
            if result["status"] is False:
                self.log_failure("Create fail for key %s: %s" % (key, result))
            else:
                if op_type == "create":
                    verification_dict["ops_create"] += 1
                    self.bucket.scopes[self.scope_name].collections[
                        self.collection_name].num_items += 1
                else:
                    verification_dict["ops_update"] += 1

                if self.durability_level:
                    verification_dict["sync_write_committed_count"] += 1
        validate_vb_detail_stats()
        self.validate_test_failure()

        if drop_scope:
            self.log.info("Dropping scope %s" % self.scope_name)
            self.bucket_util.drop_scope(self.cluster.master, self.bucket,
                                        self.scope_name)
        else:
            self.log.info("Dropping collection %s:%s" %
                          (self.scope_name, self.collection_name))
            self.bucket_util.drop_collection(self.cluster.master, self.bucket,
                                             self.scope_name,
                                             self.collection_name)
        validate_vb_detail_stats()
        self.validate_test_failure()

        # Reset the doc_gen iterator for the retry
        doc_gen.reset()
        while doc_gen.has_next():
            key, value = doc_gen.next()
            result = client.crud("create",
                                 key,
                                 value,
                                 exp=doc_ttl,
                                 durability=self.durability_level)
            if result["status"] is True:
                self.log_failure("Create doc succeeded for dropped collection")
        validate_vb_detail_stats()
        self.validate_test_failure()

        # Re-create the dropped collection
        self.create_scope_collection(create_scope=drop_scope)

        if self.collection_name != CbServer.default_collection:
            doc_gen.reset()
            while doc_gen.has_next():
                key, value = doc_gen.next()
                result = client.crud("create",
                                     key,
                                     value,
                                     exp=doc_ttl,
                                     durability=self.durability_level)
                if result["status"] is False:
                    self.log_failure("Create failed after collection recreate "
                                     "for key %s: %s" % (key, result["error"]))
                else:
                    verification_dict["ops_create"] += 1
                    if self.durability_level:
                        verification_dict["sync_write_committed_count"] += 1
                    self.bucket.scopes[self.scope_name].collections[
                        self.collection_name].num_items += 1
            validate_vb_detail_stats()

        # Release the acquired client
        self.sdk_client_pool.release_client(client)
        self.validate_test_failure()
    def test_timeout_with_crud_failures(self):
        """
        Test to make sure timeouts are handled in durability calls
        and no documents are loaded when durability cannot be met, using
        error simulation on the server node side.

        This validates failure on a majority of nodes, where durability
        is guaranteed to fail for all CRUDs.

        1. Select a node from the cluster to simulate the specified error
        2. Perform CRUDs on the target bucket with the given timeout
        3. Use cbstats to verify that no operation succeeds
        4. Revert the error scenario on the cluster to resume durability
        5. Validate that all mutations succeed after reverting
           the error condition

        Note: self.sdk_timeout value is in seconds
        """

        # Local methods to validate vb_seqno

        def compare_vb_stat(stat_1, stat_2, vb, comparison="!="):
            keys_to_check = ["high_seqno", "high_completed_seqno"]
            result = True
            for key in keys_to_check:
                if vb in stat_1.keys():
                    if stat_1[vb]["uuid"] != stat_2[vb]["uuid"]:
                        self.log_failure(
                            "Mismatch in vb-%s UUID. %s != %s" %
                            (vb, stat_1[vb]["uuid"], stat_2[vb]["uuid"]))
                    if comparison == "!=":
                        if stat_1[vb][key] != stat_2[vb][key]:
                            result = False
                            self.log.warning(
                                "Mismatch in vb-%s stat %s. %s != %s" %
                                (vb, key, stat_1[vb][key], stat_2[vb][key]))
                    elif stat_1[vb][key] == stat_2[vb][key]:
                        result = False
                        self.log.warning(
                            "Stat not updated for vb-%s stat %s. "
                            "%s == %s" %
                            (vb, key, stat_1[vb][key], stat_2[vb][key]))
            return result

        def validate_vb_seqno_stats():
            """
            :return retry_validation: Boolean denoting to retry validation
            """
            retry_validation = False
            vb_info["post_timeout"][node.ip] = \
                cbstat_obj[node.ip].vbucket_seqno(self.bucket.name)
            for tem_vb_num in range(self.cluster_util.vbuckets):
                tem_vb_num = str(tem_vb_num)
                if tem_vb_num not in affected_vbs:
                    if compare_vb_stat(vb_info["init"][node.ip],
                                       vb_info["post_timeout"][node.ip],
                                       tem_vb_num) is False:
                        self.log_failure("Unaffected vb-%s stat" % tem_vb_num)
                elif int(tem_vb_num) in target_nodes_vbuckets["active"]:
                    if compare_vb_stat(vb_info["init"][node.ip],
                                       vb_info["post_timeout"][node.ip],
                                       tem_vb_num) is False:
                        self.log.warning("%s - mismatch in %s vb-%s seq_no" %
                                         (node.ip, "active", tem_vb_num))
                elif int(tem_vb_num) in target_nodes_vbuckets["replica"]:
                    if compare_vb_stat(vb_info["init"][node.ip],
                                       vb_info["post_timeout"][node.ip],
                                       tem_vb_num,
                                       comparison="==") is False:
                        retry_validation = True
                        self.log.warning("%s - mismatch in %s vb-%s seq_no" %
                                         (node.ip, "replica", tem_vb_num))
            return retry_validation

        shell_conn = dict()
        cbstat_obj = dict()
        error_sim = dict()
        target_nodes_vbuckets = dict()
        vb_info = dict()
        tasks = dict()
        doc_gen = dict()
        affected_vbs = list()

        target_nodes_vbuckets["active"] = []
        target_nodes_vbuckets["replica"] = []
        vb_info["init"] = dict()
        vb_info["post_timeout"] = dict()
        vb_info["afterCrud"] = dict()

        # Override crud_batch_size to minimum value for testing
        self.crud_batch_size = 5
        self.key = "test_collections"
        self.sdk_timeout = 3

        # Select target vbucket type to load_docs
        target_vb_type = "replica"
        if self.simulate_error == CouchbaseError.STOP_PERSISTENCE \
                and self.durability_level \
                == Bucket.DurabilityLevel.MAJORITY_AND_PERSIST_TO_ACTIVE:
            target_vb_type = "active"
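            # MAJORITY_AND_PERSIST_TO_ACTIVE only requires persistence on the
            # active copy, so stopping persistence matters only for vbuckets
            # whose active copy lives on the target nodes.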

        # Create required scope/collection for successful CRUD operation
        if self.scope_name != CbServer.default_scope:
            self.scope_name = self.bucket_util.get_random_name()
        self.collection_name = self.bucket_util.get_random_name()
        self.log.info("Creating scope::collection %s::%s" %
                      (self.scope_name, self.collection_name))
        self.create_scope_collection()

        # Load docs into created collection
        self.log.info("Loading data into created collection")
        load_gen = doc_generator(self.key, 0, self.num_items)
        task = self.task.async_load_gen_docs(
            self.cluster,
            self.bucket,
            load_gen,
            "create",
            0,
            scope=self.scope_name,
            collection=self.collection_name,
            sdk_client_pool=self.sdk_client_pool,
            batch_size=200,
            process_concurrency=8,
            timeout_secs=60)
        self.task_manager.get_task_result(task)
        if self.subdoc_test:
            load_gen = sub_doc_generator(self.key, 0, self.num_items / 2)
            task = self.task.async_load_gen_sub_docs(
                self.cluster,
                self.bucket,
                load_gen,
                Bucket_Op.SubDocOps.INSERT,
                timeout_secs=self.sdk_timeout,
                compression=self.sdk_compression,
                path_create=True,
                batch_size=100,
                process_concurrency=8,
                durability=self.durability_level,
                scope=self.scope_name,
                collection=self.collection_name,
                sdk_client_pool=self.sdk_client_pool)
            self.task_manager.get_task_result(task)

        self.bucket.scopes[self.scope_name].collections[
            self.collection_name].num_items = self.num_items

        target_nodes = DurabilityHelper.getTargetNodes(self.cluster,
                                                       self.nodes_init,
                                                       self.num_nodes_affected)
        for node in target_nodes:
            shell_conn[node.ip] = RemoteMachineShellConnection(node)
            cbstat_obj[node.ip] = Cbstats(shell_conn[node.ip])
            target_nodes_vbuckets["active"] += \
                cbstat_obj[node.ip].vbucket_list(self.bucket.name,
                                                 vbucket_type="active")
            target_nodes_vbuckets["replica"] += \
                cbstat_obj[node.ip].vbucket_list(self.bucket.name,
                                                 vbucket_type="replica")
            vb_info["init"][node.ip] = cbstat_obj[node.ip].vbucket_seqno(
                self.bucket.name)
            error_sim[node.ip] = CouchbaseError(self.log, shell_conn[node.ip])

        curr_time = int(time.time())
        expected_timeout = curr_time + self.sdk_timeout
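        # 'expected_timeout' marks the earliest wall-clock time at which the
        # SyncWrites may time out; it is checked after reverting the error to
        # confirm the SDK did not give up before the full timeout elapsed.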

        if target_vb_type == "active":
            target_vbs = list(
                set(target_nodes_vbuckets[target_vb_type]).difference(
                    set(target_nodes_vbuckets["replica"])))
        else:
            target_vbs = list(
                set(target_nodes_vbuckets[target_vb_type]).difference(
                    set(target_nodes_vbuckets["active"])))

        # Create required doc_generators
        doc_gen["create"] = doc_generator(self.key,
                                          self.num_items,
                                          self.crud_batch_size,
                                          target_vbucket=target_vbs)
        doc_gen["delete"] = doc_generator(self.key,
                                          0,
                                          self.crud_batch_size,
                                          target_vbucket=target_vbs)
        doc_gen["read"] = doc_generator(self.key,
                                        int(self.num_items / 3),
                                        self.crud_batch_size,
                                        target_vbucket=target_vbs)
        doc_gen["update"] = doc_generator(self.key,
                                          int(self.num_items / 2),
                                          self.crud_batch_size,
                                          target_vbucket=target_vbs)

        # Create required subdoc generators
        doc_gen["insert"] = sub_doc_generator(self.key,
                                              int(self.num_items / 2),
                                              self.crud_batch_size,
                                              target_vbucket=target_vbs)
        doc_gen["upsert"] = sub_doc_generator_for_edit(
            self.key,
            0,
            self.crud_batch_size,
            template_index=1,
            target_vbucket=target_vbs)
        doc_gen["remove"] = sub_doc_generator(self.key,
                                              0,
                                              self.crud_batch_size,
                                              target_vbucket=target_vbs)

        # Perform specified action
        for node in target_nodes:
            error_sim[node.ip].create(self.simulate_error,
                                      bucket_name=self.bucket.name)
        self.sleep(5, "Wait for error_simulation to take effect")

        ops_to_perform = [
            Bucket_Op.DocOps.CREATE, Bucket_Op.DocOps.UPDATE,
            Bucket_Op.DocOps.READ, Bucket_Op.DocOps.DELETE
        ]
        if self.subdoc_test:
            ops_to_perform = [
                Bucket_Op.SubDocOps.INSERT, Bucket_Op.SubDocOps.UPSERT,
                Bucket_Op.SubDocOps.REMOVE
            ]

        for op_type in ops_to_perform:
            self.log.info("Starting doc op %s" % op_type)
            if op_type in Bucket_Op.DOC_OPS:
                tasks[op_type] = self.task.async_load_gen_docs(
                    self.cluster,
                    self.bucket,
                    doc_gen[op_type],
                    op_type,
                    0,
                    scope=self.scope_name,
                    collection=self.collection_name,
                    sdk_client_pool=self.sdk_client_pool,
                    batch_size=1,
                    process_concurrency=8,
                    durability=self.durability_level,
                    timeout_secs=self.sdk_timeout,
                    suppress_error_table=True,
                    print_ops_rate=False,
                    skip_read_on_error=True)
            else:
                tasks[op_type] = self.task.async_load_gen_sub_docs(
                    self.cluster,
                    self.bucket,
                    doc_gen[op_type],
                    op_type,
                    0,
                    scope=self.scope_name,
                    collection=self.collection_name,
                    sdk_client_pool=self.sdk_client_pool,
                    path_create=True,
                    batch_size=1,
                    process_concurrency=8,
                    durability=self.durability_level,
                    timeout_secs=self.sdk_timeout,
                    print_ops_rate=False)

            self.task.jython_task_manager.get_task_result(tasks[op_type])

            # Validate task failures
            if op_type == Bucket_Op.DocOps.READ:
                # Validation for read task
                if len(tasks[op_type].fail.keys()) != 0:
                    self.log_failure("Read failed for few docs: %s" %
                                     tasks[op_type].fail.keys())
            else:
                # Validation of CRUDs - Update / Create / Delete
                for doc_id, crud_result in tasks[op_type].fail.items():
                    vb_num = self.bucket_util.get_vbucket_num_for_key(
                        doc_id, self.cluster_util.vbuckets)
                    if SDKException.DurabilityAmbiguousException \
                            not in str(crud_result["error"]):
                        self.log_failure(
                            "Invalid exception for doc %s, vb %s: %s" %
                            (doc_id, vb_num, crud_result))

        # Revert the specified error scenario
        for node in target_nodes:
            error_sim[node.ip].revert(self.simulate_error,
                                      bucket_name=self.bucket.name)

        # Check whether the timeout triggered properly
        if int(time.time()) < expected_timeout:
            self.log_failure("Timed-out before expected time")

        for op_type in ops_to_perform:
            if op_type == Bucket_Op.DocOps.READ:
                continue
            while doc_gen[op_type].has_next():
                doc_id, _ = doc_gen[op_type].next()
                affected_vbs.append(
                    str(
                        self.bucket_util.get_vbucket_num_for_key(
                            doc_id, self.cluster_util.vbuckets)))

        affected_vbs = list(set(affected_vbs))
        # Fetch latest stats and validate the seq_nos are not updated
        for node in target_nodes:
            retry_count = 0
            max_retry = 3
            while retry_count < max_retry:
                self.log.info("Trying to validate vbseq_no stats: %d" %
                              (retry_count + 1))
                retry_count += 1
                retry_required = validate_vb_seqno_stats()
                if not retry_required:
                    break
                self.sleep(5, "Sleep for vbseq_no stats to update")
            else:
                # Runs only when the retry loop exhausts without a 'break'
                self.log_failure("validate_vb_seqno_stats verification failed")

        self.validate_test_failure()

        # Get SDK Client from client_pool
        sdk_client = self.sdk_client_pool.get_client_for_bucket(
            self.bucket, self.scope_name, self.collection_name)

        # Doc error validation
        for op_type in ops_to_perform:
            task = tasks[op_type]

            if self.nodes_init == 1 \
                    and op_type != Bucket_Op.DocOps.READ \
                    and len(task.fail.keys()) != (doc_gen[op_type].end
                                                  - doc_gen[op_type].start):
                self.log_failure(
                    "Failed keys %d are less than expected %d" %
                    (len(task.fail.keys()),
                     (doc_gen[op_type].end - doc_gen[op_type].start)))

            # Create table objects for display
            table_view = TableView(self.log.error)
            ambiguous_table_view = TableView(self.log.info)
            table_view.set_headers(["Key", "vBucket", "Exception"])
            ambiguous_table_view.set_headers(["Key", "vBucket"])

            # Iterate failed keys for validation
            for doc_key, doc_info in task.fail.items():
                vb_for_key = self.bucket_util.get_vbucket_num_for_key(doc_key)

                if SDKException.DurabilityAmbiguousException \
                        not in str(doc_info["error"]):
                    table_view.add_row(
                        [doc_key, vb_for_key, doc_info["error"]])

                ambiguous_table_view.add_row([doc_key, str(vb_for_key)])
                if op_type not in Bucket_Op.SUB_DOC_OPS:
                    retry_success = \
                        self.durability_helper.retry_for_ambiguous_exception(
                            sdk_client, op_type, doc_key, doc_info)
                    if not retry_success:
                        self.log_failure("%s failed in retry for %s" %
                                         (op_type, doc_key))

            # Display the tables (if any errors)
            table_view.display("Unexpected exception during %s" % op_type)
            ambiguous_table_view.display("D_Ambiguous exception during %s" %
                                         op_type)

        # Release the acquired client
        self.sdk_client_pool.release_client(sdk_client)

        # Verify doc count after expected CRUD failure
        self.bucket_util._wait_for_stats_all_buckets()
        self.bucket_util.validate_docs_per_collections_all_buckets()

        # Fetch latest stats and validate the values are updated
        for node in target_nodes:
            vb_info["afterCrud"][node.ip] = \
                cbstat_obj[node.ip].vbucket_seqno(self.bucket.name)
            if vb_info["init"][node.ip] == vb_info["afterCrud"][node.ip]:
                self.log_failure("vBucket seq_no stats not updated")

        # Disconnect the shell connection
        for node in target_nodes:
            shell_conn[node.ip].disconnect()

        self.validate_test_failure()
    def test_timeout_with_successful_crud(self):
        """
        Test to make sure CRUDs succeed when the simulated error condition
        is reverted on the server nodes within the SDK timeout window.

        1. Select a node from the cluster to simulate the specified error
        2. Start CRUDs on the target bucket with the given timeout
        3. Simulate the error, then revert it before the timeout expires
        4. Validate that all mutations succeed after reverting
           the error condition
        5. Verify vbucket seq_no stats are updated on the target nodes

        Note: self.sdk_timeout value is in seconds
        """

        shell_conn = dict()
        cbstat_obj = dict()
        error_sim = dict()
        vb_info = dict()
        vb_info["init"] = dict()
        vb_info["afterCrud"] = dict()

        target_nodes = DurabilityHelper.getTargetNodes(self.cluster,
                                                       self.nodes_init,
                                                       self.num_nodes_affected)
        for node in target_nodes:
            shell_conn[node.ip] = RemoteMachineShellConnection(node)
            cbstat_obj[node.ip] = Cbstats(shell_conn[node.ip])
            vb_info["init"][node.ip] = cbstat_obj[node.ip].vbucket_seqno(
                self.bucket.name)
            error_sim[node.ip] = CouchbaseError(self.log, shell_conn[node.ip])

        doc_load_spec = dict()
        doc_load_spec[MetaCrudParams.SDK_TIMEOUT] = self.sdk_timeout
        doc_load_spec[MetaCrudParams.DURABILITY_LEVEL] = self.durability_level
        doc_load_spec["doc_crud"] = dict()
        doc_load_spec["subdoc_crud"] = dict()
        doc_load_spec["doc_crud"][MetaCrudParams.DocCrud.COMMON_DOC_KEY] = \
            "test_collections"
        doc_load_spec["doc_crud"][
            MetaCrudParams.DocCrud.CREATE_PERCENTAGE_PER_COLLECTION] = 0
        doc_load_spec["doc_crud"][
            MetaCrudParams.DocCrud.UPDATE_PERCENTAGE_PER_COLLECTION] = 0
        doc_load_spec["doc_crud"][
            MetaCrudParams.DocCrud.DELETE_PERCENTAGE_PER_COLLECTION] = 0

        doc_load_spec["subdoc_crud"][
            MetaCrudParams.SubDocCrud.INSERT_PER_COLLECTION] = 0
        doc_load_spec["subdoc_crud"][
            MetaCrudParams.SubDocCrud.UPSERT_PER_COLLECTION] = 0
        doc_load_spec["subdoc_crud"][
            MetaCrudParams.SubDocCrud.REMOVE_PER_COLLECTION] = 0

        ops_to_perform = ["create", "update", "read", "replace", "delete"]
        if self.subdoc_test:
            ops_to_perform = ["insert", "upsert", "remove"]

        for op_type in ops_to_perform:
            self.log.info("Performing '%s' with timeout=%s" %
                          (op_type, self.sdk_timeout))
            curr_spec = deepcopy(doc_load_spec)
            if op_type == "create":
                curr_spec["doc_crud"][
                    MetaCrudParams.DocCrud.CREATE_PERCENTAGE_PER_COLLECTION] \
                    = 5
            elif op_type == "update":
                curr_spec["doc_crud"][
                    MetaCrudParams.DocCrud.UPDATE_PERCENTAGE_PER_COLLECTION] \
                    = 5
            elif op_type == "delete":
                curr_spec["doc_crud"][
                    MetaCrudParams.DocCrud.DELETE_PERCENTAGE_PER_COLLECTION] \
                    = 5
            elif op_type == "read":
                curr_spec["doc_crud"][
                    MetaCrudParams.DocCrud.READ_PERCENTAGE_PER_COLLECTION] = 5
                curr_spec[MetaCrudParams.RETRY_EXCEPTIONS] = [
                    SDKException.TimeoutException
                ]
            elif op_type == "insert":
                curr_spec["subdoc_crud"][
                    MetaCrudParams.SubDocCrud.INSERT_PER_COLLECTION] = 5
            elif op_type == "upsert":
                curr_spec["subdoc_crud"][
                    MetaCrudParams.SubDocCrud.UPSERT_PER_COLLECTION] = 5
            elif op_type == "remove":
                curr_spec["subdoc_crud"][
                    MetaCrudParams.SubDocCrud.REMOVE_PER_COLLECTION] = 5

            doc_loading_task = \
                self.bucket_util.run_scenario_from_spec(
                    self.task,
                    self.cluster,
                    self.bucket_util.buckets,
                    curr_spec,
                    mutation_num=1,
                    async_load=True,
                    validate_task=False)

            # Perform specified action
            for node in target_nodes:
                error_sim[node.ip].create(self.simulate_error,
                                          bucket_name=self.bucket.name)

            self.sleep(10, "Wait before reverting the error condition")

            # Revert the specified error scenario
            for node in target_nodes:
                error_sim[node.ip].revert(self.simulate_error,
                                          bucket_name=self.bucket.name)

            self.task_manager.get_task_result(doc_loading_task)
            self.bucket_util.validate_doc_loading_results(doc_loading_task)
            if doc_loading_task.result is False:
                self.fail("Doc_loading for '%s' failed" % op_type)

            # Fetch latest stats and validate the values are updated
            for node in target_nodes:
                curr_stat = cbstat_obj[node.ip].vbucket_seqno(self.bucket.name)
                if vb_info["init"][node.ip] == curr_stat:
                    self.log_failure("vbucket_seqno not updated. %s == %s" %
                                     (vb_info["init"][node.ip], curr_stat))

        # Disconnect the shell connection
        for node in target_nodes:
            shell_conn[node.ip].disconnect()

        # Verify initial doc load count
        self.bucket_util._wait_for_stats_all_buckets()
        self.bucket_util.validate_docs_per_collections_all_buckets()
        self.validate_test_failure()
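In test_timeout_with_successful_crud above, the induced failure is held for only about 10 seconds and then reverted, which must stay well inside self.sdk_timeout so the in-flight SyncWrites can still complete (in the real test the load runs asynchronously via run_scenario_from_spec). Below is a rough, framework-free sketch of that timing constraint; simulate, revert and do_load are hypothetical stand-ins, not the framework's CouchbaseError API.

import time


# Illustrative only: the error window must stay below the SDK timeout for
# durable writes to succeed once the error condition is reverted.
def run_with_error_window(simulate, revert, error_window, sdk_timeout, do_load):
    assert error_window < sdk_timeout, "error window would outlive the timeout"
    simulate()                   # e.g. stop persistence / stop memcached
    time.sleep(error_window)     # keep the failure active only briefly
    revert()                     # recover before pending SyncWrites expire
    return do_load()             # the durable load is now expected to pass


# Usage with trivial stand-ins
assert run_with_error_window(lambda: None, lambda: None,
                             error_window=0.01, sdk_timeout=60,
                             do_load=lambda: True)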
Example #21
class basic_ops(BaseTestCase):
    def setUp(self):
        super(basic_ops, self).setUp()

        self.doc_ops = self.input.param("doc_ops", "").split(";")
        self.observe_test = self.input.param("observe_test", False)
        # Scope/collection name can be default or create a random one to test
        self.scope_name = self.input.param("scope", CbServer.default_scope)
        self.collection_name = self.input.param("collection",
                                                CbServer.default_collection)

        nodes_init = self.cluster.servers[1:self.nodes_init] \
            if self.nodes_init != 1 else []
        self.task.rebalance([self.cluster.master], nodes_init, [])
        self.cluster.nodes_in_cluster.extend([self.cluster.master] +
                                             nodes_init)
        self.bucket_util.create_default_bucket(
            replica=self.num_replicas,
            compression_mode=self.compression_mode,
            bucket_type=self.bucket_type,
            storage=self.bucket_storage,
            eviction_policy=self.bucket_eviction_policy)
        self.bucket_util.add_rbac_user()

        # Create Scope/Collection with random names if not equal to default
        if self.scope_name != CbServer.default_scope:
            self.scope_name = self.bucket_util.get_random_name()
            self.bucket_util.create_scope(self.cluster.master,
                                          self.bucket_util.buckets[0],
                                          {"name": self.scope_name})
        if self.collection_name != CbServer.default_collection:
            self.collection_name = self.bucket_util.get_random_name()
            self.bucket_util.create_collection(
                self.cluster.master, self.bucket_util.buckets[0],
                self.scope_name, {
                    "name": self.collection_name,
                    "num_items": self.num_items
                })
            self.log.info("Using scope::collection - '%s::%s'" %
                          (self.scope_name, self.collection_name))

        # Update required num_items under default collection
        self.bucket_util.buckets[0] \
            .scopes[self.scope_name] \
            .collections[self.collection_name] \
            .num_items = self.num_items

        self.durability_helper = DurabilityHelper(
            self.log,
            len(self.cluster.nodes_in_cluster),
            durability=self.durability_level,
            replicate_to=self.replicate_to,
            persist_to=self.persist_to)

        # Create sdk_clients for pool
        if self.sdk_client_pool:
            self.log.info("Creating SDK client pool")
            self.sdk_client_pool.create_clients(
                self.bucket_util.buckets[0],
                self.cluster.nodes_in_cluster,
                req_clients=self.sdk_pool_capacity,
                compression_settings=self.sdk_compression)

        # Reset active_resident_threshold to avoid further data load as DGM
        self.active_resident_threshold = 0
        self.cluster_util.print_cluster_stats()
        self.bucket_util.print_bucket_stats()
        self.log.info("==========Finished Basic_ops base setup========")

    def tearDown(self):
        super(basic_ops, self).tearDown()

    def do_basic_ops(self):
        KEY_NAME = 'key1'
        KEY_NAME2 = 'key2'
        self.log.info('Starting basic ops')

        default_bucket = self.bucket_util.get_all_buckets()[0]
        sdk_client = SDKClient([self.cluster.master],
                               default_bucket,
                               compression_settings=self.sdk_compression)
        # mcd = client.memcached(KEY_NAME)

        # MB-17231 - incr with full eviction
        rc = sdk_client.incr(KEY_NAME, delta=1)
        self.log.info('rc for incr: {0}'.format(rc))

        # MB-17289 del with meta
        rc = sdk_client.set(KEY_NAME, 0, 0, json.dumps({'value': 'value2'}))
        self.log.info('set is: {0}'.format(rc))
        # cas = rc[1]

        # wait for it to persist
        persisted = 0
        while persisted == 0:
            opaque, rep_time, persist_time, persisted, cas = \
                sdk_client.observe(KEY_NAME)

        try:
            rc = sdk_client.evict_key(KEY_NAME)
        except MemcachedError as exp:
            self.fail("Exception with evict meta - {0}".format(exp))

        CAS = 0xabcd
        try:
            # NOTE: 'mcd' is assumed to be a direct memcached connection
            # (see the commented-out 'client.memcached(...)' line above);
            # it is not defined anywhere in this snippet as shown.
            # Args: key, exp, flags, seqno, cas
            rc = mcd.del_with_meta(KEY_NAME2, 0, 0, 2, CAS)
        except MemcachedError as exp:
            self.fail("Exception with del_with_meta - {0}".format(exp))

    # Reproduce test case for MB-28078
    def do_setWithMeta_twice(self):
        mc = MemcachedClient(self.cluster.master.ip, constants.memcached_port)
        mc.sasl_auth_plain(self.cluster.master.rest_username,
                           self.cluster.master.rest_password)
        mc.bucket_select('default')

        try:
            mc.setWithMeta('1', '{"Hello":"World"}', 3600, 0, 1,
                           0x1512a3186faa0000)
        except MemcachedError as error:
            self.log.info("<MemcachedError #%d ``%s''>" %
                          (error.status, error.message))
            self.fail("Error on First setWithMeta()")

        stats = mc.stats()
        self.log.info('curr_items: {0} and curr_temp_items:{1}'.format(
            stats['curr_items'], stats['curr_temp_items']))
        self.sleep(5, "Wait before checking the stats")
        stats = mc.stats()
        self.log.info('curr_items: {0} and curr_temp_items:{1}'.format(
            stats['curr_items'], stats['curr_temp_items']))

        try:
            mc.setWithMeta('1', '{"Hello":"World"}', 3600, 0, 1,
                           0x1512a3186faa0000)
        except MemcachedError as error:
            stats = mc.stats()
            self.log.info('After 2nd setWithMeta(), curr_items: {} '
                          'and curr_temp_items: {}'.format(
                              stats['curr_items'], stats['curr_temp_items']))
            if int(stats['curr_temp_items']) == 1:
                self.fail("Error on second setWithMeta(), "
                          "expected curr_temp_items to be 0")
            else:
                self.log.info("<MemcachedError #%d ``%s''>" %
                              (error.status, error.message))

    def generate_docs_bigdata(self,
                              docs_per_day,
                              start=0,
                              document_size=1024000):
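        # Thin wrapper over doc_generator; defaults to ~1 MB values
        # (document_size=1024000) for large-document test cases.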
        return doc_generator(self.key,
                             start,
                             docs_per_day,
                             key_size=self.key_size,
                             doc_size=document_size,
                             doc_type=self.doc_type,
                             target_vbucket=self.target_vbucket,
                             vbuckets=self.cluster_util.vbuckets,
                             randomize_doc_size=self.randomize_doc_size,
                             randomize_value=self.randomize_value)

    def test_doc_size(self):
        """
        Basic tests for document CRUD operations using JSON docs
        """
        def check_durability_failures():
            self.log.error(task.sdk_acked_curd_failed.keys())
            self.log.error(task.sdk_exception_crud_succeed.keys())
            self.assertTrue(
                len(task.sdk_acked_curd_failed) == 0,
                "Durability failed for docs: %s" %
                task.sdk_acked_curd_failed.keys())
            self.assertTrue(
                len(task.sdk_exception_crud_succeed) == 0,
                "CRUD succeeded despite SDK exception for docs: %s" %
                task.sdk_exception_crud_succeed.keys())

        """
        Basic tests for document CRUD operations using JSON docs
        """
        doc_op = self.input.param("doc_op", None)
        def_bucket = self.bucket_util.buckets[0]
        ignore_exceptions = list()
        retry_exceptions = list()
        supported_d_levels = self.bucket_util.get_supported_durability_levels()

        # Stat validation reference variables
        verification_dict = dict()
        verification_dict["ops_create"] = 0
        verification_dict["ops_update"] = 0
        verification_dict["ops_delete"] = 0
        verification_dict["rollback_item_count"] = 0
        verification_dict["sync_write_aborted_count"] = 0
        verification_dict["sync_write_committed_count"] = 0

        if self.target_vbucket and type(self.target_vbucket) is not list:
            self.target_vbucket = [self.target_vbucket]

        self.log.info("Creating doc_generator..")
        # Load basic docs into bucket
        doc_create = doc_generator(self.key,
                                   0,
                                   self.num_items,
                                   key_size=self.key_size,
                                   doc_size=self.doc_size,
                                   doc_type=self.doc_type,
                                   target_vbucket=self.target_vbucket,
                                   vbuckets=self.cluster_util.vbuckets,
                                   randomize_doc_size=self.randomize_doc_size,
                                   randomize_value=self.randomize_value)
        self.log.info("Loading {0} docs into the bucket: {1}".format(
            self.num_items, def_bucket))
        task = self.task.async_load_gen_docs(
            self.cluster,
            def_bucket,
            doc_create,
            "create",
            0,
            batch_size=self.batch_size,
            process_concurrency=self.process_concurrency,
            replicate_to=self.replicate_to,
            persist_to=self.persist_to,
            durability=self.durability_level,
            compression=self.sdk_compression,
            timeout_secs=self.sdk_timeout,
            ryow=self.ryow,
            check_persistence=self.check_persistence,
            scope=self.scope_name,
            collection=self.collection_name,
            sdk_client_pool=self.sdk_client_pool)
        self.task.jython_task_manager.get_task_result(task)

        if self.ryow:
            check_durability_failures()

        # Retry doc_exception code
        self.log.info("Validating failed doc's (if any) exceptions")
        doc_op_info_dict = dict()
        doc_op_info_dict[task] = self.bucket_util.get_doc_op_info_dict(
            def_bucket,
            "create",
            exp=0,
            replicate_to=self.replicate_to,
            persist_to=self.persist_to,
            durability=self.durability_level,
            timeout=self.sdk_timeout,
            time_unit="seconds",
            ignore_exceptions=ignore_exceptions,
            retry_exceptions=retry_exceptions)
        self.bucket_util.verify_doc_op_task_exceptions(doc_op_info_dict,
                                                       self.cluster,
                                                       self.sdk_client_pool)

        if len(doc_op_info_dict[task]["unwanted"]["fail"].keys()) != 0:
            self.fail("Failures in retry doc CRUDs: {0}".format(
                doc_op_info_dict[task]["unwanted"]["fail"]))

        self.log.info("Wait for ep_all_items_remaining to become '0'")
        self.bucket_util._wait_for_stats_all_buckets()

        # Update ref_val
        verification_dict["ops_create"] += \
            self.num_items - len(task.fail.keys())
        # Validate vbucket stats
        if self.durability_level in supported_d_levels:
            verification_dict["sync_write_committed_count"] += self.num_items

        failed = self.durability_helper.verify_vbucket_details_stats(
            def_bucket,
            self.cluster_util.get_kv_nodes(),
            vbuckets=self.cluster_util.vbuckets,
            expected_val=verification_dict)
        if failed:
            self.fail("Cbstat vbucket-details verification failed")

        # Verify initial doc load count
        self.log.info("Validating doc_count in buckets")
        self.bucket_util.validate_doc_count_as_per_collections(def_bucket)

        self.log.info("Creating doc_generator for doc_op")
        num_item_start_for_crud = int(self.num_items / 2)
        doc_update = doc_generator(self.key,
                                   0,
                                   num_item_start_for_crud,
                                   key_size=self.key_size,
                                   doc_size=self.doc_size,
                                   doc_type=self.doc_type,
                                   target_vbucket=self.target_vbucket,
                                   vbuckets=self.cluster_util.vbuckets,
                                   mutate=1,
                                   randomize_doc_size=self.randomize_doc_size,
                                   randomize_value=self.randomize_value)

        if self.target_vbucket:
            mutation_doc_count = len(doc_update.doc_keys)
        else:
            mutation_doc_count = (doc_update.end - doc_update.start +
                                  len(task.fail.keys()))

        if doc_op == "update":
            self.log.info("Performing 'update' mutation over the docs")
            task = self.task.async_load_gen_docs(
                self.cluster,
                def_bucket,
                doc_update,
                "update",
                0,
                batch_size=self.batch_size,
                process_concurrency=self.process_concurrency,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                compression=self.sdk_compression,
                timeout_secs=self.sdk_timeout,
                ryow=self.ryow,
                check_persistence=self.check_persistence,
                scope=self.scope_name,
                collection=self.collection_name,
                sdk_client_pool=self.sdk_client_pool)
            self.task.jython_task_manager.get_task_result(task)
            verification_dict["ops_update"] += mutation_doc_count
            if self.durability_level in supported_d_levels:
                verification_dict["sync_write_committed_count"] \
                    += mutation_doc_count
            if self.ryow:
                check_durability_failures()

            # Read all the values to validate update operation
            task = self.task.async_validate_docs(
                self.cluster,
                def_bucket,
                doc_update,
                "update",
                0,
                batch_size=self.batch_size,
                process_concurrency=self.process_concurrency,
                scope=self.scope_name,
                collection=self.collection_name,
                sdk_client_pool=self.sdk_client_pool)
            self.task.jython_task_manager.get_task_result(task)

        elif doc_op == "delete":
            self.log.info("Performing 'delete' mutation over the docs")
            task = self.task.async_load_gen_docs(
                self.cluster,
                def_bucket,
                doc_update,
                "delete",
                0,
                batch_size=self.batch_size,
                process_concurrency=self.process_concurrency,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                compression=self.sdk_compression,
                timeout_secs=self.sdk_timeout,
                ryow=self.ryow,
                check_persistence=self.check_persistence,
                scope=self.scope_name,
                collection=self.collection_name,
                sdk_client_pool=self.sdk_client_pool)
            self.task.jython_task_manager.get_task_result(task)
            if self.collection_name is None:
                target_scope = CbServer.default_scope
                target_collection = CbServer.default_collection
            else:
                target_scope = self.scope_name
                target_collection = self.collection_name

            def_bucket \
                .scopes[target_scope] \
                .collections[target_collection] \
                .num_items -= (self.num_items - num_item_start_for_crud)
            verification_dict["ops_delete"] += mutation_doc_count

            if self.durability_level in supported_d_levels:
                verification_dict["sync_write_committed_count"] \
                    += mutation_doc_count
            if self.ryow:
                check_durability_failures()

            # Read all the values to validate delete operation
            task = self.task.async_validate_docs(
                self.cluster,
                def_bucket,
                doc_update,
                "delete",
                0,
                batch_size=self.batch_size,
                process_concurrency=self.process_concurrency,
                sdk_client_pool=self.sdk_client_pool)
            self.task.jython_task_manager.get_task_result(task)

        else:
            self.log.warning("Unsupported doc_operation '%s'" % doc_op)

        self.log.info("Wait for ep_all_items_remaining to become '0'")
        self.bucket_util._wait_for_stats_all_buckets()

        failed = self.durability_helper.verify_vbucket_details_stats(
            def_bucket,
            self.cluster_util.get_kv_nodes(),
            vbuckets=self.cluster_util.vbuckets,
            expected_val=verification_dict)
        if failed:
            self.fail("Cbstat vbucket-details verification failed")

        self.log.info("Validating doc_count")
        self.bucket_util.validate_doc_count_as_per_collections(def_bucket)
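
    # Hedged sketch (not part of the original suite): the inline counter
    # bookkeeping done above after each doc-load task, condensed into one
    # helper. The helper name, its arguments and the use of the per-task
    # failure count are assumptions made purely for illustration.
    @staticmethod
    def __update_expected_counters(verification_dict, op_key, load_task,
                                   attempted_count, durability_level,
                                   supported_d_levels):
        # Only the docs that actually succeeded move the expected counters
        succeeded = attempted_count - len(load_task.fail.keys())
        verification_dict[op_key] += succeeded
        # sync_write_committed_count advances only for durable writes
        if durability_level in supported_d_levels:
            verification_dict["sync_write_committed_count"] += succeeded
        return verification_dict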

    def test_large_doc_size(self):
        # Bucket size = 256MB; the test starts failing once ~236MB is filled
        # (document size = 2MB => ~221 docs). So load 250 docs, each with
        # size >= 1MB. See MB-29333

        self.doc_size *= 1024 * 1024
        gens_load = self.generate_docs_bigdata(docs_per_day=self.num_items,
                                               document_size=self.doc_size)
        for bucket in self.bucket_util.buckets:
            task = self.task.async_load_gen_docs(
                self.cluster,
                bucket,
                gens_load,
                "create",
                0,
                batch_size=10,
                process_concurrency=8,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                compression=self.sdk_compression,
                timeout_secs=self.sdk_timeout,
                sdk_client_pool=self.sdk_client_pool)
            self.task.jython_task_manager.get_task_result(task)

        # Check whether all the documents (250) are loaded
        # within the default timeout
        self.bucket_util.verify_stats_all_buckets(self.num_items)

    def test_large_doc_20MB(self):
        # test reproducer for MB-29258,
        # Load a doc which is greater than 20MB
        # with compression enabled and check if it fails
        # check with compression_mode as active, passive and off
        val_error = SDKException.ValueTooLargeException
        gens_load = self.generate_docs_bigdata(docs_per_day=1,
                                               document_size=(self.doc_size *
                                                              1024000))
        for bucket in self.bucket_util.buckets:
            task = self.task.async_load_gen_docs(
                self.cluster,
                bucket,
                gens_load,
                "create",
                0,
                batch_size=10,
                process_concurrency=8,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                compression=self.sdk_compression,
                timeout_secs=self.sdk_timeout,
                sdk_client_pool=self.sdk_client_pool)
            self.task.jython_task_manager.get_task_result(task)
            if self.doc_size > 20:
                if len(task.fail.keys()) == 0:
                    self.log_failure("No failures during large doc insert")
                for doc_id, doc_result in task.fail.items():
                    if val_error not in str(doc_result["error"]):
                        self.log_failure("Invalid exception for key %s: %s" %
                                         (doc_id, doc_result))
            else:
                if len(task.fail.keys()) != 0:
                    self.log_failure("Failures during large doc insert")

        for bucket in self.bucket_util.buckets:
            if self.doc_size > 20:
                # failed with error "Data Too Big" when document size > 20MB
                self.bucket_util.verify_stats_all_buckets(0)
            else:
                self.bucket_util.verify_stats_all_buckets(1)
                gens_update = self.generate_docs_bigdata(
                    docs_per_day=1, document_size=(21 * 1024000))
                task = self.task.async_load_gen_docs(
                    self.cluster,
                    bucket,
                    gens_update,
                    "update",
                    0,
                    batch_size=10,
                    process_concurrency=8,
                    replicate_to=self.replicate_to,
                    persist_to=self.persist_to,
                    durability=self.durability_level,
                    compression=self.sdk_compression,
                    timeout_secs=self.sdk_timeout,
                    sdk_client_pool=self.sdk_client_pool)
                self.task.jython_task_manager.get_task_result(task)
                if len(task.fail.keys()) != 1:
                    self.log_failure("Large docs inserted for keys: %s" %
                                     task.fail.keys())
                if len(task.fail.keys()) == 0:
                    self.log_failure("No failures during large doc insert")
                # val_error is SDKException.ValueTooLargeException
                for doc_id, doc_result in task.fail.items():
                    if val_error not in str(doc_result["error"]):
                        self.log_failure("Invalid exception for key %s: %s" %
                                         (doc_id, doc_result))
                self.bucket_util.verify_stats_all_buckets(1)
        self.validate_test_failure()
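
    # Hedged sketch (not in the original suite): a direct, single-key probe
    # of the ~20MB value-size ceiling that test_large_doc_20MB exercises via
    # the bulk loader. SDKClient and client.crud() are used the same way as
    # in MB36948 below; the helper name, key name and payload size are
    # illustrative assumptions.
    def __try_oversized_value(self, bucket):
        client = SDKClient([self.cluster.master], bucket,
                           compression_settings=self.sdk_compression)
        try:
            # 21MB value - expected to be rejected by the server
            result = client.crud("create", "oversized_doc",
                                 "a" * (21 * 1024 * 1024))
            if result["status"]:
                self.log_failure("Create succeeded for a >20MB value")
            elif SDKException.ValueTooLargeException \
                    not in str(result["error"]):
                self.log_failure("Unexpected error: %s" % result["error"])
        finally:
            client.close()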

    def test_parallel_cruds(self):
        data_op_dict = dict()
        num_items = self.num_items
        half_of_num_items = self.num_items / 2
        supported_d_levels = self.bucket_util.get_supported_durability_levels()
        exp_values_to_test = [0, 900, 4000, 12999]

        # Initial doc_loading
        initial_load = doc_generator(self.key,
                                     0,
                                     self.num_items,
                                     doc_size=self.doc_size)
        task = self.task.async_load_gen_docs(
            self.cluster,
            self.bucket_util.buckets[0],
            initial_load,
            DocLoading.Bucket.DocOps.CREATE,
            0,
            batch_size=100,
            process_concurrency=8,
            compression=self.sdk_compression,
            timeout_secs=self.sdk_timeout,
            sdk_client_pool=self.sdk_client_pool)
        self.task.jython_task_manager.get_task_result(task)

        # Create required doc_gens and doc_op task object
        for doc_op in self.doc_ops:
            if doc_op == DocLoading.Bucket.DocOps.CREATE:
                num_items += half_of_num_items
                gen_start = self.num_items
                gen_end = self.num_items + half_of_num_items
            elif doc_op == DocLoading.Bucket.DocOps.DELETE:
                gen_start = 0
                gen_end = half_of_num_items
            else:
                gen_start = half_of_num_items
                gen_end = self.num_items

            d_level = ""
            replicate_to = persist_to = 0
            if self.num_replicas > 0:
                replicate_to = randint(1, self.num_replicas)
                persist_to = randint(0, self.num_replicas + 1)
            if not self.observe_test and choice([True, False]):
                d_level = choice(supported_d_levels)

            self.log.info("Doc_op %s, range (%d, %d), "
                          "replicate_to=%s, persist_to=%s, d_level=%s" %
                          (doc_op, gen_start, gen_end, replicate_to,
                           persist_to, d_level))

            data_op_dict[doc_op] = dict()
            data_op_dict[doc_op]["doc_gen"] = doc_generator(
                self.key,
                gen_start,
                gen_end,
                doc_size=self.doc_size,
                mutation_type=doc_op)
            data_op_dict[doc_op]["task"] = self.task.async_load_gen_docs(
                self.cluster,
                self.bucket_util.buckets[0],
                data_op_dict[doc_op]["doc_gen"],
                doc_op,
                exp=choice(exp_values_to_test),
                compression=self.sdk_compression,
                persist_to=persist_to,
                replicate_to=replicate_to,
                durability=d_level,
                timeout_secs=self.sdk_timeout,
                sdk_client_pool=self.sdk_client_pool,
                process_concurrency=1,
                batch_size=1,
                print_ops_rate=False,
                start_task=False)

        # Start all tasks
        for doc_op in self.doc_ops:
            self.task_manager.add_new_task(data_op_dict[doc_op]["task"])
        # Wait for doc_ops to complete and validate final doc value result
        for doc_op in self.doc_ops:
            self.task_manager.get_task_result(data_op_dict[doc_op]["task"])
            self.log.info("%s task completed" % doc_op)
            if data_op_dict[doc_op]["task"].fail:
                self.log_failure("Doc_loading failed for %s: %s" %
                                 (doc_op, data_op_dict[doc_op]["task"].fail))
            elif doc_op in [
                    DocLoading.Bucket.DocOps.CREATE,
                    DocLoading.Bucket.DocOps.UPDATE,
                    DocLoading.Bucket.DocOps.REPLACE,
                    DocLoading.Bucket.DocOps.DELETE
            ]:
                suppress_err_tbl = False
                if doc_op == DocLoading.Bucket.DocOps.DELETE:
                    suppress_err_tbl = True
                self.log.info("Validating %s results" % doc_op)
                # Read all the values to validate doc_operation values
                task = self.task.async_validate_docs(
                    self.cluster,
                    self.bucket_util.buckets[0],
                    data_op_dict[doc_op]["doc_gen"],
                    doc_op,
                    0,
                    batch_size=self.batch_size,
                    process_concurrency=self.process_concurrency,
                    sdk_client_pool=self.sdk_client_pool,
                    suppress_error_table=suppress_err_tbl)
                self.task.jython_task_manager.get_task_result(task)

        self.validate_test_failure()
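
    # Hedged sketch (not part of the original test): the per-op key-range
    # selection at the top of test_parallel_cruds, factored out. Creates get
    # fresh keys beyond the loaded set, deletes take the first half, and
    # update/replace/read reuse the second half. The helper name is an
    # assumption for illustration.
    @staticmethod
    def __doc_range_for_op(doc_op, num_items):
        half = num_items / 2
        if doc_op == DocLoading.Bucket.DocOps.CREATE:
            return num_items, num_items + half
        if doc_op == DocLoading.Bucket.DocOps.DELETE:
            return 0, half
        return half, num_items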

    def test_diag_eval_curl(self):
        # Check if diag/eval can be done only by local host
        self.disable_diag_eval_on_non_local_host = \
            self.input.param("disable_diag_eval_non_local", False)
        port = self.cluster.master.port

        # Verify that diag/eval works over localhost
        cmd = []
        cmd_base = 'curl http://{0}:{1}@localhost:{2}/diag/eval ' \
            .format(self.cluster.master.rest_username,
                    self.cluster.master.rest_password, port)
        command = cmd_base + '-X POST -d \'os:cmd("env")\''
        cmd.append(command)
        command = cmd_base + '-X POST -d \'case file:read_file("/etc/passwd") of {ok, B} -> io:format("~p~n", [binary_to_term(B)]) end.\''
        cmd.append(command)

        shell = RemoteMachineShellConnection(self.cluster.master)
        for command in cmd:
            output, error = shell.execute_command(command)
            self.assertNotEquals("API is accessible from localhost only",
                                 output[0])

        # Enable allow_nonlocal_eval unless the test expects diag/eval
        # to remain restricted to localhost
        if not self.disable_diag_eval_on_non_local_host:
            command = cmd_base + '-X POST -d \'ns_config:set(allow_nonlocal_eval, true).\''
            _, _ = shell.execute_command(command)

        # Verify diag/eval over the node's IP address: it must be rejected
        # when allow_nonlocal_eval stays disabled, and accepted otherwise
        cmd = []
        cmd_base = 'curl http://{0}:{1}@{2}:{3}/diag/eval ' \
            .format(self.cluster.master.rest_username,
                    self.cluster.master.rest_password,
                    self.cluster.master.ip, port)
        command = cmd_base + '-X POST -d \'os:cmd("env")\''
        cmd.append(command)
        command = cmd_base + '-X POST -d \'case file:read_file("/etc/passwd") of {ok, B} -> io:format("~p~n", [binary_to_term(B)]) end.\''
        cmd.append(command)

        for command in cmd:
            output, error = shell.execute_command(command)
            if self.disable_diag_eval_on_non_local_host:
                self.assertEquals("API is accessible from localhost only",
                                  output[0])
            else:
                self.assertNotEquals("API is accessible from localhost only",
                                     output[0])
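
    # Hedged sketch (not part of the original test): the curl construction
    # and the "localhost only" check used in test_diag_eval_curl, folded
    # into one helper. The helper name is an assumption; it reuses the same
    # URL format and RemoteMachineShellConnection.execute_command() call as
    # the test above.
    def __diag_eval_rejected(self, shell, host, erlang_expr):
        cmd = 'curl http://{0}:{1}@{2}:{3}/diag/eval -X POST -d \'{4}\'' \
            .format(self.cluster.master.rest_username,
                    self.cluster.master.rest_password,
                    host, self.cluster.master.port, erlang_expr)
        output, _ = shell.execute_command(cmd)
        # True when the endpoint refused the request as non-local
        return bool(output) \
            and output[0] == "API is accessible from localhost only"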

    def test_MB_40967(self):
        """
        1. Load initial docs into the bucket
        2. Perform continuous reads until the get_cmd_* histogram stats
           stop appearing in the 'cbstats timings' output
        """
        total_gets = 0
        max_gets = 2500000000
        bucket = self.bucket_util.buckets[0]
        doc_gen = doc_generator(self.key, 0, self.num_items, doc_size=1)
        create_task = self.task.async_load_gen_docs(
            self.cluster,
            bucket,
            doc_gen,
            "create",
            0,
            batch_size=100,
            process_concurrency=self.process_concurrency,
            timeout_secs=self.sdk_timeout)
        self.task_manager.get_task_result(create_task)

        cbstat = dict()
        kv_nodes = self.cluster_util.get_kv_nodes()
        for node in kv_nodes:
            shell = RemoteMachineShellConnection(node)
            cbstat[node] = Cbstats(shell)

        self.log.info("Start doc_reads until total_gets cross: %s" % max_gets)
        read_task = self.task.async_continuous_doc_ops(
            self.cluster,
            bucket,
            doc_gen,
            op_type="read",
            batch_size=self.batch_size,
            process_concurrency=self.process_concurrency,
            timeout_secs=self.sdk_timeout)
        self.sleep(60, "Wait for read task to start")
        while total_gets < max_gets:
            total_gets = 0
            for node in kv_nodes:
                output, error = cbstat[node].get_timings(bucket.name)
                if error:
                    self.log_failure("Error during cbstat timings: %s" % error)
                    break

                get_cmd_found = False
                for line in output:
                    if "get_cmd_" in line:
                        if "get_cmd_mean" in line:
                            break
                        get_cmd_found = True
                if not get_cmd_found:
                    self.log.error(output)
                    self.log_failure("cbstat timings get_cmd stats not found")
                    break
                vb_details = cbstat[node].vbucket_details(bucket.name)
                for _, vb_stats in vb_details.items():
                    total_gets += long(vb_stats["ops_get"])
            if self.test_failure:
                break
            self.sleep(
                120,
                "Total_gets: %s, itr: %s" % (total_gets, read_task.itr_count))

        read_task.end_task()
        self.task_manager.get_task_result(read_task)

        # Close all shell connections
        for node in kv_nodes:
            cbstat[node].shellConn.disconnect()

        self.validate_test_failure()
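
    # Hedged sketch (not in the original test): the per-line scan inside
    # test_MB_40967 boils down to "a get_cmd_* histogram line must appear
    # before any get_cmd_mean summary line". A standalone version of that
    # check over the raw 'cbstats timings' output lines:
    @staticmethod
    def __get_cmd_histogram_present(timing_lines):
        for line in timing_lines:
            if "get_cmd_" in line:
                if "get_cmd_mean" in line:
                    # Summary reached without any histogram bucket line
                    return False
                return True
        return False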

    def test_MB_41510(self):
        """
        1. Load initial docs into the bucket
        2. Perform continuous reads
        3. Perform 'mcstat reset' in parallel to the reads
        4. Perform 'cbstats timings' command to read the current values
        5. Validate there is no crash when stats are getting reset continuously
        """
        def reset_mcstat(bucket_name):
            mc_stat = dict()
            for t_node in kv_nodes:
                shell_conn = RemoteMachineShellConnection(t_node)
                mc_stat[t_node] = McStat(shell_conn)

            while not stop_thread:
                for t_node in mc_stat.keys():
                    try:
                        mc_stat[t_node].reset(bucket_name)
                    except Exception as mcstat_err:
                        self.log_failure(mcstat_err)
                if self.test_failure:
                    break

            for t_node in mc_stat.keys():
                mc_stat[t_node].shellConn.disconnect()

        def get_timings(bucket_name):
            cb_stat = dict()
            for t_node in kv_nodes:
                shell_conn = RemoteMachineShellConnection(t_node)
                cb_stat[t_node] = Cbstats(shell_conn)

            while not stop_thread:
                for t_node in cb_stat.keys():
                    try:
                        cb_stat[t_node].get_timings(bucket_name)
                    except Exception as cbstat_err:
                        self.log_failure(cbstat_err)
                if self.test_failure:
                    break

            for t_node in cb_stat.keys():
                cb_stat[t_node].shellConn.disconnect()

        total_gets = 0
        max_gets = 50000000
        stop_thread = False
        bucket = self.bucket_util.buckets[0]
        cb_stat_obj = dict()
        kv_nodes = self.cluster_util.get_kv_nodes()
        for node in self.cluster_util.get_kv_nodes():
            shell = RemoteMachineShellConnection(node)
            cb_stat_obj[node] = Cbstats(shell)

        doc_gen = doc_generator(self.key, 0, self.num_items, doc_size=1)
        create_task = self.task.async_load_gen_docs(
            self.cluster,
            bucket,
            doc_gen,
            "create",
            0,
            batch_size=500,
            process_concurrency=self.process_concurrency,
            timeout_secs=self.sdk_timeout)
        self.task_manager.get_task_result(create_task)

        mc_stat_reset_thread = Thread(target=reset_mcstat, args=[bucket.name])
        get_timings_thread = Thread(target=get_timings, args=[bucket.name])
        mc_stat_reset_thread.start()
        get_timings_thread.start()

        read_task = self.task.async_continuous_doc_ops(
            self.cluster,
            bucket,
            doc_gen,
            op_type="read",
            batch_size=self.batch_size,
            process_concurrency=self.process_concurrency,
            timeout_secs=self.sdk_timeout)

        while total_gets < max_gets:
            total_gets = 0
            try:
                for node in cb_stat_obj.keys():
                    vb_details = cb_stat_obj[node].vbucket_details(bucket.name)
                    for _, vb_stats in vb_details.items():
                        total_gets += long(vb_stats["ops_get"])
            except Exception as err:
                self.log_failure(err)

            self.log.info("Total gets: %s" % total_gets)
            result, core_msg, stream_msg = self.check_coredump_exist(
                self.servers, force_collect=True)

            if result is not False:
                self.log_failure(core_msg + stream_msg)
                break
            elif self.test_failure:
                break

            self.sleep(60, "Wait before next check")

        stop_thread = True
        read_task.end_task()
        mc_stat_reset_thread.join()
        get_timings_thread.join()

        # Close all shell connections
        for node in cb_stat_obj.keys():
            cb_stat_obj[node].shellConn.disconnect()

        self.validate_test_failure()

    def verify_stat(self, items, value="active"):
        mc = MemcachedClient(self.cluster.master.ip, constants.memcached_port)
        mc.sasl_auth_plain(self.cluster.master.rest_username,
                           self.cluster.master.rest_password)
        mc.bucket_select('default')
        stats = mc.stats()
        self.assertEquals(stats['ep_compression_mode'], value)
        self.assertEquals(int(stats['ep_item_compressor_num_compressed']),
                          items)
        self.assertNotEquals(int(stats['vb_active_itm_memory']),
                             int(stats['vb_active_itm_memory_uncompressed']))

    def test_compression_active_and_off(self):
        """
        test reproducer for MB-29272,
        Load some documents with compression mode set to active
        get the cbstats
        change compression mode to off and wait for minimum 250ms
        Load some more documents and check the compression is not done
        epengine.basic_ops.basic_ops.test_compression_active_and_off,items=10000,compression_mode=active

        :return:
        """
        # Load some documents with compression mode as active
        gen_create = doc_generator("eviction1_",
                                   start=0,
                                   end=self.num_items,
                                   key_size=self.key_size,
                                   doc_size=self.doc_size,
                                   doc_type=self.doc_type,
                                   vbuckets=self.cluster_util.vbuckets,
                                   randomize_doc_size=self.randomize_doc_size,
                                   randomize_value=self.randomize_value)
        gen_create2 = doc_generator("eviction2_",
                                    start=0,
                                    end=self.num_items,
                                    key_size=self.key_size,
                                    doc_size=self.doc_size,
                                    doc_type=self.doc_type,
                                    vbuckets=self.cluster_util.vbuckets,
                                    randomize_doc_size=self.randomize_doc_size,
                                    randomize_value=self.randomize_value)
        def_bucket = self.bucket_util.get_all_buckets()[0]
        task = self.task.async_load_gen_docs(
            self.cluster,
            def_bucket,
            gen_create,
            "create",
            0,
            batch_size=10,
            process_concurrency=8,
            replicate_to=self.replicate_to,
            persist_to=self.persist_to,
            durability=self.durability_level,
            compression=self.sdk_compression,
            timeout_secs=self.sdk_timeout,
            sdk_client_pool=self.sdk_client_pool)
        self.task.jython_task_manager.get_task_result(task)
        self.bucket_util._wait_for_stats_all_buckets()
        self.bucket_util.verify_stats_all_buckets(self.num_items)

        remote = RemoteMachineShellConnection(self.cluster.master)
        for bucket in self.bucket_util.buckets:
            # change compression mode to off
            output, _ = remote.execute_couchbase_cli(
                cli_command='bucket-edit',
                cluster_host="localhost:8091",
                user=self.cluster.master.rest_username,
                password=self.cluster.master.rest_password,
                options='--bucket=%s --compression-mode off' % bucket.name)
            self.assertTrue(' '.join(output).find('SUCCESS') != -1,
                            'compression mode set to off')

            # sleep for 10 sec (minimum required wait is 250ms)
            self.sleep(10)

        # Load data and check stats to see compression
        # is not done for newly added data
        task = self.task.async_load_gen_docs(
            self.cluster,
            def_bucket,
            gen_create2,
            "create",
            0,
            batch_size=10,
            process_concurrency=8,
            replicate_to=self.replicate_to,
            persist_to=self.persist_to,
            durability=self.durability_level,
            compression=self.sdk_compression,
            timeout_secs=self.sdk_timeout,
            sdk_client_pool=self.sdk_client_pool)
        self.task.jython_task_manager.get_task_result(task)
        self.bucket_util._wait_for_stats_all_buckets()
        self.bucket_util.verify_stats_all_buckets(self.num_items * 2)
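
    # Hedged sketch (not part of the original test): the couchbase-cli call
    # used in test_compression_active_and_off, wrapped so the target mode
    # ("active" / "passive" / "off") becomes a parameter. The helper name is
    # an assumption; execute_couchbase_cli() is invoked exactly as above.
    def __set_compression_mode(self, shell, bucket_name, mode):
        output, _ = shell.execute_couchbase_cli(
            cli_command='bucket-edit',
            cluster_host="localhost:8091",
            user=self.cluster.master.rest_username,
            password=self.cluster.master.rest_password,
            options='--bucket=%s --compression-mode %s' % (bucket_name, mode))
        self.assertTrue(' '.join(output).find('SUCCESS') != -1,
                        'compression mode not set to %s' % mode)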

    def MB36948(self):
        node_to_stop = self.servers[0]
        self.log.info("Adding index/query node")
        self.task.rebalance([self.cluster.master], [self.servers[2]], [],
                            services=["n1ql,index"])
        self.log.info("Creating SDK client connection")
        client = SDKClient([self.cluster.master],
                           self.bucket_util.buckets[0],
                           compression_settings=self.sdk_compression)

        self.log.info("Stopping memcached on: %s" % node_to_stop)
        ssh_conn = RemoteMachineShellConnection(node_to_stop)
        err_sim = CouchbaseError(self.log, ssh_conn)
        err_sim.create(CouchbaseError.STOP_MEMCACHED)

        result = client.crud("create", "abort1", "abort1_val")
        if not result["status"]:
            self.log_failure("Async SET failed")

        result = client.crud("update",
                             "abort1",
                             "abort1_val",
                             durability=self.durability_level,
                             timeout=3,
                             time_unit="seconds")
        if result["status"]:
            self.log_failure("Sync write succeeded")
        if SDKException.DurabilityAmbiguousException not in result["error"]:
            self.log_failure("Invalid exception for sync_write: %s" % result)

        self.log.info("Resuming memcached on: %s" % node_to_stop)
        err_sim.revert(CouchbaseError.STOP_MEMCACHED)

        self.bucket_util._wait_for_stats_all_buckets()
        self.bucket_util.verify_stats_all_buckets(1)

        self.log.info("Closing ssh & SDK connections")
        ssh_conn.disconnect()
        client.close()

        self.validate_test_failure()

    def do_get_random_key(self):
        # MB-31548: get_random_key sometimes hangs
        mc = MemcachedClient(self.cluster.master.ip, constants.memcached_port)
        mc.sasl_auth_plain(self.cluster.master.rest_username,
                           self.cluster.master.rest_password)
        mc.bucket_select('default')

        count = 0
        while count < 1000000:
            count += 1
            try:
                mc.get_random_key()
            except MemcachedError as error:
                self.fail("<MemcachedError #%d ``%s''>" %
                          (error.status, error.message))
            if count % 1000 == 0:
                self.log.info('The number of iteration is {}'.format(count))

    def test_bulk_sync_write_in_progress(self):
        doc_ops = self.input.param("doc_ops").split(';')
        shell_conn = dict()
        cbstat_obj = dict()
        error_sim = dict()
        vb_info = dict()
        active_vbs = dict()
        replica_vbs = dict()
        sync_write_in_progress = \
            SDKException.RetryReason.KV_SYNC_WRITE_IN_PROGRESS

        # Override d_level, error_simulation type based on d_level
        self.__get_d_level_and_error_to_simulate()

        target_nodes = DurabilityHelper.getTargetNodes(self.cluster,
                                                       self.nodes_init,
                                                       self.num_nodes_affected)
        for node in target_nodes:
            shell_conn[node.ip] = RemoteMachineShellConnection(node)
            cbstat_obj[node.ip] = Cbstats(node)
            vb_info["init"] = dict()
            vb_info["init"][node.ip] = cbstat_obj[node.ip].vbucket_seqno(
                self.bucket.name)
            error_sim[node.ip] = CouchbaseError(self.log, shell_conn[node.ip])
            # Fetch the affected nodes' active and replica vbucket numbers
            active_vbs[node.ip] = cbstat_obj[node.ip].vbucket_list(
                self.bucket.name, vbucket_type="active")
            replica_vbs[node.ip] = cbstat_obj[node.ip].vbucket_list(
                self.bucket.name, vbucket_type="replica")

        target_vbs = replica_vbs
        if self.durability_level \
                == Bucket.DurabilityLevel.MAJORITY_AND_PERSIST_TO_ACTIVE:
            target_vbs = active_vbs
            target_vbuckets = list()
            for target_node in target_nodes:
                target_vbuckets += target_vbs[target_node.ip]
        else:
            target_vbuckets = target_vbs[target_nodes[0].ip]
            if len(target_nodes) > 1:
                index = 1
                while index < len(target_nodes):
                    target_vbuckets = list(
                        set(target_vbuckets).intersection(
                            set(target_vbs[target_nodes[index].ip])))
                    index += 1

        doc_load_spec = dict()
        doc_load_spec["doc_crud"] = dict()
        doc_load_spec[MetaCrudParams.TARGET_VBUCKETS] = target_vbuckets
        doc_load_spec[MetaCrudParams.DURABILITY_LEVEL] = self.durability_level
        doc_load_spec[MetaCrudParams.COLLECTIONS_CONSIDERED_FOR_CRUD] = 5
        doc_load_spec[MetaCrudParams.SCOPES_CONSIDERED_FOR_CRUD] = "all"
        doc_load_spec[MetaCrudParams.SDK_TIMEOUT] = 60
        doc_load_spec["doc_crud"][MetaCrudParams.DocCrud.COMMON_DOC_KEY] \
            = "test_collections"

        if doc_ops[0] == "create":
            doc_load_spec["doc_crud"][
                MetaCrudParams.DocCrud.CREATE_PERCENTAGE_PER_COLLECTION] = 1
        elif doc_ops[0] == "update":
            doc_load_spec["doc_crud"][
                MetaCrudParams.DocCrud.UPDATE_PERCENTAGE_PER_COLLECTION] = 1
        elif doc_ops[0] == "replace":
            doc_load_spec["doc_crud"][
                MetaCrudParams.DocCrud.REPLACE_PERCENTAGE_PER_COLLECTION] = 1
        elif doc_ops[0] == "delete":
            doc_load_spec["doc_crud"][
                MetaCrudParams.DocCrud.DELETE_PERCENTAGE_PER_COLLECTION] = 1

        # Induce error condition for testing
        for node in target_nodes:
            error_sim[node.ip].create(self.simulate_error,
                                      bucket_name=self.bucket.name)

        doc_loading_task = \
            self.bucket_util.run_scenario_from_spec(
                self.task,
                self.cluster,
                self.cluster.buckets,
                doc_load_spec,
                async_load=True)

        self.sleep(5, "Wait for doc ops to reach server")

        tem_durability = self.durability_level
        if self.with_non_sync_writes:
            tem_durability = "NONE"

        for bucket, s_dict in doc_loading_task.loader_spec.items():
            for s_name, c_dict in s_dict["scopes"].items():
                for c_name, c_meta in c_dict["collections"].items():
                    for op_type in c_meta:
                        # This will support both sync-write and non-sync-writes
                        doc_loader_task_2 = self.task.async_load_gen_docs(
                            self.cluster,
                            self.bucket,
                            c_meta[op_type]["doc_gen"],
                            doc_ops[1],
                            0,
                            scope=s_name,
                            collection=c_name,
                            sdk_client_pool=self.sdk_client_pool,
                            batch_size=self.crud_batch_size,
                            process_concurrency=1,
                            replicate_to=self.replicate_to,
                            persist_to=self.persist_to,
                            durability=tem_durability,
                            timeout_secs=3,
                            print_ops_rate=False,
                            skip_read_on_error=True,
                            task_identifier="parallel_task2")
                        self.task.jython_task_manager.get_task_result(
                            doc_loader_task_2)

                        # Validate the sync_write_in_progress errors
                        # seen by doc_loader_task_2
                        failed_docs = doc_loader_task_2.fail
                        if len(failed_docs.keys()) != 1:
                            self.log_failure(
                                "Exception not seen for docs: %s" %
                                failed_docs)

                        valid_exception = self.durability_helper\
                            .validate_durability_exception(
                                failed_docs,
                                SDKException.AmbiguousTimeoutException,
                                retry_reason=sync_write_in_progress)

                        if not valid_exception:
                            self.log_failure("Got invalid exception")

        # Revert the introduced error condition
        for node in target_nodes:
            error_sim[node.ip].revert(self.simulate_error,
                                      bucket_name=self.bucket.name)

        # Wait for doc_loading to complete
        self.task_manager.get_task_result(doc_loading_task)
        self.bucket_util.validate_doc_loading_results(doc_loading_task)
        if doc_loading_task.result is False:
            self.log_failure("Doc CRUDs failed")

        # Validate docs for update success or not
        if doc_ops[0] == "update":
            for bucket, s_dict in doc_loading_task.loader_spec.items():
                for s_name, c_dict in s_dict["scopes"].items():
                    for c_name, c_meta in c_dict["collections"].items():
                        for op_type in c_meta:
                            read_task = self.task.async_load_gen_docs(
                                self.cluster,
                                self.bucket,
                                c_meta[op_type]["doc_gen"],
                                "read",
                                batch_size=self.crud_batch_size,
                                process_concurrency=1,
                                timeout_secs=self.sdk_timeout)
                            self.task_manager.get_task_result(read_task)
                            for key, doc_info in read_task.success.items():
                                if doc_info["cas"] != 0 \
                                        and json.loads(str(doc_info["value"])
                                                       )["mutated"] != 1:
                                    self.log_failure(
                                        "Update failed for key %s: %s" %
                                        (key, doc_info))

        # Validate doc_count per collection
        self.validate_test_failure()
        self.bucket_util.validate_docs_per_collections_all_buckets(
            self.cluster)
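
    # Hedged sketch (not from the original suite): the else-branch above
    # that derives 'target_vbuckets' is a set intersection across the
    # affected nodes' replica vbucket lists. A standalone equivalent:
    @staticmethod
    def __common_vbuckets(per_node_vb_lists):
        # per_node_vb_lists: list of vbucket-number lists, one per node.
        # Returns the vbuckets present on every affected node.
        if not per_node_vb_lists:
            return []
        common = set(per_node_vb_lists[0])
        for vb_list in per_node_vb_lists[1:]:
            common = common.intersection(set(vb_list))
        return list(common)
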
    def test_sub_doc_sync_write_in_progress(self):
        """
        Test to simulate sync_write_in_progress error and validate the behavior
        This will validate failure in majority of nodes, where durability will
        surely fail for all CRUDs

        1. Select nodes to simulate the error which will affect the durability
        2. Enable the specified error_scenario on the selected nodes
        3. Perform individual CRUDs and verify sync_write_in_progress errors
        4. Validate the end results
        """

        doc_ops = self.input.param("doc_ops", "insert")

        shell_conn = dict()
        cbstat_obj = dict()
        error_sim = dict()
        vb_info = dict()
        active_vbs = dict()
        replica_vbs = dict()
        vb_info["init"] = dict()
        doc_load_spec = dict()

        # Override d_level, error_simulation type based on d_level
        self.__get_d_level_and_error_to_simulate()

        target_nodes = DurabilityHelper.getTargetNodes(self.cluster,
                                                       self.nodes_init,
                                                       self.num_nodes_affected)
        for node in target_nodes:
            shell_conn[node.ip] = RemoteMachineShellConnection(node)
            cbstat_obj[node.ip] = Cbstats(node)
            vb_info["init"] = dict()
            vb_info["init"][node.ip] = cbstat_obj[node.ip].vbucket_seqno(
                self.bucket.name)
            error_sim[node.ip] = CouchbaseError(self.log, shell_conn[node.ip])
            # Fetch the affected nodes' active and replica vbucket numbers
            active_vbs[node.ip] = cbstat_obj[node.ip].vbucket_list(
                self.bucket.name, vbucket_type="active")
            replica_vbs[node.ip] = cbstat_obj[node.ip].vbucket_list(
                self.bucket.name, vbucket_type="replica")

        target_vbs = replica_vbs
        if self.durability_level \
                == Bucket.DurabilityLevel.MAJORITY_AND_PERSIST_TO_ACTIVE:
            target_vbs = active_vbs
            target_vbuckets = list()
            for target_node in target_nodes:
                target_vbuckets += target_vbs[target_node.ip]
        else:
            target_vbuckets = target_vbs[target_nodes[0].ip]
            if len(target_nodes) > 1:
                index = 1
                while index < len(target_nodes):
                    target_vbuckets = list(
                        set(target_vbuckets).intersection(
                            set(target_vbs[target_nodes[index].ip])))
                    index += 1

        amb_timeout = SDKException.AmbiguousTimeoutException
        kv_sync_write_in_progress = \
            SDKException.RetryReason.KV_SYNC_WRITE_IN_PROGRESS
        doc_not_found_exception = SDKException.DocumentNotFoundException

        self.load_data_for_sub_doc_ops()

        doc_load_spec["doc_crud"] = dict()
        doc_load_spec["subdoc_crud"] = dict()
        doc_load_spec["doc_crud"][MetaCrudParams.DocCrud.COMMON_DOC_KEY] \
            = "test_collections"
        doc_load_spec[MetaCrudParams.TARGET_VBUCKETS] = target_vbuckets
        doc_load_spec[MetaCrudParams.DURABILITY_LEVEL] = self.durability_level
        doc_load_spec[MetaCrudParams.COLLECTIONS_CONSIDERED_FOR_CRUD] = 5
        doc_load_spec[MetaCrudParams.SCOPES_CONSIDERED_FOR_CRUD] = "all"
        doc_load_spec[MetaCrudParams.SDK_TIMEOUT] = 60

        # Acquire SDK client from the pool for performing doc_ops locally
        client = self.sdk_client_pool.get_client_for_bucket(self.bucket)
        # Override the crud_batch_size
        self.crud_batch_size = 5

        # Update mutation spec based on the required doc_operation
        if doc_ops == DocLoading.Bucket.DocOps.CREATE:
            doc_load_spec["doc_crud"][
                MetaCrudParams.DocCrud.CREATE_PERCENTAGE_PER_COLLECTION] = 1
        elif doc_ops == DocLoading.Bucket.DocOps.UPDATE:
            doc_load_spec["doc_crud"][
                MetaCrudParams.DocCrud.UPDATE_PERCENTAGE_PER_COLLECTION] = 1
        elif doc_ops == DocLoading.Bucket.DocOps.DELETE:
            doc_load_spec["doc_crud"][
                MetaCrudParams.DocCrud.DELETE_PERCENTAGE_PER_COLLECTION] = 1
        elif doc_ops == DocLoading.Bucket.SubDocOps.INSERT:
            doc_load_spec["subdoc_crud"][
                MetaCrudParams.SubDocCrud.INSERT_PER_COLLECTION] = 1
        elif doc_ops == DocLoading.Bucket.SubDocOps.UPSERT:
            doc_load_spec["subdoc_crud"][
                MetaCrudParams.SubDocCrud.UPSERT_PER_COLLECTION] = 1
        elif doc_ops == DocLoading.Bucket.SubDocOps.REMOVE:
            doc_load_spec["subdoc_crud"][
                MetaCrudParams.SubDocCrud.REMOVE_PER_COLLECTION] = 1

        # This is to support both sync-write and non-sync-writes
        tem_durability = self.durability_level
        if self.with_non_sync_writes:
            tem_durability = Bucket.DurabilityLevel.NONE

        # Perform specified action
        for node in target_nodes:
            error_sim[node.ip].create(self.simulate_error,
                                      bucket_name=self.bucket.name)
        self.sleep(5, "Wait for error simulation to take effect")

        # Initialize tasks and store the task objects
        doc_loading_task = \
            self.bucket_util.run_scenario_from_spec(
                self.task,
                self.cluster,
                self.cluster.buckets,
                doc_load_spec,
                mutation_num=2,
                batch_size=1,
                async_load=True)

        # Give the async doc_loading task time to get its CRUDs to the server
        self.sleep(10, "Wait for task_1 CRUDs to reach server")

        for bucket, s_dict in doc_loading_task.loader_spec.items():
            for s_name, c_dict in s_dict["scopes"].items():
                for c_name, c_meta in c_dict["collections"].items():
                    for op_type in c_meta:
                        key, _ = c_meta[op_type]["doc_gen"].next()
                        expected_exception = amb_timeout
                        retry_reason = kv_sync_write_in_progress
                        if doc_ops == "create":
                            expected_exception = doc_not_found_exception
                            retry_reason = None

                        for sub_doc_op in [
                                DocLoading.Bucket.SubDocOps.INSERT,
                                DocLoading.Bucket.SubDocOps.UPSERT,
                                DocLoading.Bucket.SubDocOps.REMOVE
                        ]:
                            val = ["my_mutation", "val"]
                            if sub_doc_op \
                                    == DocLoading.Bucket.SubDocOps.REMOVE:
                                val = "mutated"
                            result = client.crud(sub_doc_op,
                                                 key,
                                                 val,
                                                 durability=tem_durability,
                                                 timeout=2)

                            if result[0]:
                                self.log_failure("Doc crud succeeded for %s" %
                                                 op_type)
                            elif expected_exception \
                                    not in str(result[1][key]["error"]):
                                self.log_failure(
                                    "Invalid exception for key %s: %s" %
                                    (key, result[1][key]["error"]))
                            elif retry_reason is not None and \
                                    retry_reason \
                                    not in str(result[1][key]["error"]):
                                self.log_failure(
                                    "Retry reason missing for key %s: %s" %
                                    (key, result[1][key]["error"]))

        # Revert the introduced error condition
        for node in target_nodes:
            error_sim[node.ip].revert(self.simulate_error,
                                      bucket_name=self.bucket.name)

        # Wait for doc_loading_task to complete
        self.task.jython_task_manager.get_task_result(doc_loading_task)
        self.bucket_util.validate_doc_loading_results(doc_loading_task)
        if doc_loading_task.result is False:
            self.log_failure("Doc CRUDs failed")

        # Validate docs for update success or not
        if doc_ops == DocLoading.Bucket.DocOps.UPDATE:
            for bucket, s_dict in doc_loading_task.loader_spec.items():
                for s_name, c_dict in s_dict["scopes"].items():
                    for c_name, c_meta in c_dict["collections"].items():
                        for op_type in c_meta:
                            c_meta[op_type]["doc_gen"].reset()
                            read_task = self.task.async_load_gen_docs(
                                self.cluster,
                                self.bucket,
                                c_meta[op_type]["doc_gen"],
                                DocLoading.Bucket.DocOps.READ,
                                batch_size=self.crud_batch_size,
                                process_concurrency=1,
                                timeout_secs=self.sdk_timeout)
                            self.task_manager.get_task_result(read_task)
                            for key, doc_info in read_task.success.items():
                                if doc_info["cas"] != 0 and \
                                        json.loads(str(doc_info["value"])
                                                   )["mutated"] != 2:
                                    self.log_failure(
                                        "Update failed for key %s: %s" %
                                        (key, doc_info))

        # Release the acquired SDK client
        self.sdk_client_pool.release_client(client)

        # Verify initial doc load count
        self.bucket_util._wait_for_stats_all_buckets(self.cluster,
                                                     self.cluster.buckets)
        self.bucket_util.validate_docs_per_collections_all_buckets(
            self.cluster)
        self.validate_test_failure()
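
    # Hedged sketch (not part of the original test): the sub-document calls
    # issued inside test_sub_doc_sync_write_in_progress, isolated. For
    # insert/upsert the value is a [path, value] pair, while remove only
    # takes the path; the path/value literals and the helper name here are
    # illustrative assumptions.
    def __subdoc_mutate(self, client, key, sub_doc_op, durability):
        val = ["my_path", "my_value"]
        if sub_doc_op == DocLoading.Bucket.SubDocOps.REMOVE:
            val = "my_path"
        # result[0] is truthy on success; result[1][key]["error"] carries
        # the failure details, as consumed in the test above
        return client.crud(sub_doc_op, key, val,
                           durability=durability, timeout=2)
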
    def test_sub_doc_with_persistence_issues(self):
        """
        1. Select nodes from the cluster to simulate the specified error
        2. Perform CRUD on the target bucket with given timeout
        3. Using cbstats to verify the operation succeeds
        4. Validate all mutations met the durability condition
        """

        if self.durability_level.upper() in [
                Bucket.DurabilityLevel.MAJORITY_AND_PERSIST_TO_ACTIVE,
                Bucket.DurabilityLevel.PERSIST_TO_MAJORITY
        ]:
            self.log.critical("Test not valid for persistence durability")
            return

        error_sim = dict()
        shell_conn = dict()
        cbstat_obj = dict()
        failover_info = dict()
        vb_info_info = dict()
        active_vbs_in_target_nodes = list()
        failover_info["init"] = dict()
        failover_info["afterCrud"] = dict()
        vb_info_info["init"] = dict()
        vb_info_info["afterCrud"] = dict()
        def_bucket = self.bucket_util.buckets[0]

        load_spec = dict()
        load_spec["doc_crud"] = dict()
        load_spec["subdoc_crud"] = dict()
        load_spec["doc_crud"][
            MetaCrudParams.DocCrud.COMMON_DOC_KEY] = "test_collections"
        load_spec["doc_crud"][
            MetaCrudParams.DocCrud.READ_PERCENTAGE_PER_COLLECTION] = 50
        load_spec["subdoc_crud"][
            MetaCrudParams.SubDocCrud.INSERT_PER_COLLECTION] = 20
        load_spec["subdoc_crud"][
            MetaCrudParams.SubDocCrud.UPSERT_PER_COLLECTION] = 10
        load_spec["subdoc_crud"][
            MetaCrudParams.SubDocCrud.REMOVE_PER_COLLECTION] = 10

        self.log.info("Selecting nodes to simulate error condition")
        target_nodes = DurabilityHelper.getTargetNodes(self.cluster,
                                                       self.nodes_init,
                                                       self.num_nodes_affected)

        # Create new docs for sub-doc operations to run
        self.load_data_for_sub_doc_ops()

        self.log.info("Will simulate error condition on %s" % target_nodes)
        for node in target_nodes:
            # Create shell_connections
            shell_conn[node.ip] = RemoteMachineShellConnection(node)
            cbstat_obj[node.ip] = Cbstats(shell_conn[node.ip])
            active_vbs = cbstat_obj[node.ip].vbucket_list(
                def_bucket.name, "active")
            active_vbs_in_target_nodes += active_vbs
            vb_info_info["init"][node.ip] = cbstat_obj[node.ip].vbucket_seqno(
                def_bucket.name)
            failover_info["init"][node.ip] = \
                cbstat_obj[node.ip].failover_stats(def_bucket.name)

        for node in target_nodes:
            # Perform specified action
            error_sim[node.ip] = CouchbaseError(self.log, shell_conn[node.ip])
            error_sim[node.ip].create(self.simulate_error,
                                      bucket_name=def_bucket.name)

        # Perform CRUDs while the induced error scenario is active
        self.log.info("Perform 'insert', 'upsert', 'remove' mutations")
        doc_loading_task = \
            self.bucket_util.run_scenario_from_spec(
                self.task,
                self.cluster,
                self.bucket_util.buckets,
                load_spec,
                mutation_num=0,
                async_load=True)

        # Perform new scope/collection creation during doc ops in parallel
        self.__perform_collection_crud(mutation_num=1)

        # Wait for doc_loading to complete and validate the doc ops
        self.task_manager.get_task_result(doc_loading_task)
        if doc_loading_task.result is False:
            self.log_failure("Doc CRUDs failed with persistence issue")

        # Revert the induced error condition
        for node in target_nodes:
            error_sim[node.ip].revert(self.simulate_error,
                                      bucket_name=def_bucket.name)

        # Fetch latest failover stats and validate the values are updated
        self.log.info("Validating failover and seqno cbstats")
        for node in target_nodes:
            vb_info_info["afterCrud"][node.ip] = \
                cbstat_obj[node.ip].vbucket_seqno(def_bucket.name)
            failover_info["afterCrud"][node.ip] = \
                cbstat_obj[node.ip].failover_stats(def_bucket.name)

            # Failover stats should remain unchanged by the CRUDs
            val = \
                failover_info["init"][node.ip] \
                == failover_info["afterCrud"][node.ip]
            self.assertTrue(val, msg="Failover stats changed after CRUDs")

            # Seq_no validation (High level)
            val = \
                vb_info_info["init"][node.ip] \
                != vb_info_info["afterCrud"][node.ip]
            self.assertTrue(val, msg="vbucket seq_no not updated after CRUDs")

        # Disconnect the shell connection
        for node in target_nodes:
            shell_conn[node.ip].disconnect()

        self.validate_test_failure()
        self.bucket_util._wait_for_stats_all_buckets()
        self.bucket_util.validate_docs_per_collections_all_buckets()
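
    # Hedged sketch (not in the original suite): the before/after cbstat
    # comparisons above reduce to asking which stats changed between two
    # snapshots. A standalone diff helper for such snapshot dicts; the
    # helper name and the flat-dict assumption are illustrative only.
    @staticmethod
    def __changed_stats(snapshot_before, snapshot_after):
        changed = dict()
        for stat_key, old_val in snapshot_before.items():
            new_val = snapshot_after.get(stat_key)
            if new_val != old_val:
                changed[stat_key] = (old_val, new_val)
        return changed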
Example #25
0
class CrashTest(BaseTestCase):
    def setUp(self):
        super(CrashTest, self).setUp()

        self.doc_ops = self.input.param("doc_ops", None)
        self.process_name = self.input.param("process", None)
        self.service_name = self.input.param("service", "data")
        self.sig_type = self.input.param("sig_type", "SIGKILL").upper()
        self.target_node = self.input.param("target_node", "active")

        self.pre_warmup_stats = {}
        self.timeout = 120
        self.new_docs_to_add = 10000

        if self.doc_ops is not None:
            self.doc_ops = self.doc_ops.split(";")

        nodes_init = self.cluster.servers[1:self.nodes_init] \
            if self.nodes_init != 1 else []
        self.task.rebalance([self.cluster.master], nodes_init, [])
        self.cluster.nodes_in_cluster.extend([self.cluster.master] +
                                             nodes_init)
        if not self.atomicity:
            self.durability_helper = DurabilityHelper(
                self.log,
                self.nodes_init,
                durability=self.durability_level,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to)
        self.bucket_util.create_default_bucket(
            bucket_type=self.bucket_type,
            ram_quota=self.bucket_size,
            replica=self.num_replicas,
            compression_mode="off",
            storage=self.bucket_storage,
            eviction_policy=self.bucket_eviction_policy)
        self.bucket_util.add_rbac_user()

        if self.sdk_client_pool:
            self.log.info("Creating SDK clients for client_pool")
            for bucket in self.bucket_util.buckets:
                self.sdk_client_pool.create_clients(
                    bucket, [self.cluster.master],
                    self.sdk_pool_capacity,
                    compression_settings=self.sdk_compression)

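        # Expected cbstats vbucket-details counters; checked after the initial
        # (non-transactional) load via verify_vbucket_details_stats() below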
        verification_dict = dict()
        verification_dict["ops_create"] = self.num_items
        verification_dict["sync_write_aborted_count"] = 0
        verification_dict["rollback_item_count"] = 0
        verification_dict["pending_writes"] = 0
        if self.durability_level:
            verification_dict["sync_write_committed_count"] = self.num_items

        # Load initial documents into the buckets
        self.log.info("Loading initial documents")
        gen_create = doc_generator(self.key,
                                   0,
                                   self.num_items,
                                   key_size=self.key_size,
                                   doc_size=self.doc_size,
                                   doc_type=self.doc_type,
                                   target_vbucket=self.target_vbucket,
                                   vbuckets=self.cluster_util.vbuckets)
        if self.atomicity:
            task = self.task.async_load_gen_docs_atomicity(
                self.cluster,
                self.bucket_util.buckets,
                gen_create,
                "create",
                exp=0,
                batch_size=10,
                process_concurrency=self.process_concurrency,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                update_count=self.update_count,
                transaction_timeout=self.transaction_timeout,
                commit=True,
                sync=self.sync)
            self.task.jython_task_manager.get_task_result(task)
        else:
            for bucket in self.bucket_util.buckets:
                task = self.task.async_load_gen_docs(
                    self.cluster,
                    bucket,
                    gen_create,
                    DocLoading.Bucket.DocOps.CREATE,
                    self.maxttl,
                    persist_to=self.persist_to,
                    replicate_to=self.replicate_to,
                    durability=self.durability_level,
                    batch_size=10,
                    process_concurrency=8,
                    sdk_client_pool=self.sdk_client_pool)
                self.task.jython_task_manager.get_task_result(task)

                self.bucket_util._wait_for_stats_all_buckets()
                # Verify cbstats vbucket-details
                stats_failed = \
                    self.durability_helper.verify_vbucket_details_stats(
                        bucket, self.cluster_util.get_kv_nodes(),
                        vbuckets=self.cluster_util.vbuckets,
                        expected_val=verification_dict)

                if stats_failed:
                    self.fail("Cbstats verification failed")

            self.bucket_util.verify_stats_all_buckets(self.num_items)
        self.cluster_util.print_cluster_stats()
        self.bucket_util.print_bucket_stats()
        self.log.info("==========Finished CrashTest setup========")

    def tearDown(self):
        super(CrashTest, self).tearDown()

    def getTargetNode(self):
        if len(self.cluster.nodes_in_cluster) > 1:
            return self.cluster.nodes_in_cluster[randint(
                0, self.nodes_init - 1)]
        return self.cluster.master

    def getVbucketNumbers(self, shell_conn, bucket_name, replica_type):
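        # Return the vbucket numbers of the given type (active/replica)
        # hosted on the node behind 'shell_conn' for the given bucket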
        cb_stats = Cbstats(shell_conn)
        return cb_stats.vbucket_list(bucket_name, replica_type)

    def test_stop_process(self):
        """
        1. Start loading docs into the default bucket
        2. Stop the requested process, which will impact the
           memcached operations
        3. Wait for the bucket load task to complete
        4. Validate the docs for durability
        """
        error_to_simulate = self.input.param("simulate_error", None)
        def_bucket = self.bucket_util.buckets[0]
        target_node = self.getTargetNode()
        remote = RemoteMachineShellConnection(target_node)
        error_sim = CouchbaseError(self.log, remote)
        target_vbuckets = self.getVbucketNumbers(remote, def_bucket.name,
                                                 self.target_node)
        if len(target_vbuckets) == 0:
            self.log.error("No target vbucket list generated to load data")
            remote.disconnect()
            return

        # Create doc_generator targeting only the active/replica vbuckets
        # present in the target_node
        gen_load = doc_generator(self.key,
                                 self.num_items,
                                 self.new_docs_to_add,
                                 key_size=self.key_size,
                                 doc_size=self.doc_size,
                                 doc_type=self.doc_type,
                                 target_vbucket=target_vbuckets,
                                 vbuckets=self.cluster_util.vbuckets)

        if self.atomicity:
            task = self.task.async_load_gen_docs_atomicity(
                self.cluster,
                self.bucket_util.buckets,
                gen_load,
                "create",
                exp=0,
                batch_size=10,
                process_concurrency=self.process_concurrency,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                update_count=self.update_count,
                transaction_timeout=self.transaction_timeout,
                commit=True,
                sync=self.sync)
        else:
            task = self.task.async_load_gen_docs(
                self.cluster,
                def_bucket,
                gen_load,
                "create",
                exp=0,
                batch_size=1,
                process_concurrency=8,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                skip_read_on_error=True)

        # Induce the error condition
        error_sim.create(error_to_simulate)

        self.sleep(20, "Wait before reverting the error condition")
        # Revert the simulated error condition and close the ssh session
        error_sim.revert(error_to_simulate)
        remote.disconnect()

        # Wait for doc loading task to complete
        self.task.jython_task_manager.get_task_result(task)
        if not self.atomicity:
            if len(task.fail.keys()) != 0:
                if self.target_node == "active" or self.num_replicas in [2, 3]:
                    self.log_failure("Unwanted failures for keys: %s" %
                                     task.fail.keys())

            validate_passed = \
                self.durability_helper.validate_durability_exception(
                    task.fail,
                    SDKException.DurabilityAmbiguousException)
            if not validate_passed:
                self.log_failure("Unwanted exception seen during validation")

            # Create SDK connection for CRUD retries
            sdk_client = SDKClient([self.cluster.master], def_bucket)
            for doc_key, crud_result in task.fail.items():
                result = sdk_client.crud("create",
                                         doc_key,
                                         crud_result["value"],
                                         replicate_to=self.replicate_to,
                                         persist_to=self.persist_to,
                                         durability=self.durability_level,
                                         timeout=self.sdk_timeout)
                if result["status"] is False:
                    self.log_failure("Retry of doc_key %s failed: %s" %
                                     (doc_key, result["error"]))
            # Close the SDK connection
            sdk_client.close()

        # Update self.num_items
        self.num_items += self.new_docs_to_add

        if not self.atomicity:
            # Validate doc count
            self.bucket_util._wait_for_stats_all_buckets()
            self.bucket_util.verify_stats_all_buckets(self.num_items)

        self.validate_test_failure()

    def test_crash_process(self):
        """
        1. Start loading docs into the default bucket
        2. Crash the requested process, which will not impact the
           memcached operations
        3. Wait for the bucket load task to complete
        4. Validate the docs for durability
        """
        def_bucket = self.bucket_util.buckets[0]
        target_node = self.getTargetNode()
        remote = RemoteMachineShellConnection(target_node)
        target_vbuckets = range(0, self.cluster_util.vbuckets)
        retry_exceptions = list()

        # If memcached is killed, we should not perform KV ops on that
        # particular node. Otherwise we can target all nodes for KV operations.
        if self.process_name == "memcached":
            target_vbuckets = self.getVbucketNumbers(remote, def_bucket.name,
                                                     self.target_node)
            if self.target_node == "active":
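                # Ops hitting the killed active node are expected to time out,
                # so TimeoutException is treated as a retryable exception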
                retry_exceptions = [SDKException.TimeoutException]
        if len(target_vbuckets) == 0:
            self.log.error("No target vbucket list generated to load data")
            remote.disconnect()
            return

        # Create doc_generator targeting only the active/replica vbuckets
        # present in the target_node
        gen_load = doc_generator(self.key,
                                 self.num_items,
                                 self.new_docs_to_add,
                                 key_size=self.key_size,
                                 doc_size=self.doc_size,
                                 doc_type=self.doc_type,
                                 target_vbucket=target_vbuckets,
                                 vbuckets=self.cluster_util.vbuckets)
        if self.atomicity:
            task = self.task.async_load_gen_docs_atomicity(
                self.cluster,
                self.bucket_util.buckets,
                gen_load,
                "create",
                exp=0,
                batch_size=10,
                process_concurrency=self.process_concurrency,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                update_count=self.update_count,
                transaction_timeout=self.transaction_timeout,
                commit=True,
                sync=self.sync)
        else:
            task = self.task.async_load_gen_docs(
                self.cluster,
                def_bucket,
                gen_load,
                "create",
                exp=0,
                batch_size=10,
                process_concurrency=8,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                skip_read_on_error=True)

        task_info = dict()
        task_info[task] = self.bucket_util.get_doc_op_info_dict(
            def_bucket,
            "create",
            0,
            replicate_to=self.replicate_to,
            persist_to=self.persist_to,
            durability=self.durability_level,
            timeout=self.sdk_timeout,
            time_unit="seconds",
            retry_exceptions=retry_exceptions)

        self.sleep(10, "Wait for doc_ops to start")
        self.log.info("Killing {0}:{1} on node {2}".format(
            self.process_name, self.service_name, target_node.ip))
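        # 'signum' is assumed to map sig_type names (e.g. SIGKILL) to their
        # numeric signal values for kill_process()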
        remote.kill_process(self.process_name,
                            self.service_name,
                            signum=signum[self.sig_type])
        remote.disconnect()
        # Wait for tasks completion and validate failures
        if self.atomicity:
            self.task.jython_task_manager.get_task_result(task)
        if not self.atomicity:
            self.bucket_util.verify_doc_op_task_exceptions(
                task_info, self.cluster)
            self.bucket_util.log_doc_ops_task_failures(task_info)

        # Update self.num_items
        self.num_items += self.new_docs_to_add

        # Verification stats
        verification_dict = dict()
        verification_dict["ops_create"] = self.num_items
        verification_dict["sync_write_aborted_count"] = 0
        verification_dict["rollback_item_count"] = 0
        verification_dict["pending_writes"] = 0
        if self.durability_level:
            verification_dict["sync_write_committed_count"] = self.num_items

        if self.bucket_type == Bucket.Type.EPHEMERAL \
                and self.process_name == "memcached":
            self.sleep(10, "Wait for memcached to recover from the crash")
            result = self.task.rebalance(self.servers[:self.nodes_init], [],
                                         [])
            self.assertTrue(result, "Rebalance failed")

        # Validate doc count
        if not self.atomicity:
            self.bucket_util._wait_for_stats_all_buckets()
            self.bucket_util.verify_stats_all_buckets(self.num_items)

            if self.process_name != "memcached":
                stats_failed = \
                    self.durability_helper.verify_vbucket_details_stats(
                        def_bucket, self.cluster_util.get_kv_nodes(),
                        vbuckets=self.cluster_util.vbuckets,
                        expected_val=verification_dict)
                if stats_failed:
                    self.fail("Cbstats verification failed")

    def test_process_error_on_nodes(self):
        """
        Test to validate the out-of-order (OoO) returns feature
        1. Start parallel CRUDs using a single client
        2. Perform process crash / stop with doc_ops in parallel
        3. Make sure no crash or ep-engine issue is seen during the
           error simulation
        """
        tasks = list()
        node_data = dict()
        bucket = self.bucket_util.buckets[0]
        revert_errors = [
            CouchbaseError.STOP_MEMCACHED, CouchbaseError.STOP_SERVER,
            CouchbaseError.STOP_BEAMSMP, CouchbaseError.STOP_PERSISTENCE
        ]
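        # The errors above stop a service/persistence and must be reverted
        # explicitly; crash-style errors recover once the process restarts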
        # Override sdk_timeout with a higher value for the error scenarios
        self.sdk_timeout = 60

        # Disable auto-failover to avoid failover of nodes
        status = RestConnection(self.cluster.master) \
            .update_autofailover_settings(False, 120, False)
        self.assertTrue(status, msg="Failure during disabling auto-failover")

        # Can take 'all_nodes' / 'single_node'
        crash_on = self.input.param("crash_on", "single_node")
        error_to_simulate = self.input.param("simulate_error",
                                             CouchbaseError.KILL_MEMCACHED)
        num_times_to_affect = self.input.param("times_to_affect", 20)
        nodes_to_affect = self.cluster_util.get_kv_nodes()
        if crash_on == "single_node":
            nodes_to_affect = [choice(nodes_to_affect)]

        create_gen = doc_generator(self.key, self.num_items,
                                   self.num_items * 2)
        update_gen = doc_generator(self.key, 0, self.num_items / 2)
        delete_gen = doc_generator(self.key, self.num_items / 2,
                                   self.num_items)

        for node in nodes_to_affect:
            shell = RemoteMachineShellConnection(node)
            node_data[node] = dict()
            node_data[node]["cb_err"] = CouchbaseError(self.log, shell)

        self.log.info("Starting doc-ops")
        for doc_op in self.doc_ops:
            load_gen = update_gen
            if doc_op == DocLoading.Bucket.DocOps.CREATE:
                load_gen = create_gen
            elif doc_op == DocLoading.Bucket.DocOps.DELETE:
                load_gen = delete_gen
            task = self.task.async_load_gen_docs(
                self.cluster,
                bucket,
                load_gen,
                doc_op,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                sdk_client_pool=self.sdk_client_pool,
                batch_size=10,
                process_concurrency=1,
                skip_read_on_error=True,
                print_ops_rate=False)
            tasks.append(task)

        self.log.info("Starting error_simulation on %s" % nodes_to_affect)
        for itr in range(1, num_times_to_affect + 1):
            self.log.info("Iteration :: %d" % itr)
            for node in nodes_to_affect:
                node_data[node]["cb_err"].create(error_to_simulate,
                                                 bucket.name)
            if error_to_simulate in revert_errors:
                self.sleep(30, "Sleep before reverting the error")
                for node in nodes_to_affect:
                    node_data[node]["cb_err"].revert(error_to_simulate,
                                                     bucket.name)
            else:
                self.sleep(10, "Wait for process to come back online")

        # Wait for doc_ops to complete
        for task in tasks:
            self.task_manager.get_task_result(task)

    def test_sub_doc_with_process_crash(self):
        """
        Test to make sure durability will succeed even if a node goes down
        due to a crash, as long as enough nodes remain to satisfy the
        durability level

        1. Select a node from the cluster to simulate the specified error
        2. Perform CRUDs on the target bucket with the given timeout
        3. Use cbstats to verify the operations succeed
        4. Validate that all mutations succeeded

        Note: self.sdk_timeout value is considered as 'seconds'
        """
        if self.num_replicas < 2:
            self.assertTrue(False, msg="Required: num_replicas > 1")

        # Override num_of_nodes affected to 1
        self.num_nodes_affected = 1

        error_sim = dict()
        shell_conn = dict()
        cbstat_obj = dict()
        failover_info = dict()
        vb_info_info = dict()
        active_vbs_in_target_nodes = list()
        failover_info["init"] = dict()
        failover_info["afterCrud"] = dict()
        vb_info_info["init"] = dict()
        vb_info_info["afterCrud"] = dict()
        def_bucket = self.bucket_util.buckets[0]

        self.load_data_for_sub_doc_ops()

        self.log.info("Selecting nodes to simulate error condition")
        target_nodes = DurabilityHelper.getTargetNodes(self.cluster,
                                                       self.nodes_init,
                                                       self.num_nodes_affected)

        self.log.info("Will simulate error condition on %s" % target_nodes)
        for node in target_nodes:
            # Create shell_connections
            shell_conn[node.ip] = RemoteMachineShellConnection(node)
            cbstat_obj[node.ip] = Cbstats(shell_conn[node.ip])
            active_vbs = cbstat_obj[node.ip].vbucket_list(
                def_bucket.name, "active")
            active_vbs_in_target_nodes += active_vbs
            vb_info_info["init"][node.ip] = cbstat_obj[node.ip].vbucket_seqno(
                def_bucket.name)
            failover_info["init"][node.ip] = \
                cbstat_obj[node.ip].failover_stats(def_bucket.name)

            # Remove active vbuckets from doc_loading to avoid errors

        load_spec = dict()
        # load_spec["target_vbuckets"] = list(set(target_vbuckets)
        #                                    ^ set(active_vbs_in_target_nodes))
        load_spec["doc_crud"] = dict()
        load_spec["subdoc_crud"] = dict()
        load_spec["doc_crud"][
            MetaCrudParams.DocCrud.READ_PERCENTAGE_PER_COLLECTION] = 10
        load_spec["subdoc_crud"][
            MetaCrudParams.SubDocCrud.INSERT_PER_COLLECTION] = 50
        load_spec["subdoc_crud"][
            MetaCrudParams.SubDocCrud.UPSERT_PER_COLLECTION] = 25
        load_spec["subdoc_crud"][
            MetaCrudParams.SubDocCrud.REMOVE_PER_COLLECTION] = 25
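        # Per collection: 10% doc reads plus 50/25/25 sub-doc
        # insert/upsert/remove, executed while the error is being simulated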

        self.log.info("Perform 'create', 'update', 'delete' mutations")

        doc_loading_task = \
            self.bucket_util.run_scenario_from_spec(
                self.task,
                self.cluster,
                self.bucket_util.buckets,
                load_spec,
                mutation_num=1,
                async_load=True)

        self.sleep(5, "Wait for doc loaders to start loading data")

        for node in target_nodes:
            # Perform specified action
            error_sim[node.ip] = CouchbaseError(self.log, shell_conn[node.ip])
            error_sim[node.ip].create(self.simulate_error,
                                      bucket_name=def_bucket.name)

        # Perform new scope/collection creation during doc ops in parallel
        self.__perform_collection_crud(mutation_num=2)

        # Wait for document_loader tasks to complete
        self.task_manager.get_task_result(doc_loading_task)
        self.bucket_util.validate_doc_loading_results(doc_loading_task)
        if doc_loading_task.result is False:
            self.log_failure("Sub_doc CRUDs failed with process crash")

        # Revert the induced error condition
        for node in target_nodes:
            error_sim[node.ip].revert(self.simulate_error,
                                      bucket_name=def_bucket.name)

        # Fetch the latest failover and seqno stats for validation
        self.log.info("Validating failover and seqno cbstats")
        for node in target_nodes:
            vb_info_info["afterCrud"][node.ip] = \
                cbstat_obj[node.ip].vbucket_seqno(def_bucket.name)
            failover_info["afterCrud"][node.ip] = \
                cbstat_obj[node.ip].failover_stats(def_bucket.name)

            # Failover validation
            val = \
                failover_info["init"][node.ip] \
                == failover_info["afterCrud"][node.ip]
            error_msg = "Failover stats not updated after error condition"
            self.assertTrue(val, msg=error_msg)

            # Seq_no validation (High level)
            val = \
                vb_info_info["init"][node.ip] \
                != vb_info_info["afterCrud"][node.ip]
            self.assertTrue(val, msg="vbucket seq_no not updated after CRUDs")

        # Disconnect the shell connection
        for node in target_nodes:
            shell_conn[node.ip].disconnect()

        self.validate_test_failure()
        # Doc count validation
        self.bucket_util._wait_for_stats_all_buckets()
        self.bucket_util.validate_docs_per_collections_all_buckets()
Example #27
class CrashTest(CollectionBase):
    def setUp(self):
        super(CrashTest, self).setUp()

        self.doc_ops = self.input.param("doc_ops", None)
        self.process_name = self.input.param("process", None)
        self.service_name = self.input.param("service", "data")
        self.sig_type = self.input.param("sig_type", "SIGKILL").upper()
        self.target_node = self.input.param("target_node", "active")
        self.client_type = self.input.param("client_type", "sdk").lower()
        self.N1qltxn = self.input.param("N1qltxn", False)

        self.pre_warmup_stats = dict()
        self.timeout = 120
        self.new_docs_to_add = 10000

        if self.doc_ops is not None:
            self.doc_ops = self.doc_ops.split(";")

        if not self.atomicity:
            self.durability_helper = DurabilityHelper(
                self.log, self.nodes_init,
                durability=self.durability_level,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to)

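        # Expected cbstats counters, seeded from the item count already
        # tracked for the bucket's default collection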
        verification_dict = dict()
        verification_dict["ops_create"] = \
            self.cluster.buckets[0].scopes[
                CbServer.default_scope].collections[
                CbServer.default_collection].num_items
        verification_dict["sync_write_aborted_count"] = 0
        verification_dict["rollback_item_count"] = 0
        verification_dict["pending_writes"] = 0
        if self.durability_level:
            verification_dict["sync_write_committed_count"] = \
                verification_dict["ops_create"]

        # Load initial documents into the buckets
        transaction_gen_create = doc_generator(
            "transaction_key", 0, self.num_items,
            key_size=self.key_size,
            doc_size=self.doc_size,
            doc_type=self.doc_type,
            target_vbucket=self.target_vbucket,
            vbuckets=self.cluster_util.vbuckets)
        gen_create = doc_generator(
            self.key, 0, self.num_items,
            key_size=self.key_size,
            doc_size=self.doc_size,
            doc_type=self.doc_type,
            target_vbucket=self.target_vbucket,
            vbuckets=self.cluster_util.vbuckets)
        if self.atomicity:
            transaction_task = self.task.async_load_gen_docs_atomicity(
                self.cluster, self.cluster.buckets,
                transaction_gen_create, DocLoading.Bucket.DocOps.CREATE,
                exp=0,
                batch_size=10,
                process_concurrency=self.process_concurrency,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                update_count=self.update_count,
                transaction_timeout=self.transaction_timeout,
                commit=True,
                sync=self.sync)
            self.task.jython_task_manager.get_task_result(transaction_task)
        for bucket in self.cluster.buckets:
            task = self.task.async_load_gen_docs(
                self.cluster, bucket, gen_create,
                DocLoading.Bucket.DocOps.CREATE, self.maxttl,
                persist_to=self.persist_to,
                replicate_to=self.replicate_to,
                durability=self.durability_level,
                batch_size=10, process_concurrency=8)
            self.task.jython_task_manager.get_task_result(task)
            self.bucket_util._wait_for_stats_all_buckets(self.cluster.buckets)

            self.cluster.buckets[0].scopes[
                CbServer.default_scope].collections[
                CbServer.default_collection].num_items += self.num_items
            verification_dict["ops_create"] += self.num_items
            if self.durability_level:
                verification_dict["sync_write_committed_count"] += \
                    self.num_items
            # Verify cbstats vbucket-details
            stats_failed = self.durability_helper.verify_vbucket_details_stats(
                bucket, self.cluster_util.get_kv_nodes(),
                vbuckets=self.cluster_util.vbuckets,
                expected_val=verification_dict)

            if self.atomicity is False:
                if stats_failed:
                    self.fail("Cbstats verification failed")
                self.bucket_util.verify_stats_all_buckets(
                    self.cluster,
                    self.cluster.buckets[0].scopes[
                        CbServer.default_scope].collections[
                        CbServer.default_collection].num_items)
        self.bucket = self.cluster.buckets[0]
        if self.N1qltxn:
            self.n1ql_server = self.cluster_util.get_nodes_from_services_map(
                                service_type="n1ql",
                                get_all_nodes=True)
            self.n1ql_helper = N1QLHelper(server=self.n1ql_server,
                                          use_rest=True,
                                          buckets=self.cluster.buckets,
                                          log=self.log,
                                          scan_consistency='REQUEST_PLUS',
                                          num_collection=3,
                                          num_buckets=1,
                                          num_savepoints=1,
                                          override_savepoint=False,
                                          num_stmt=10,
                                          load_spec=self.data_spec_name)
            self.bucket_col = self.n1ql_helper.get_collections()
            self.stmts = self.n1ql_helper.get_stmt(self.bucket_col)
            self.stmts = self.n1ql_helper.create_full_stmts(self.stmts)
        self.log.info("==========Finished CrashTest setup========")

    def tearDown(self):
        super(CrashTest, self).tearDown()

    def getTargetNode(self):
        if len(self.cluster.nodes_in_cluster) > 1:
            return self.cluster.nodes_in_cluster[randint(0, self.nodes_init-1)]
        return self.cluster.master

    def start_doc_loading_tasks(self, target_vbuckets,
                                scope_name, collection_obj):
        # Create doc_generator targeting only the active/replica vbuckets
        # present in the target_node
        transaction_gen_load = doc_generator(
            "transaction_key", self.num_items, self.new_docs_to_add,
            key_size=self.key_size,
            doc_size=self.doc_size,
            doc_type=self.doc_type,
            target_vbucket=target_vbuckets,
            vbuckets=self.cluster_util.vbuckets)
        gen_load = doc_generator(
            self.key, self.num_items, self.new_docs_to_add,
            key_size=self.key_size,
            doc_size=self.doc_size,
            doc_type=self.doc_type,
            target_vbucket=target_vbuckets,
            vbuckets=self.cluster_util.vbuckets)
        if self.atomicity:
            self.transaction_load_task = \
                self.task.async_load_gen_docs_atomicity(
                    self.cluster, self.cluster.buckets,
                    transaction_gen_load, DocLoading.Bucket.DocOps.CREATE,
                    exp=0,
                    batch_size=10,
                    process_concurrency=self.process_concurrency,
                    replicate_to=self.replicate_to,
                    persist_to=self.persist_to,
                    durability=self.durability_level,
                    timeout_secs=self.sdk_timeout,
                    update_count=self.update_count,
                    transaction_timeout=self.transaction_timeout,
                    commit=True,
                    sync=self.sync)
            collection_obj.num_items += self.new_docs_to_add
        elif self.N1qltxn:
            self.N1ql_load_task = self.task.async_n1qlTxn_query(
                self.stmts,
                n1ql_helper=self.n1ql_helper,
                commit=True,
                scan_consistency="REQUEST_PLUS")
        self.doc_loading_task = self.task.async_load_gen_docs(
            self.cluster, self.bucket, gen_load,
            DocLoading.Bucket.DocOps.CREATE,
            exp=0,
            batch_size=10,
            process_concurrency=8,
            replicate_to=self.replicate_to,
            persist_to=self.persist_to,
            durability=self.durability_level,
            timeout_secs=self.sdk_timeout,
            scope=scope_name, collection=collection_obj.name,
            skip_read_on_error=True)
        collection_obj.num_items += self.new_docs_to_add

    @staticmethod
    def getVbucketNumbers(shell_conn, bucket_name, replica_type):
        cb_stats = Cbstats(shell_conn)
        return cb_stats.vbucket_list(bucket_name, replica_type)

    def test_create_remove_scope_with_node_crash(self):
        """
        1. Select an error scenario to simulate at random
        2. Create the error scenario either before or after scope create/delete
        3. Initiate scope creation/deletion under the bucket
        4. Validate the outcome of scope creation/deletion
        """
        def create_scope(client_type, bucket_obj, scope):
            if client_type == "sdk":
                client.create_scope(scope)
                self.bucket_util.create_scope_object(bucket_obj,
                                                     {"name": scope})
            elif client_type == "rest":
                self.bucket_util.create_scope(self.cluster.master, bucket_obj,
                                              {"name": scope})
            else:
                self.log_failure("Invalid client_type provided")

        def remove_scope(client_type, bucket_obj, scope):
            if client_type == "sdk":
                client.drop_scope(scope)
                self.bucket_util.mark_scope_as_dropped(bucket_obj, scope)
            elif client_type == "rest":
                self.bucket_util.drop_scope(self.cluster.master,
                                            bucket_obj,
                                            scope)
            else:
                self.log_failure("Invalid client_type provided")

        kv_nodes = self.cluster_util.get_kv_nodes()
        if len(kv_nodes) == 1:
            self.fail("Need atleast two KV nodes to run this test")

        client = None
        task = None
        action = self.input.param("action", "create")
        crash_during = self.input.param("crash_during", "pre_action")
        data_load_option = self.input.param("data_load_option", None)
        crash_type = self.input.param("simulate_error",
                                      CouchbaseError.KILL_MEMCACHED)

        # Always use a random scope name to create/remove
        # since CREATE/DROP is not supported for the default scope
        self.scope_name = \
            BucketUtils.get_random_name(max_length=CbServer.max_scope_name_len)

        # Select a KV node other than master node from the cluster
        node_to_crash = kv_nodes[sample(range(1, len(kv_nodes)), 1)[0]]

        client = self.sdk_client_pool.get_client_for_bucket(self.bucket)
        use_client = sample(["sdk", "rest"], 1)[0]
        if action == "remove":
            # Create a scope to be removed
            create_scope(use_client, self.bucket, self.scope_name)

        # Create an error scenario
        shell = RemoteMachineShellConnection(node_to_crash)
        cb_error = CouchbaseError(self.log, shell)
        cbstat_obj = Cbstats(shell)
        active_vbs = cbstat_obj.vbucket_list(self.bucket.name,
                                             vbucket_type="active")
        target_vbuckets = list(
            set(range(0, 1024)).difference(set(active_vbs)))
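        # Doc mutations below avoid vbuckets that are active on the node
        # being crashed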
        doc_gen = doc_generator(self.key, 0, 1000,
                                target_vbucket=target_vbuckets)

        if crash_during == "pre_action":
            cb_error.create(crash_type)

        if data_load_option == "mutate_default_collection":
            task = self.task.async_load_gen_docs(
                self.cluster, self.bucket, doc_gen,
                DocLoading.Bucket.DocOps.UPDATE,
                exp=self.maxttl,
                batch_size=200, process_concurrency=4,
                compression=self.sdk_compression,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                sdk_client_pool=self.sdk_client_pool)

        if action == "create":
            create_scope(self.client_type, self.bucket, self.scope_name)
        elif action == "remove":
            remove_scope(self.client_type, self.bucket, self.scope_name)

        if crash_during == "post_action":
            cb_error.create(crash_type)

        self.sleep(60, "Wait before reverting the error scenario")
        cb_error.revert(crash_type)

        if data_load_option == "mutate_default_collection":
            self.task_manager.get_task_result(task)

        # Close SSH and SDK connections
        shell.disconnect()
        if self.atomicity is False:
            self.bucket_util.validate_docs_per_collections_all_buckets(
                self.cluster)
        self.validate_test_failure()

    def test_create_remove_collection_with_node_crash(self):
        """
        1. Select an error scenario to simulate at random
        2. Create the error scenario either before or after the collection action
        3. Initiate collection creation/deletion under the bucket
        4. Validate the outcome of collection creation/deletion
        """
        def create_collection(client_type, bucket_obj, scope, collection):
            if client_type == "sdk":
                client.create_collection(collection, scope)
                self.bucket_util.create_collection_object(bucket_obj, scope,
                                                          {"name": collection})
            elif client_type == "rest":
                self.bucket_util.create_collection(self.cluster.master,
                                                   bucket_obj,
                                                   scope,
                                                   {"name": collection})
            else:
                self.log_failure("Invalid client_type provided")

        def remove_collection(client_type, bucket_obj, scope, collection):
            if client_type == "sdk":
                client.drop_collection(scope, collection)
                self.bucket_util.mark_collection_as_dropped(bucket_obj, scope,
                                                            collection)
            elif client_type == "rest":
                self.bucket_util.drop_collection(self.cluster.master,
                                                 bucket_obj, scope, collection)
            else:
                self.log_failure("Invalid client_type provided")

        kv_nodes = self.cluster_util.get_kv_nodes()
        if len(kv_nodes) == 1:
            self.fail("Need atleast two KV nodes to run this test")

        client = None
        task = None
        action = self.input.param("action", "create")
        crash_during = self.input.param("crash_during", "pre_action")
        data_load_option = self.input.param("data_load_option", None)
        crash_type = self.input.param("simulate_error",
                                      CouchbaseError.KILL_MEMCACHED)

        if self.scope_name != CbServer.default_scope:
            self.scope_name = \
                BucketUtils.get_random_name(
                    max_length=CbServer.max_scope_name_len)
            self.bucket_util.create_scope(self.cluster.master, self.bucket,
                                          {"name": self.scope_name})
        if self.collection_name != CbServer.default_collection:
            self.collection_name = \
                BucketUtils.get_random_name(
                    max_length=CbServer.max_collection_name_len)

        # Select a KV node other than master node from the cluster
        node_to_crash = kv_nodes[sample(range(1, len(kv_nodes)), 1)[0]]

        client = self.sdk_client_pool.get_client_for_bucket(self.bucket)
        use_client = sample(["sdk", "rest"], 1)[0]

        if action == "remove" \
                and self.collection_name != CbServer.default_collection:
            # Create a collection to be removed
            create_collection(use_client, self.bucket,
                              self.scope_name, self.collection_name)

        # Create an error scenario
        self.log.info("Selected scenario for test '%s'" % crash_type)
        shell = RemoteMachineShellConnection(node_to_crash)
        cb_error = CouchbaseError(self.log, shell)
        cbstat_obj = Cbstats(shell)
        active_vbs = cbstat_obj.vbucket_list(self.bucket.name,
                                             vbucket_type="active")
        target_vbuckets = list(
            set(range(0, 1024)).difference(set(active_vbs)))
        doc_gen = doc_generator(self.key, 0, 1000,
                                target_vbucket=target_vbuckets)

        if crash_during == "pre_action":
            cb_error.create(crash_type)

        if data_load_option == "mutate_default_collection":
            task = self.task.async_load_gen_docs(
                self.cluster, self.bucket, doc_gen,
                DocLoading.Bucket.DocOps.UPDATE,
                exp=self.maxttl,
                batch_size=200, process_concurrency=8,
                compression=self.sdk_compression,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout)

        if action == "create":
            create_collection(self.client_type, self.bucket,
                              self.scope_name, self.collection_name)
        elif action == "remove":
            remove_collection(self.client_type, self.bucket,
                              self.scope_name, self.collection_name)

        if crash_during == "post_action":
            cb_error.create(crash_type)

        if data_load_option == "mutate_default_collection":
            self.task_manager.get_task_result(task)

        self.sleep(60, "Wait before reverting the error scenario")
        cb_error.revert(crash_type)

        # Close SSH and SDK connections
        shell.disconnect()
        if self.atomicity is False:
            self.bucket_util.validate_docs_per_collections_all_buckets(
                self.cluster)
        self.validate_test_failure()

    def test_stop_process(self):
        """
        1. Start loading docs into the default bucket
        2. Stop the requested process, which will impact the
           memcached operations
        3. Wait for the bucket load task to complete
        4. Validate the docs for durability
        """
        error_to_simulate = self.input.param("simulate_error", None)
        target_node = self.getTargetNode()
        remote = RemoteMachineShellConnection(target_node)
        error_sim = CouchbaseError(self.log, remote)
        target_vbuckets = CrashTest.getVbucketNumbers(
            remote, self.bucket.name, self.target_node)

        bucket_dict = BucketUtils.get_random_collections(
            self.cluster.buckets,
            req_num=1,
            consider_scopes="all",
            consider_buckets="all")

        bucket = BucketUtils.get_bucket_obj(self.cluster.buckets,
                                            bucket_dict.keys()[0])
        scope_name = bucket_dict[bucket.name]["scopes"].keys()[0]
        collection_name = bucket_dict[bucket.name][
            "scopes"][scope_name]["collections"].keys()[0]
        scope = BucketUtils.get_scope_obj(
            bucket, scope_name)
        collection = BucketUtils.get_collection_obj(scope, collection_name)

        if len(target_vbuckets) == 0:
            self.log.error("No target vbucket list generated to load data")
            remote.disconnect()
            return

        self.start_doc_loading_tasks(target_vbuckets, scope_name, collection)

        # Induce the error condition
        error_sim.create(error_to_simulate)

        self.sleep(20, "Wait before reverting the error condition")
        # Revert the simulated error condition and close the ssh session
        error_sim.revert(error_to_simulate)
        remote.disconnect()

        # Wait for doc loading task to complete
        self.task.jython_task_manager.get_task_result(self.doc_loading_task)
        if self.atomicity:
            self.task.jython_task_manager.get_task_result(
                self.transaction_load_task)
        elif self.N1qltxn:
            self.task.jython_task_manager.get_task_result(
                self.N1ql_load_task)

        if len(self.doc_loading_task.fail.keys()) != 0:
            if self.target_node == "active" or self.num_replicas in [2, 3]:
                self.log_failure("Unwanted failures for keys: %s"
                                 % self.doc_loading_task.fail.keys())

        validate_passed = \
            self.durability_helper.validate_durability_exception(
                self.doc_loading_task.fail,
                SDKException.DurabilityAmbiguousException)
        if not validate_passed:
            self.log_failure("Unwanted exception seen during validation")

        # Get SDK client for CRUD retries
        sdk_client = self.sdk_client_pool.get_client_for_bucket(self.bucket)
        for doc_key, crud_result in self.doc_loading_task.fail.items():
            result = sdk_client.crud(DocLoading.Bucket.DocOps.CREATE,
                                     doc_key,
                                     crud_result["value"],
                                     replicate_to=self.replicate_to,
                                     persist_to=self.persist_to,
                                     durability=self.durability_level,
                                     timeout=self.sdk_timeout)
            if result["status"] is False:
                self.log_failure("Retry of doc_key %s failed: %s"
                                 % (doc_key, result["error"]))
        # Close the SDK connection
        self.sdk_client_pool.release_client(sdk_client)

        self.validate_test_failure()

        self.bucket_util._wait_for_stats_all_buckets(self.cluster.buckets)
        # Update self.num_items and validate docs per collection
        if not self.N1qltxn and self.atomicity is False:
            self.bucket_util.validate_docs_per_collections_all_buckets(
                self.cluster)

    def test_crash_process(self):
        """
        1. Start loading docs into the default bucket
        2. Crash the requested process, which will not impact the
           memcached operations
        3. Wait for the bucket load task to complete
        4. Validate the docs for durability
        """
        def_bucket = self.cluster.buckets[0]
        target_node = self.getTargetNode()
        remote = RemoteMachineShellConnection(target_node)
        target_vbuckets = range(0, self.cluster_util.vbuckets)
        retry_exceptions = list()
        self.transaction_load_task = None
        self.doc_loading_task = None
        self.N1ql_load_task = None

        # If memcached is killed, we should not perform KV ops on that
        # particular node. Otherwise we can target all nodes for KV operations.
        if self.process_name == "memcached":
            target_vbuckets = CrashTest.getVbucketNumbers(
                remote, def_bucket.name, self.target_node)
            if self.target_node == "active":
                retry_exceptions = [SDKException.TimeoutException]
        if len(target_vbuckets) == 0:
            self.log.error("No target vbucket list generated to load data")
            remote.disconnect()
            return

        bucket_dict = BucketUtils.get_random_collections(
            self.cluster.buckets,
            req_num=1,
            consider_scopes="all",
            consider_buckets="all")

        bucket = BucketUtils.get_bucket_obj(self.cluster.buckets,
                                            bucket_dict.keys()[0])
        scope_name = bucket_dict[bucket.name]["scopes"].keys()[0]
        collection_name = bucket_dict[bucket.name][
            "scopes"][scope_name]["collections"].keys()[0]
        scope = BucketUtils.get_scope_obj(
            bucket, scope_name)
        collection = BucketUtils.get_collection_obj(
            scope, collection_name)

        self.start_doc_loading_tasks(target_vbuckets, scope_name, collection)

        task_info = dict()
        task_info[self.doc_loading_task] = \
            self.bucket_util.get_doc_op_info_dict(
                def_bucket, DocLoading.Bucket.DocOps.CREATE, 0,
                replicate_to=self.replicate_to, persist_to=self.persist_to,
                durability=self.durability_level,
                timeout=self.sdk_timeout, time_unit="seconds",
                retry_exceptions=retry_exceptions)

        self.sleep(10, "Wait for doc_ops to start")
        self.log.info("Killing {0}:{1} on node {2}"
                      .format(self.process_name, self.service_name,
                              target_node.ip))
        remote.kill_process(self.process_name, self.service_name,
                            signum=signum[self.sig_type])
        remote.disconnect()
        # Wait for tasks completion and validate failures
        if self.transaction_load_task:
            self.task.jython_task_manager.get_task_result(
                self.transaction_load_task)
        if self.N1qltxn:
            self.task.jython_task_manager.get_task_result(
                self.N1ql_load_task)
        self.task_manager.get_task_result(self.doc_loading_task)
        self.bucket_util.verify_doc_op_task_exceptions(task_info,
                                                       self.cluster)
        self.bucket_util.log_doc_ops_task_failures(task_info)

        # Verification stats
        verification_dict = dict()
        verification_dict["ops_create"] = 2*self.num_items
        verification_dict["sync_write_aborted_count"] = 0
        verification_dict["rollback_item_count"] = 0
        verification_dict["pending_writes"] = 0
        if self.durability_level:
            verification_dict["sync_write_committed_count"] = 2*self.num_items

        if self.bucket_type == Bucket.Type.EPHEMERAL \
                and self.process_name == "memcached":
            result = self.task.rebalance(self.servers[:self.nodes_init],
                                         [], [])
            self.assertTrue(result, "Rebalance failed")

        # Validate doc count
        if self.process_name != "memcached":
            stats_failed = \
                self.durability_helper.verify_vbucket_details_stats(
                    def_bucket, self.cluster_util.get_kv_nodes(),
                    vbuckets=self.cluster_util.vbuckets,
                    expected_val=verification_dict)
            if stats_failed:
                self.fail("Cbstats verification failed")

        # Doc count validation per collection
        if not self.N1qltxn and self.atomicity is False:
            self.bucket_util.validate_docs_per_collections_all_buckets(
                self.cluster)
Example #28
class UpgradeTests(UpgradeBase):
    def setUp(self):
        super(UpgradeTests, self).setUp()
        self.durability_helper = DurabilityHelper(
            self.log,
            len(self.cluster.nodes_in_cluster))
        self.verification_dict = dict()
        self.verification_dict["ops_create"] = self.num_items
        self.verification_dict["ops_delete"] = 0

    def tearDown(self):
        super(UpgradeTests, self).tearDown()

    def __trigger_cbcollect(self, log_path):
        self.log.info("Triggering cb_collect_info")
        rest = RestConnection(self.cluster.master)
        nodes = rest.get_nodes()
        status = self.cluster_util.trigger_cb_collect_on_cluster(rest, nodes)

        if status is True:
            self.cluster_util.wait_for_cb_collect_to_complete(rest)
            status = self.cluster_util.copy_cb_collect_logs(
                rest, nodes, self.cluster, log_path)
            if status is False:
                self.log_failure("API copy_cb_collect_logs detected failure")
        else:
            self.log_failure("API perform_cb_collect returned False")
        return status

    def __play_with_collection(self):
        # Client based scope/collection crud tests
        client = self.sdk_client_pool.get_client_for_bucket(self.bucket)
        scope_name = self.bucket_util.get_random_name(
            max_length=CbServer.max_scope_name_len)
        collection_name = self.bucket_util.get_random_name(
            max_length=CbServer.max_collection_name_len)

        # Create scope using SDK client
        client.create_scope(scope_name)
        # Create collection under default scope and custom scope
        client.create_collection(collection_name, CbServer.default_scope)
        client.create_collection(collection_name, scope_name)
        # Drop created collections
        client.drop_collection(CbServer.default_scope, collection_name)
        client.drop_collection(scope_name, collection_name)
        # Drop created scope using SDK client
        client.drop_scope(scope_name)

        # MB-44092 - Collection load not working with pre-existing connections
        DocLoaderUtils.sdk_client_pool = SDKClientPool()
        self.log.info("Creating required SDK clients for client_pool")
        for bucket in self.bucket_util.buckets:
            DocLoaderUtils.sdk_client_pool.create_clients(
                bucket, [self.cluster.master], 1,
                compression_settings=self.sdk_compression)

        # Create scopes/collections phase
        collection_load_spec = \
            self.bucket_util.get_crud_template_from_package("initial_load")
        collection_load_spec["doc_crud"][
            MetaCrudParams.DocCrud.CREATE_PERCENTAGE_PER_COLLECTION] = 0
        collection_load_spec["doc_crud"][
            MetaCrudParams.DocCrud.NUM_ITEMS_FOR_NEW_COLLECTIONS] = 5000
        collection_load_spec[
            MetaCrudParams.SCOPES_TO_ADD_PER_BUCKET] = 5
        collection_load_spec[
            MetaCrudParams.COLLECTIONS_TO_ADD_FOR_NEW_SCOPES] = 10
        collection_load_spec[
            MetaCrudParams.COLLECTIONS_TO_ADD_PER_BUCKET] = 50
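        # Spec summary: per bucket, add 5 scopes, 10 collections for the new
        # scopes and 50 more collections, seeding 5000 docs into each new
        # collection; no doc CRUD on existing collections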
        collection_task = \
            self.bucket_util.run_scenario_from_spec(self.task,
                                                    self.cluster,
                                                    self.bucket_util.buckets,
                                                    collection_load_spec,
                                                    mutation_num=1,
                                                    batch_size=500)
        if collection_task.result is False:
            self.log_failure("Collection task failed")
            return
        self.bucket_util._wait_for_stats_all_buckets()
        self.bucket_util._wait_for_stats_all_buckets(cbstat_cmd="all",
                                                     stat_name="ep_queue_size",
                                                     timeout=60)
        self.bucket_util.validate_docs_per_collections_all_buckets()

        # Drop and recreate scope/collections
        collection_load_spec = \
            self.bucket_util.get_crud_template_from_package("initial_load")
        collection_load_spec["doc_crud"][
            MetaCrudParams.DocCrud.CREATE_PERCENTAGE_PER_COLLECTION] = 0
        collection_load_spec[MetaCrudParams.COLLECTIONS_TO_DROP] = 10
        collection_load_spec[MetaCrudParams.SCOPES_TO_DROP] = 2
        collection_task = \
            self.bucket_util.run_scenario_from_spec(self.task,
                                                    self.cluster,
                                                    self.bucket_util.buckets,
                                                    collection_load_spec,
                                                    mutation_num=1,
                                                    batch_size=500)
        if collection_task.result is False:
            self.log_failure("Drop scope/collection failed")
            return

        # MB-44092 - Close client_pool after collection ops
        DocLoaderUtils.sdk_client_pool.shutdown()

        self.bucket_util._wait_for_stats_all_buckets()
        self.bucket_util._wait_for_stats_all_buckets(cbstat_cmd="all",
                                                     stat_name="ep_queue_size",
                                                     timeout=60)
        self.bucket_util.validate_docs_per_collections_all_buckets()

    def test_upgrade(self):
        create_batch_size = 10000
        update_task = None

        t_durability_level = ""
        if self.cluster_supports_sync_write:
            t_durability_level = Bucket.DurabilityLevel.MAJORITY
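        # Use MAJORITY sync-writes for the background updates only when the
        # (possibly mixed-version) cluster already supports durable writes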

        if self.upgrade_with_data_load:
            self.log.info("Starting async doc updates")
            update_task = self.task.async_continuous_doc_ops(
                self.cluster, self.bucket, self.gen_load,
                op_type=DocLoading.Bucket.DocOps.UPDATE,
                process_concurrency=1,
                persist_to=1,
                replicate_to=1,
                durability=t_durability_level,
                timeout_secs=30)

        create_gen = doc_generator(self.key, self.num_items,
                                   self.num_items+create_batch_size)
        self.log.info("Upgrading cluster nodes to target version")
        node_to_upgrade = self.fetch_node_to_upgrade()
        while node_to_upgrade is not None:
            self.log.info("Selected node for upgrade: %s"
                          % node_to_upgrade.ip)
            self.upgrade_function[self.upgrade_type](node_to_upgrade,
                                                     self.upgrade_version)
            self.cluster_util.print_cluster_stats()

            # Validate sync_write results after upgrade
            if self.atomicity:
                create_batch_size = 10
                create_gen = doc_generator(
                    self.key,
                    self.num_items,
                    self.num_items+create_batch_size)
                sync_write_task = self.task.async_load_gen_docs_atomicity(
                    self.cluster, self.bucket_util.buckets,
                    create_gen, DocLoading.Bucket.DocOps.CREATE,
                    process_concurrency=1,
                    transaction_timeout=self.transaction_timeout,
                    record_fail=True)
            else:
                sync_write_task = self.task.async_load_gen_docs(
                    self.cluster, self.bucket, create_gen,
                    DocLoading.Bucket.DocOps.CREATE,
                    durability=self.durability_level,
                    timeout_secs=self.sdk_timeout,
                    sdk_client_pool=self.sdk_client_pool,
                    process_concurrency=4,
                    skip_read_on_error=True,
                    suppress_error_table=True)
            self.task_manager.get_task_result(sync_write_task)

            node_to_upgrade = self.fetch_node_to_upgrade()
            if self.atomicity:
                self.sleep(10)
                current_items = self.bucket_util.get_bucket_current_item_count(
                    self.cluster, self.bucket)
                if node_to_upgrade is None:
                    if current_items < self.num_items+create_batch_size:
                        self.log_failure(
                            "Failures after cluster upgrade {} {}"
                            .format(current_items,
                                    self.num_items+create_batch_size))
                elif current_items > self.num_items:
                    self.log_failure(
                        "SyncWrite succeeded with mixed mode cluster")
            else:
                if node_to_upgrade is None:
                    if sync_write_task.fail:
                        self.log_failure("Failures after cluster upgrade")
                    else:
                        self.num_items += create_batch_size
                        self.bucket.scopes[
                            CbServer.default_scope].collections[
                            CbServer.default_collection] \
                            .num_items += create_batch_size
                elif self.cluster_supports_sync_write:
                    if sync_write_task.fail:
                        self.log.error("SyncWrite failed: %s"
                                       % sync_write_task.fail)
                        self.log_failure("SyncWrite failed during upgrade")
                    else:
                        self.num_items += create_batch_size
                        self.bucket.scopes[
                            CbServer.default_scope].collections[
                            CbServer.default_collection] \
                            .num_items += create_batch_size
                        create_gen = doc_generator(
                            self.key,
                            self.num_items,
                            self.num_items + create_batch_size)
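                # Mixed-mode cluster without sync_write support: every
                # durable create is expected to fail, so anything short of a
                # full batch of failures indicates a problem.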
                elif len(sync_write_task.fail.keys()) != create_batch_size:
                    self.log_failure(
                        "SyncWrite succeeded with mixed mode cluster")
                else:
                    for doc_id, doc_result in sync_write_task.fail.items():
                        if SDKException.FeatureNotAvailableException \
                                not in str(doc_result["error"]):
                            self.log_failure("Invalid exception for %s: %s"
                                             % (doc_id, doc_result))

            # Halt further upgrade if test has failed during current upgrade
            if self.test_failure is not None:
                break

        # Validate default collection stats before collection ops
        self.bucket_util._wait_for_stats_all_buckets(cbstat_cmd="all",
                                                     stat_name="ep_queue_size",
                                                     timeout=60)
        self.bucket_util.validate_docs_per_collections_all_buckets()

        # Play with collection if upgrade was successful
        if not self.test_failure:
            self.__play_with_collection()

        if self.upgrade_with_data_load:
            # Wait for update_task to complete
            update_task.end_task()
            self.task_manager.get_task_result(update_task)

        self.validate_test_failure()

    def test_bucket_durability_upgrade(self):
        update_task = None
        self.sdk_timeout = 60
        create_batch_size = 10000
        if self.atomicity:
            create_batch_size = 10

        # To make sure sync_write is supported by the initial cluster version
        sync_write_support = True
        if float(self.initial_version[0:3]) < 6.5:
            sync_write_support = False

        if sync_write_support:
            self.verification_dict["rollback_item_count"] = 0
            self.verification_dict["sync_write_aborted_count"] = 0

        if self.upgrade_with_data_load:
            self.log.info("Starting async doc updates")
            update_task = self.task.async_continuous_doc_ops(
                self.cluster, self.bucket, self.gen_load,
                op_type=DocLoading.Bucket.DocOps.UPDATE,
                process_concurrency=1,
                persist_to=1,
                replicate_to=1,
                timeout_secs=30)

        self.log.info("Upgrading cluster nodes to target version")
        node_to_upgrade = self.fetch_node_to_upgrade()
        while node_to_upgrade is not None:
            self.log.info("Selected node for upgrade: %s"
                          % node_to_upgrade.ip)
            self.upgrade_function[self.upgrade_type](node_to_upgrade,
                                                     self.upgrade_version)
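            # Re-point the cluster's master reference after the upgrade; if
            # the first server is unreachable, fall back to the last node of
            # the initial cluster.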
            try:
                self.cluster.update_master_using_diag_eval(
                    self.cluster.servers[0])
            except Exception:
                self.cluster.update_master_using_diag_eval(
                    self.cluster.servers[self.nodes_init-1])

            create_gen = doc_generator(self.key, self.num_items,
                                       self.num_items+create_batch_size)
            # Validate sync_write results after upgrade
            if self.atomicity:
                sync_write_task = self.task.async_load_gen_docs_atomicity(
                    self.cluster, self.bucket_util.buckets,
                    create_gen, DocLoading.Bucket.DocOps.CREATE,
                    process_concurrency=1,
                    transaction_timeout=self.transaction_timeout,
                    record_fail=True)
            else:
                sync_write_task = self.task.async_load_gen_docs(
                    self.cluster, self.bucket, create_gen,
                    DocLoading.Bucket.DocOps.CREATE,
                    timeout_secs=self.sdk_timeout,
                    process_concurrency=4,
                    sdk_client_pool=self.sdk_client_pool,
                    skip_read_on_error=True,
                    suppress_error_table=True)
            self.task_manager.get_task_result(sync_write_task)
            self.num_items += create_batch_size

            retry_index = 0
            while retry_index < 5:
                self.sleep(3, "Wait for num_items to match")
                current_items = self.bucket_util.get_bucket_current_item_count(
                    self.cluster, self.bucket)
                if current_items == self.num_items:
                    break
                self.log.debug("Num_items mismatch. Expected: %s, Actual: %s"
                               % (self.num_items, current_items))
                retry_index += 1
            # Doc count validation
            self.cluster_util.print_cluster_stats()

            self.verification_dict["ops_create"] += create_batch_size
            self.summary.add_step("Upgrade %s" % node_to_upgrade.ip)

            # Halt further upgrade if test has failed during current upgrade
            if self.test_failure:
                break

            node_to_upgrade = self.fetch_node_to_upgrade()

        if self.upgrade_with_data_load:
            # Wait for update_task to complete
            update_task.end_task()
            self.task_manager.get_task_result(update_task)
        else:
            self.verification_dict["ops_update"] = 0

        # Cb_stats vb-details validation
        failed = self.durability_helper.verify_vbucket_details_stats(
            self.bucket_util.buckets[0],
            self.cluster_util.get_kv_nodes(),
            vbuckets=self.cluster_util.vbuckets,
            expected_val=self.verification_dict)
        if failed:
            self.log_failure("Cbstat vbucket-details validation failed")
        self.summary.add_step("Cbstats vb-details verification")

        self.validate_test_failure()

        possible_d_levels = dict()
        possible_d_levels[Bucket.Type.MEMBASE] = \
            self.bucket_util.get_supported_durability_levels()
        possible_d_levels[Bucket.Type.EPHEMERAL] = [
            Bucket.DurabilityLevel.NONE,
            Bucket.DurabilityLevel.MAJORITY]
        len_possible_d_levels = len(possible_d_levels[self.bucket_type]) - 1

        if not sync_write_support:
            self.verification_dict["rollback_item_count"] = 0
            self.verification_dict["sync_write_aborted_count"] = 0

        # Perform bucket_durability update
        key, value = doc_generator("b_durability_doc", 0, 1).next()
        client = SDKClient([self.cluster.master], self.bucket_util.buckets[0])
        for index, d_level in enumerate(possible_d_levels[self.bucket_type]):
            self.log.info("Updating bucket_durability=%s" % d_level)
            self.bucket_util.update_bucket_property(
                self.bucket_util.buckets[0],
                bucket_durability=BucketDurability[d_level])
            self.bucket_util.print_bucket_stats()

            buckets = self.bucket_util.get_all_buckets()
            if buckets[0].durability_level != BucketDurability[d_level]:
                self.log_failure("New bucket_durability value not applied")

            self.summary.add_step("Update bucket_durability=%s" % d_level)

            self.sleep(10, "MB-39678: Bucket_d_level change to take effect")

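            # Exercise one doc_op per durability level: CREATE for the first
            # level, DELETE for the last and UPDATE for everything in between.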
            if index == 0:
                op_type = DocLoading.Bucket.DocOps.CREATE
                self.verification_dict["ops_create"] += 1
            elif index == len_possible_d_levels:
                op_type = DocLoading.Bucket.DocOps.DELETE
                self.verification_dict["ops_delete"] += 1
            else:
                op_type = DocLoading.Bucket.DocOps.UPDATE
                if "ops_update" in self.verification_dict:
                    self.verification_dict["ops_update"] += 1

            result = client.crud(op_type, key, value,
                                 timeout=self.sdk_timeout)
            if result["status"] is False:
                self.log_failure("Doc_op %s failed on key %s: %s"
                                 % (op_type, key, result["error"]))
            self.summary.add_step("Doc_op %s" % op_type)
        client.close()

        # Cb_stats vb-details validation
        failed = self.durability_helper.verify_vbucket_details_stats(
            self.bucket_util.buckets[0],
            self.cluster_util.get_kv_nodes(),
            vbuckets=self.cluster_util.vbuckets,
            expected_val=self.verification_dict)
        if failed:
            self.log_failure("Cbstat vbucket-details validation failed")
        self.summary.add_step("Cbstats vb-details verification")
        self.validate_test_failure()

    def test_transaction_doc_isolation(self):
        def run_transaction_updates():
            self.log.info("Starting transaction updates in parallel")
            while not stop_thread:
                commit_trans = choice([True, False])
                trans_update_task = self.task.async_load_gen_docs_atomicity(
                    self.cluster, self.bucket_util.buckets, self.gen_load,
                    DocLoading.Bucket.DocOps.UPDATE,
                    exp=self.maxttl,
                    batch_size=50,
                    process_concurrency=3,
                    timeout_secs=self.sdk_timeout,
                    update_count=self.update_count,
                    transaction_timeout=self.transaction_timeout,
                    commit=commit_trans,
                    durability=self.durability_level,
                    sync=self.sync, defer=self.defer,
                    retries=0)
                self.task_manager.get_task_result(trans_update_task)

        stop_thread = False
        update_task = None
        self.sdk_timeout = 60

        self.log.info("Upgrading cluster nodes to target version")
        node_to_upgrade = self.fetch_node_to_upgrade()
        while node_to_upgrade is not None:
            self.log.info("Selected node for upgrade: %s"
                          % node_to_upgrade.ip)
            if self.upgrade_with_data_load:
                update_task = Thread(target=run_transaction_updates)
                update_task.start()

            self.upgrade_function[self.upgrade_type](node_to_upgrade,
                                                     self.upgrade_version)
            try:
                self.cluster.update_master_using_diag_eval(
                    self.cluster.servers[0])
            except Exception:
                self.cluster.update_master_using_diag_eval(
                    self.cluster.servers[self.nodes_init-1])

            if self.upgrade_with_data_load:
                stop_thread = True
                update_task.join()

            self.cluster_util.print_cluster_stats()
            self.bucket_util.print_bucket_stats()

            self.summary.add_step("Upgrade %s" % node_to_upgrade.ip)

            # Halt further upgrade if test has failed during current upgrade
            if self.test_failure:
                break

            node_to_upgrade = self.fetch_node_to_upgrade()
            for bucket in self.bucket_util.get_all_buckets():
                tombstone_doc_supported = \
                    "tombstonedUserXAttrs" in bucket.bucketCapabilities
                if node_to_upgrade is None and not tombstone_doc_supported:
                    self.log_failure("Tombstone docs not added to %s "
                                     "capabilities" % bucket.name)
                elif node_to_upgrade is not None and tombstone_doc_supported:
                    self.log_failure("Tombstone docs supported for %s before "
                                     "cluster upgrade" % bucket.name)

        self.validate_test_failure()

        create_gen = doc_generator(self.key, self.num_items,
                                   self.num_items*2)
        # Start transaction load after node upgrade
        trans_task = self.task.async_load_gen_docs_atomicity(
            self.cluster, self.bucket_util.buckets,
            create_gen, DocLoading.Bucket.DocOps.CREATE, exp=self.maxttl,
            batch_size=50,
            process_concurrency=8,
            timeout_secs=self.sdk_timeout,
            update_count=self.update_count,
            transaction_timeout=self.transaction_timeout,
            commit=True,
            durability=self.durability_level,
            sync=self.sync, defer=self.defer,
            retries=0)
        self.task_manager.get_task_result(trans_task)

    def test_cbcollect_info(self):
        self.parse = self.input.param("parse", False)
        self.metric_name = self.input.param("metric_name", "kv_curr_items")
        log_path = self.input.param("logs_folder")
        self.log.info("Starting update tasks")
        update_tasks = list()
        update_tasks.append(self.task.async_continuous_doc_ops(
            self.cluster, self.bucket, self.gen_load,
            op_type=DocLoading.Bucket.DocOps.UPDATE,
            persist_to=1,
            replicate_to=1,
            process_concurrency=1,
            batch_size=10,
            timeout_secs=30))
        update_tasks.append(self.task.async_continuous_doc_ops(
            self.cluster, self.bucket, self.gen_load,
            op_type=DocLoading.Bucket.DocOps.UPDATE,
            replicate_to=1,
            process_concurrency=1,
            batch_size=10,
            timeout_secs=30))
        update_tasks.append(self.task.async_continuous_doc_ops(
            self.cluster, self.bucket, self.gen_load,
            op_type=DocLoading.Bucket.DocOps.UPDATE,
            persist_to=1,
            process_concurrency=1,
            batch_size=10,
            timeout_secs=30))

        node_to_upgrade = self.fetch_node_to_upgrade()
        while node_to_upgrade is not None:
            # Cbcollect with mixed mode cluster
            status = self.__trigger_cbcollect(log_path)
            if status is False:
                break

            self.log.info("Selected node for upgrade: %s"
                          % node_to_upgrade.ip)
            self.upgrade_function[self.upgrade_type](node_to_upgrade,
                                                     self.upgrade_version)
            self.cluster_util.print_cluster_stats()

            try:
                self.cluster.update_master_using_diag_eval(
                    self.cluster.servers[0])
            except Exception:
                self.cluster.update_master_using_diag_eval(
                    self.cluster.servers[self.nodes_init-1])

            # TODO: Do some validations here
            try:
                self.get_all_metrics(self.parse, self.metric_name)
            except Exception:
                pass

            node_to_upgrade = self.fetch_node_to_upgrade()

            # Halt further upgrade if test has failed during current upgrade
            if self.test_failure is True:
                break

        # Metrics should work in fully upgraded cluster
        self.get_all_metrics(self.parse, self.metric_name)
        # Cbcollect with fully upgraded cluster
        self.__trigger_cbcollect(log_path)

        for update_task in update_tasks:
            # Wait for update_task to complete
            update_task.end_task()
            self.task_manager.get_task_result(update_task)

        self.validate_test_failure()

    def get_low_cardinality_metrics(self, parse):
        content = None
        for server in self.cluster_util.get_kv_nodes():
            content = StatsHelper(server).get_prometheus_metrics(parse=parse)
            if not parse:
                StatsHelper(server)._validate_metrics(content)
        for line in content:
            self.log.info(line.strip("\n"))

    def get_high_cardinality_metrics(self, parse):
        content = None
        try:
            for server in self.cluster_util.get_kv_nodes():
                content = StatsHelper(server).get_prometheus_metrics_high(
                    parse=parse)
                if not parse:
                    StatsHelper(server)._validate_metrics(content)
            for line in content:
                self.log.info(line.strip("\n"))
        except Exception:
            pass

    def get_range_api_metrics(self, metric_name):
        label_values = {"bucket": self.bucket_util.buckets[0].name,
                        "nodes": self.cluster.master.ip}
        content = StatsHelper(self.cluster.master).get_range_api_metrics(
            metric_name, label_values=label_values)
        self.log.info(content)

    def get_instant_api(self, metric_name):
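        # Instant API metric fetch is not exercised yet; kept as a no-op
        # placeholder so get_all_metrics() can call it unconditionally.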
        pass

    def get_all_metrics(self, parse, metrics):
        self.get_low_cardinality_metrics(parse)
        self.get_high_cardinality_metrics(parse)
        self.get_range_api_metrics(metrics)
        self.get_instant_api(metrics)

    def test_with_process_crash(self):
        """
        Test to make sure durability will succeed even if a node goes down
        due to a crash, as long as enough nodes remain to satisfy the
        durability requirement.

        1. Select a node from the cluster to simulate the specified error
        2. Perform CRUDs on the target bucket with the given timeout
        3. Use cbstats to verify the operations succeed
        4. Validate that all mutations succeeded

        Note: self.sdk_timeout value is considered to be in 'seconds'
        """
        if self.num_replicas < 2:
            self.fail("Required: num_replicas > 1")

        # Override num_of_nodes affected to 1 (Positive case)
        self.num_nodes_affected = 1

        error_sim = dict()
        shell_conn = dict()
        cbstat_obj = dict()
        failover_info = dict()
        vb_info_info = dict()
        active_vbs_in_target_nodes = list()
        failover_info["init"] = dict()
        failover_info["afterCrud"] = dict()
        vb_info_info["init"] = dict()
        vb_info_info["afterCrud"] = dict()

        self.log.info("Selecting nodes to simulate error condition")
        target_nodes = DurabilityHelper.getTargetNodes(self.cluster,
                                                       self.nodes_init,
                                                       self.num_nodes_affected)

        self.log.info("Will simulate error condition on %s" % target_nodes)
        for node in target_nodes:
            shell_conn[node.ip] = RemoteMachineShellConnection(node)
            cbstat_obj[node.ip] = Cbstats(shell_conn[node.ip])
            active_vbs_in_target_nodes += cbstat_obj[node.ip].vbucket_list(
                self.bucket.name, "active")
            vb_info_info["init"][node.ip] = cbstat_obj[node.ip].vbucket_seqno(
                self.bucket.name)
            failover_info["init"][node.ip] = \
                cbstat_obj[node.ip].failover_stats(self.bucket.name)

        # Remove active vbuckets from doc_loading to avoid errors
        load_spec = dict()
        load_spec["doc_crud"] = dict()
        load_spec["doc_crud"][
            MetaCrudParams.DocCrud.CREATE_PERCENTAGE_PER_COLLECTION] = 100
        load_spec["doc_crud"][
            MetaCrudParams.DocCrud.UPDATE_PERCENTAGE_PER_COLLECTION] = 25
        load_spec["doc_crud"][
            MetaCrudParams.DocCrud.DELETE_PERCENTAGE_PER_COLLECTION] = 25
        load_spec["doc_crud"][
            MetaCrudParams.DocCrud.COMMON_DOC_KEY] = "test_collections"
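        # Restrict doc_loading to vbuckets whose active copy is NOT on the
        # error-simulated nodes; the symmetric difference against the full
        # 0..1023 range acts as a set complement here.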
        load_spec["target_vbuckets"] = list(
            set(range(0, 1024)) ^ set(active_vbs_in_target_nodes))

        self.log.info("Perform 'create', 'update', 'delete' mutations")
        doc_loading_task = \
            self.bucket_util.run_scenario_from_spec(
                self.task,
                self.cluster,
                self.bucket_util.buckets,
                load_spec,
                mutation_num=1,
                async_load=True)

        self.sleep(5, "Wait for doc loaders to start loading data")

        for node in target_nodes:
            # Create shell_connections
            shell_conn[node.ip] = RemoteMachineShellConnection(node)

            # Perform specified action
            error_sim[node.ip] = CouchbaseError(self.log, shell_conn[node.ip])
            error_sim[node.ip].create(self.simulate_error,
                                      bucket_name=self.bucket.name)

        # Perform new scope/collection creation during doc ops in parallel
        self.__perform_collection_crud()

        # Wait for document_loader tasks to complete
        self.task_manager.get_task_result(doc_loading_task)
        self.bucket_util.validate_doc_loading_results(doc_loading_task)
        if doc_loading_task.result is False:
            self.log_failure("Doc CRUDs failed with process crash")

        if self.simulate_error \
                not in [DiskError.DISK_FULL, DiskError.DISK_FAILURE]:
            # Revert the induced error condition
            for node in target_nodes:
                error_sim[node.ip].revert(self.simulate_error,
                                          bucket_name=self.bucket.name)

                # Disconnect the shell connection
                shell_conn[node.ip].disconnect()
            self.sleep(10, "Wait for node recovery to complete")

            # In case of error with Ephemeral bucket, need to rebalance
            # to make sure data is redistributed properly
            if self.bucket_type == Bucket.Type.EPHEMERAL:
                retry_num = 0
                result = None
                while retry_num != 2:
                    result = self.task.rebalance(
                        self.servers[0:self.nodes_init], [], [])
                    if result:
                        break
                    retry_num += 1
                    self.sleep(10, "Wait before retrying rebalance")

                self.assertTrue(result, "Rebalance failed")

        # Fetch latest failover stats and validate the values are updated
        self.log.info("Validating failover and seqno cbstats")
        for node in target_nodes:
            vb_info_info["afterCrud"][node.ip] = \
                cbstat_obj[node.ip].vbucket_seqno(self.bucket.name)
            failover_info["afterCrud"][node.ip] = \
                cbstat_obj[node.ip].failover_stats(self.bucket.name)

            # Failover stat validation
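            # Expectation: KILL_MEMCACHED changes the failover table; for
            # Ephemeral buckets any error other than STOP_MEMCACHED changes
            # it as well; in all other cases the table should stay unchanged.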
            if self.simulate_error == CouchbaseError.KILL_MEMCACHED:
                val = failover_info["init"][node.ip] \
                      != failover_info["afterCrud"][node.ip]
            else:
                if self.simulate_error != CouchbaseError.STOP_MEMCACHED \
                        and self.bucket_type == Bucket.Type.EPHEMERAL:
                    val = failover_info["init"][node.ip] \
                          != failover_info["afterCrud"][node.ip]
                else:
                    val = failover_info["init"][node.ip] \
                          == failover_info["afterCrud"][node.ip]
            error_msg = "Failover stats mismatch after error condition:" \
                        " %s != %s" \
                        % (failover_info["init"][node.ip],
                           failover_info["afterCrud"][node.ip])
            self.assertTrue(val, msg=error_msg)

            # Seq_no validation (High level)
            val = \
                vb_info_info["init"][node.ip] \
                != vb_info_info["afterCrud"][node.ip]
            self.assertTrue(val, msg="vbucket seq_no not updated after CRUDs")

        # Doc count validation
        self.validate_test_failure()
        self.bucket_util.validate_docs_per_collections_all_buckets()
示例#30
0
    def setUp(self):
        super(DurabilitySuccessTests, self).setUp()
        self.durability_helper = DurabilityHelper(
            self.log, len(self.cluster.nodes_in_cluster),
            self.durability_level)
        self.log.info("=== DurabilitySuccessTests setup complete ===")