Example #1
def wait_for_ns_servers_or_assert(servers, testcase, wait_time=360, wait_if_warmup=False):
        for server in servers:
            rest = RestConnection(server)
            log = logger.Logger.get_logger()
            log.info("waiting for ns_server @ {0}:{1}".format(server.ip, server.port))
            if RestHelper(rest).is_ns_server_running(wait_time):
                log.info("ns_server @ {0}:{1} is running".format(server.ip, server.port))

            elif wait_if_warmup:
            # wait until warmup has completed
                buckets = rest.get_buckets()
                for bucket in buckets:
                    testcase.assertTrue(ClusterOperationHelper._wait_warmup_completed(testcase, \
                                [server], bucket.name, wait_time), "warmup was not completed!")

            else:
                testcase.fail("ns_server {0} is not running in {1} sec".format(server.ip, wait_time))
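
A minimal caller sketch (assumptions here: `servers` parsed from the test .ini and `testcase` a unittest.TestCase, matching the signature above):

    # Wait up to 6 minutes per node; tolerate memcached warmup after a restart.
    wait_for_ns_servers_or_assert(servers, testcase, wait_time=360, wait_if_warmup=True)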
Example #2
 def wait_for_vbuckets_ready_state(node,
                                   bucket,
                                   timeout_in_seconds=300,
                                   log_msg=''):
     log = logger.Logger.get_logger()
     start_time = time.time()
     end_time = start_time + timeout_in_seconds
     ready_vbuckets = {}
     rest = RestConnection(node)
     servers = rest.get_nodes()
     RestHelper(rest).vbucket_map_ready(bucket, 60)
     vbuckets = rest.get_vbuckets(bucket)
     vbucket_count = len(vbuckets)
     obj = VBucketAwareMemcached(rest, bucket)
     memcacheds, vbucket_map, vbucket_map_replica = obj.request_map(
         rest, bucket)
     #Create dictionary with key:"ip:port" and value: a list of vbuckets
     server_dict = defaultdict(list)
     for everyID in range(0, vbucket_count):
         memcached_ip_port = str(vbucket_map[everyID])
         server_dict[memcached_ip_port].append(everyID)
     while time.time() < end_time and len(ready_vbuckets) < vbucket_count:
         for every_ip_port in server_dict:
             #Retrieve memcached ip and port
             ip, port = every_ip_port.split(":")
             client = MemcachedClient(ip, int(port), timeout=30)
             client.vbucket_count = len(vbuckets)
             bucket_info = rest.get_bucket(bucket)
             client.sasl_auth_plain(
                 bucket_info.name.encode('ascii'),
                 bucket_info.saslPassword.encode('ascii'))
             for i in server_dict[every_ip_port]:
                 try:
                     (a, b, c) = client.get_vbucket_state(i)
                 except mc_bin_client.MemcachedError as e:
                     log.error("%s: %s" % (log_msg, e))
                     continue
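                 # treat the vbucket as ready once its state blob contains "\x01" or "\x02"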
                 if c.find("\x01") > 0 or c.find("\x02") > 0:
                     ready_vbuckets[i] = True
                 elif i in ready_vbuckets:
                     log.warning(
                         "vbucket state changed from active to {0}".format(
                             c))
                     del ready_vbuckets[i]
             client.close()
     return len(ready_vbuckets) == vbucket_count
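
A usage sketch (assumptions: `node` is a reachable cluster node and the "default" bucket exists):

    # True only if every vbucket reported a ready state before the timeout.
    if not wait_for_vbuckets_ready_state(node, "default", timeout_in_seconds=300):
        raise Exception("vbuckets did not reach a ready state")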
Example #3
 def _create_default_bucket(self):
     name = "default"
     master = self.servers[0]
     rest = RestConnection(master)
     helper = RestHelper(RestConnection(master))
     if not helper.bucket_exists(name):
         node_ram_ratio = BucketOperationHelper.base_bucket_ratio(
             self.servers)
         info = rest.get_nodes_self()
         available_ram = info.mcdMemoryReserved * node_ram_ratio
         rest.create_bucket(bucket=name, ramQuotaMB=int(available_ram))
         ready = BucketOperationHelper.wait_for_memcached(master, name)
         self.assertTrue(ready, msg="wait_for_memcached failed")
     self.assertTrue(helper.bucket_exists(name),
                     msg="unable to create {0} bucket".format(name))
     self.load_thread = None
     self.shutdown_load_data = False
Example #4
 def _create_default_bucket(self):
     helper = RestHelper(self.rest)
     if not helper.bucket_exists(self.bucket):
         node_ram_ratio = BucketOperationHelper.base_bucket_ratio(
             self.servers)
         info = self.rest.get_nodes_self()
         available_ram = int(info.memoryQuota * node_ram_ratio)
         if available_ram < 256:
             available_ram = 256
         self.rest.create_bucket(bucket=self.bucket,
                                 ramQuotaMB=available_ram)
         ready = BucketOperationHelper.wait_for_memcached(
             self.master, self.bucket)
         self.testcase.assertTrue(ready, "wait_for_memcached failed")
     self.testcase.assertTrue(
         helper.bucket_exists(self.bucket),
         "unable to create {0} bucket".format(self.bucket))
Example #5
 def rebalance_out(self, how_many):
     msg = "choosing three nodes and rebalance them out from the cluster"
     self.log.info(msg)
     rest = RestConnection(self._servers[0])
     nodes = rest.node_statuses()
     nodeIps = [node.ip for node in nodes]
     self.log.info("current nodes : {0}".format(nodeIps))
     toBeEjected = []
     toBeEjectedServers = []
     selection = self._servers[1:]
     shuffle(selection)
     for server in selection:
         for node in nodes:
             if server.ip == node.ip:
                 toBeEjected.append(node.id)
                 toBeEjectedServers.append(server)
                 break
         if len(toBeEjected) == how_many:
             break
     if len(toBeEjected) > 0:
         self.log.info(
             "selected {0} for rebalance out from the cluster".format(
                 toBeEjected))
         otpNodes = [node.id for node in nodes]
         started = rest.rebalance(otpNodes, toBeEjected)
         msg = "rebalance operation started ? {0}"
         self.log.info(msg.format(started))
         if started:
             result = rest.monitorRebalance()
             msg = "successfully rebalanced out selected nodes from the cluster ? {0}"
             self.log.info(msg.format(result))
             # restart membase on the ejected nodes so they come back clean
             for server in toBeEjectedServers:
                 shell = RemoteMachineShellConnection(server)
                 try:
                     shell.stop_membase()
                 except:
                     pass
                 try:
                     shell.start_membase()
                 except:
                     pass
                 shell.disconnect()
                 RestHelper(RestConnection(server)).is_ns_server_running()
             return result
     return True
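
A driver sketch (assuming `self` is a test instance with `_servers` populated, as the method body above expects):

    # Eject two random non-master nodes, then restart membase on them.
    self.assertTrue(self.rebalance_out(2), "rebalance out did not complete")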
Example #6
 def wait_for_ns_servers_or_assert(self,
                                   servers,
                                   wait_time=360,
                                   wait_if_warmup=False):
     for server in servers:
         rest = RestConnection(server)
         self.log.debug("Waiting for ns_server @ {0}:{1}".format(
             server.ip, server.port))
         if RestHelper(rest).is_ns_server_running(wait_time):
             self.log.debug("ns_server @ {0}:{1} is running".format(
                 server.ip, server.port))
         else:
             self.log.error(
                 "ns_server {0} is not running in {1} sec".format(
                     server.ip, wait_time))
             return False
     return True
Example #7
 def test_create_bucket_used_port(self):
     ports = [25, 68, 80, 135, 139, 143, 500]
     for port in ports:
         try:
             self.cluster.create_standard_bucket(
                 self.server, self.bucket_name + str(port), port,
                 self.bucket_size, self.num_replicas)
         except:
             self.log.info('Error appears as expected')
             rest = RestConnection(self.master)
             self.assertTrue(
                 RestHelper(rest).is_ns_server_running(
                     timeout_in_seconds=60))
         else:
             raise Exception(
                 'User has to be unable to create a bucket using port %s' %
                 port)
Example #8
 def wipe_config_on_removed_nodes(self, remove_nodes):
     """
     Stop servers on nodes that were failed over and removed, and wipe config dir
     """
     for node in remove_nodes:
         self.log.info("Wiping node config and restarting server on {0}".format(node))
         rest = RestConnection(node)
         data_path = rest.get_data_path()
         shell = RemoteMachineShellConnection(node)
         shell.stop_couchbase()
         self.sleep(10)
         shell.cleanup_data_config(data_path)
         shell.start_server()
         self.sleep(10)
         if not RestHelper(rest).is_ns_server_running():
             self.log.error("ns_server {0} is not running.".format(node.ip))
         shell.disconnect()
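
A hedged call sketch (the node list here is hypothetical; in practice it would come from the preceding failover/removal step):

    # Wipe config and restart the server on the nodes that were just removed.
    self.wipe_config_on_removed_nodes(remove_nodes=self.servers[1:])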
Example #9
 def _create_default_bucket(self, replica=1):
     name = "default"
     master = self.servers[0]
     rest = RestConnection(master)
     helper = RestHelper(RestConnection(master))
     if not helper.bucket_exists(name):
         node_ram_ratio = BucketOperationHelper.base_bucket_ratio(
             self.servers)
         info = rest.get_nodes_self()
         available_ram = info.memoryQuota * node_ram_ratio
         rest.create_bucket(bucket=name,
                            ramQuotaMB=int(available_ram),
                            replicaNumber=replica)
         ready = BucketOperationHelper.wait_for_memcached(master, name)
         self.assertTrue(ready, msg="wait_for_memcached failed")
     self.assertTrue(helper.bucket_exists(name),
                     msg="unable to create {0} bucket".format(name))
Example #10
    def test_online_swap_rebalance_upgrade(self):
        """ Online swap rebalance upgrade test

        The old nodes are removed and the new nodes are added followed by a rebalance.
        """
        # Installs the `self.initial_version` of Couchbase on the first two servers
        self.product = 'couchbase-server'
        self._install(self.input.servers[:2])

        # Check Couchbase is running post installation
        for server in self.input.servers:
            self.assertTrue(RestHelper(RestConnection(server)).is_ns_server_running(60), f"ns_server is not running on {server}")

        # Install the `self.upgrade_versions` on the last 2 nodes
        self.initial_version = self.upgrade_versions[0]
        self._install(self.input.servers[2:])

        # Remove the first two nodes and perform a rebalance
        self.cluster.rebalance(self.servers, self.servers[2:], self.servers[:2], services=["kv", "kv"])

        # Replace the services of the last node with kv and backup
        self.replace_services(self.servers[2:], self.servers[-1], ["kv,backup"])

        # Add the built in user for memcached authentication
        self.add_built_in_server_user(node=self.servers[2])

        # Create the default bucket and update the list of buckets
        rest_conn = RestConnection(self.servers[2])
        rest_conn.create_bucket(bucket='default', ramQuotaMB=512, compressionMode=self.compression_mode)
        self.buckets = rest_conn.get_buckets()

        # Populate the buckets with data
        self._load_all_buckets(self.servers[2], BlobGenerator("ent-backup", "ent-backup-", self.value_size, end=self.num_items), "create", 0)

        try:
            backup_service_hook = BackupServiceHook(self.servers[-1], self.servers, self.backupset, None)

            # Wait for the data to be persisted to disk
            for bucket in self.buckets:
                if not RebalanceHelper.wait_for_stats_on_all(backup_service_hook.backup_service.master, bucket.name, 'ep_queue_size', 0, timeout_in_seconds=200):
                    self.fail("Timeout reached while waiting for 'eq_queue_size' to reach 0")

            backup_service_hook.run_test()
        finally:
            backup_service_hook.cleanup()
Example #11
    def replication_verification(master, bucket_data, replica, test, failed_over=False):
        asserts = []
        rest = RestConnection(master)
        buckets = rest.get_buckets()
        nodes = rest.node_statuses()
        test.log.info("expect {0} / {1} replication ? {2}".format(len(nodes),
            (1.0 + replica), len(nodes) / (1.0 + replica)))
        for bucket in buckets:
            ClusterOperationHelper.flushctl_set(master, "exp_pager_stime", 30, bucket.name)
        if len(nodes) / (1.0 + replica) >= 1:
            final_replication_state = RestHelper(rest).wait_for_replication(300)
            msg = "replication state after waiting for up to 5 minutes : {0}"
            test.log.info(msg.format(final_replication_state))
            #run expiry_pager on all nodes before doing the replication verification
            for bucket in buckets:
                ClusterOperationHelper.flushctl_set(master, "exp_pager_stime", 30, bucket.name)
                test.log.info("wait for expiry pager to run on all these nodes")
                time.sleep(30)
                ClusterOperationHelper.flushctl_set(master, "exp_pager_stime", 3600, bucket.name)
                ClusterOperationHelper.flushctl_set(master, "exp_pager_stime", 30, bucket.name)
                # windows need more than 15 minutes to get number matched
                replica_match = RebalanceHelper.wait_till_total_numbers_match(bucket=bucket.name,
                    master=master,
                    timeout_in_seconds=600)
                if not replica_match:
                    asserts.append("replication was completed but sum(curr_items) don't match the curr_items_total %s" %
                                   bucket.name)
                if not failed_over:
                    stats = rest.get_bucket_stats(bucket=bucket.name)
                    RebalanceHelper.print_taps_from_all_nodes(rest, bucket.name)
                    msg = "curr_items : {0} is not equal to actual # of keys inserted : {1} : bucket: {2}"

                    if bucket_data[bucket.name]['kv_store'] is None:
                        items_inserted = bucket_data[bucket.name]["items_inserted_count"]
                    else:
                        items_inserted = len(bucket_data[bucket.name]['kv_store'].valid_items())

                    active_items_match = stats["curr_items"] == items_inserted
                    if not active_items_match:
                        asserts.append(msg.format(stats["curr_items"], items_inserted, bucket.name))

        if len(asserts) > 0:
            for msg in asserts:
                test.log.error(msg)
            test.assertTrue(len(asserts) == 0, msg=asserts)
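
The call pattern used elsewhere in this listing (see _common_test_body below), assuming `bucket_data` was built by the test's load phase:

    RebalanceBaseTest.replication_verification(master, bucket_data, self.replica, self)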
Example #12
    def cleanup_cluster(servers, wait_for_rebalance=True):
        log = logger.Logger.get_logger()
        rest = RestConnection(servers[0])
        helper = RestHelper(rest)
        helper.is_ns_server_running(timeout_in_seconds=testconstants.NS_SERVER_TIMEOUT)
        nodes = rest.node_statuses()
        master_id = rest.get_nodes_self().id
        if len(nodes) > 1:
            log.info("rebalancing all nodes in order to remove nodes")
            rest.log_client_error("Starting rebalance from test, ejected nodes %s" % \
                                                             [node.id for node in nodes if node.id != master_id])
            removed = helper.remove_nodes(knownNodes=[node.id for node in nodes],
                                          ejectedNodes=[node.id for node in nodes if node.id != master_id],
                                          wait_for_rebalance=wait_for_rebalance)
            success_cleaned = []
            for removed in [node for node in nodes if (node.id != master_id)]:
                removed.rest_password = servers[0].rest_password
                removed.rest_username = servers[0].rest_username
                try:
                    rest = RestConnection(removed)
                except Exception as ex:
                    log.error("can't create rest connection after rebalance out for ejected nodes,\
                        will retry after 10 seconds according to MB-8430: {0} ".format(ex))
                    time.sleep(10)
                    rest = RestConnection(removed)
                start = time.time()
                while time.time() - start < 30:
                    if len(rest.get_pools_info()["pools"]) == 0:
                        success_cleaned.append(removed)
                        break
                    else:
                        time.sleep(0.1)
                if time.time() - start > 10:
                    log.error("'pools' on node {0}:{1} - {2}".format(
                           removed.ip, removed.port, rest.get_pools_info()["pools"]))
            for node in set([node for node in nodes if (node.id != master_id)]) - set(success_cleaned):
                log.error("node {0}:{1} was not cleaned after removing from cluster".format(
                           node.ip, node.port))
            if len(set([node for node in nodes if (node.id != master_id)])\
                    - set(success_cleaned)) != 0:
                raise Exception("not all ejected nodes were cleaned successfully")

            log.info("removed all the nodes from cluster associated with {0} ? {1}".format(servers[0], \
                    [(node.id, node.port) for node in nodes if (node.id != master_id)]))
Example #13
    def test_stream_after_n_crashes(self):
        crashes = self.input.param("crash_num", 5)
        vbucket = randint(0, self.vbuckets - 1)
        bucket = self.bucket_util.buckets[0]

        self.log.info("Chosen vbucket {0} for {1} crashes".format(
            vbucket, crashes))
        start = 0
        end = self.num_items

        nodeA = self.cluster.servers[0]
        shell_conn = RemoteMachineShellConnection(nodeA)
        cb_stat_obj = Cbstats(shell_conn)
        rest = RestHelper(RestConnection(nodeA))

        for _ in xrange(crashes):
            # Load data into the selected vbucket
            self.load_docs(bucket, vbucket, start, end, "create")
            self.assertTrue(self.stop_node(0), msg="Failed during stop_node")
            self.sleep(5, "Sleep after stop_node")
            self.assertTrue(self.start_node(0), msg="Failed during start_node")
            self.assertTrue(rest.is_ns_server_running(),
                            msg="Failed while is_ns_server_running check")
            self.sleep(5, "Waiting after ns_server started")

            # Fetch vbucket seqno stats
            vb_stat = cb_stat_obj.vbucket_seqno(bucket.name)
            dcp_client = self.dcp_client(nodeA, dcp.constants.PRODUCER)
            stream = dcp_client.stream_req(vbucket, 0, 0,
                                           vb_stat[vbucket]["high_seqno"],
                                           vb_stat[vbucket]["uuid"])
            stream.run()

            self.assertTrue(
                stream.last_by_seqno == vb_stat[vbucket]["high_seqno"],
                msg="Mismatch in high_seqno. {0} == {1}".format(
                    vb_stat[vbucket]["high_seqno"], stream.last_by_seqno))

            # Update start/end values for next loop
            start = end
            end += self.num_items

        # Disconnect shell Connection for the node
        shell_conn.disconnect()
Example #14
 def verify_upgrade_rebalance_in_out(self):
     self.master = self.servers[self.initial_num_servers]
     self.rest = RestConnection(self.master)
     self.rest_helper = RestHelper(self.rest)
     for bucket in self.buckets:
         if not self.rest_helper.bucket_exists(bucket.name):
             raise Exception("bucket: %s not found" % bucket.name)
     if self.op_types == "bucket":
         bucketinfo = self.rest.get_bucket(bucket.name)
         self.log.info("bucket info :- %s" % bucketinfo)
     if self.op_types == "data":
         self._wait_for_stats_all_buckets(
             self.servers[self.initial_num_servers:self.num_servers])
         self._verify_all_buckets(self.master, 1, self.wait_timeout * 50,
                                  self.max_verify, True, 1)
         self._verify_stats_all_buckets(
             self.servers[self.initial_num_servers:self.num_servers])
Example #15
    def _modify_bucket(self):
        helper = RestHelper(self.rest)
        node_ram_ratio = BucketOperationHelper.base_bucket_ratio(self.servers)
        info = self.rest.get_nodes_self()

        status, content = self.rest.change_bucket_props(
            bucket=self.bucket,
            ramQuotaMB=512,
            authType='sasl',
            timeSynchronization='enabledWithOutDrift')
        if re.search('TimeSyncronization not allowed in update bucket',
                     content):
            self.log.info(
                '[PASS]Expected modify bucket to disallow Time Synchronization.'
            )
        else:
            self.fail(
                '[ERROR] Not expected to allow modify bucket for Time Synchronization'
            )
Example #16
 def cleanup_cluster(servers, wait_for_rebalance=True):
     log = logger.Logger.get_logger()
     rest = RestConnection(servers[0])
     helper = RestHelper(rest)
     helper.is_ns_server_running(
         timeout_in_seconds=testconstants.NS_SERVER_TIMEOUT)
     nodes = rest.node_statuses()
     master_id = rest.get_nodes_self().id
     if len(nodes) > 1:
         log.info("rebalancing all nodes in order to remove nodes")
         removed = helper.remove_nodes(
             knownNodes=[node.id for node in nodes],
             ejectedNodes=[
                 node.id for node in nodes if node.id != master_id
             ],
             wait_for_rebalance=wait_for_rebalance)
         log.info(
             "removed all the nodes from cluster associated with {0} ? {1}".
             format(servers[0], removed))
Example #17
 def test_win_specific_names(self):
     version = self._get_cb_version()
     if self._get_cb_os() != 'windows':
         self.log.warn('This test is windows specific')
         return
     try:
         self.test_banned_bucket_name()
     finally:
         try:
             self.log.info('Will check if ns_server is running')
             rest = RestConnection(self.master)
             self.assertTrue(
                 RestHelper(rest).is_ns_server_running(
                     timeout_in_seconds=60))
         except:
             self._reinstall(version)
             self.fail(
                 "ns_server is not running after bucket '%s' creation" %
                 (self.bucket_name))
Example #18
 def remove_node(self, otpnode=None, wait_for_rebalance=True):
     nodes = self.rest.node_statuses()
     '''This is the case when master node is running cbas service as well'''
     if len(nodes) <= len(otpnode):
         return
     
     helper = RestHelper(self.rest)
     try:
         removed = helper.remove_nodes(knownNodes=[node.id for node in nodes],
                                           ejectedNodes=[node.id for node in otpnode],
                                           wait_for_rebalance=wait_for_rebalance)
     except Exception as e:
         self.log.info("First time rebalance failed on Removal. Wait and try again. THIS IS A BUG.")
         time.sleep(5)
         removed = helper.remove_nodes(knownNodes=[node.id for node in nodes],
                                           ejectedNodes=[node.id for node in otpnode],
                                           wait_for_rebalance=wait_for_rebalance)
     if wait_for_rebalance:
         return removed
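
A call sketch (note: despite the singular name, `otpnode` is a list of OTP node objects, e.g. from rest.node_statuses(); `self.master` is assumed here):

    to_remove = [node for node in self.rest.node_statuses() if node.ip != self.master.ip]
    self.remove_node(otpnode=to_remove, wait_for_rebalance=True)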
Example #19
    def test_crash_entire_cluster(self):
        self.cluster.rebalance([self.master], self.servers[1:], [])

        bucket = self.bucket_util.buckets[0]
        vbucket = randint(0, self.vbuckets - 1)
        nodeA = self.servers[0]
        self.load_docs(bucket, vbucket, 0, self.num_items, "create")

        shell_conn = RemoteMachineShellConnection(nodeA)
        cb_stat_obj = Cbstats(shell_conn)

        dcp_client = self.dcp_client(nodeA, dcp.constants.PRODUCER)
        _ = dcp_client.stream_req(vbucket, 0, 0, 2 * self.num_items, 0)
        # Load a second batch so the stream's requested end seq-no (2 * num_items) is reachable
        self.load_docs(bucket, vbucket, self.num_items, 2 * self.num_items, "create")

        # stop all nodes
        node_range = range(len(self.servers))
        for i in node_range:
            self.assertTrue(self.stop_node(i), msg="Failed during stoip_node")
        self.sleep(2, "Wait after stop_node")

        # start all nodes in reverse order
        for i in reversed(node_range):
            self.assertTrue(self.start_node(i), msg="Failed during start_node")

        rest = RestHelper(RestConnection(nodeA))
        self.assertTrue(rest.is_ns_server_running(),
                        msg="Failed while is_ns_server_running check")

        vb_info = cb_stat_obj.vbucket_seqno(bucket.name)
        dcp_client = self.dcp_client(nodeA, dcp.constants.PRODUCER)
        stream = dcp_client.stream_req(vbucket, 0, 0,
                                       vb_info[vbucket]["high_seqno"], 0)
        stream.run()
        self.assertTrue(stream.last_by_seqno == vb_info[vbucket]["high_seqno"],
                        msg="Seq-no mismatch. {0} != {1}".format(
                            stream.last_by_seqno,
                            vb_info[vbucket]["high_seqno"]))

        # Disconnect shell Connection for the node
        shell_conn.disconnect()
Example #20
    def test_failover_transaction(self):
        query_node = self.servers[1]
        sleep_time_ms = 10000
        threads = [None] * self.thread_count
        results = [None] * self.thread_count

        # Start a transaction
        begin_work = self.run_cbq_query(query="BEGIN WORK",
                                        server=query_node,
                                        txtimeout="2m")
        txid = begin_work['results'][0]['txid']

        # Launch query thread/s (should be single)
        select_statement = f"select {sleep_time_ms}"
        for i in range(len(threads)):
            self.log.info(f"Lauching query thread {i}")
            threads[i] = threading.Thread(target=self.run_query,
                                          args=(select_statement, query_node,
                                                results, i, 60, txid))
            threads[i].start()

        # Perform failover or removal of query node
        self.sleep(2)
        if self.action == 'failover':
            failover = self.cluster.failover(servers=self.servers,
                                             failover_nodes=[query_node],
                                             graceful=self.graceful)
        elif self.action == 'remove':
            rebalance = self.cluster.async_rebalance(servers=self.servers,
                                                     to_add=[],
                                                     to_remove=[query_node])
            reached = RestHelper(self.rest).rebalance_reached()
            self.assertTrue(reached,
                            "rebalance failed, stuck or did not complete")
            rebalance.result()

        # Check query thread/s completed successfully
        for i in range(len(threads)):
            threads[i].join()
        self.log.info(results)
        for i in range(len(threads)):
            self.assertEqual(results[i], [{'$1': sleep_time_ms}])
Example #21
    def _create_bucket(self, lww=True, drift=False, name=None):

        if lww:
            self.lww = lww

        if name:
            self.bucket = name

        helper = RestHelper(self.rest)
        if not helper.bucket_exists(self.bucket):
            node_ram_ratio = BucketOperationHelper.base_bucket_ratio(
                self.servers)
            info = self.rest.get_nodes_self()
            self.rest.create_bucket(bucket=self.bucket,
                ramQuotaMB=512, authType='sasl', lww=self.lww)
            try:
                ready = BucketOperationHelper.wait_for_memcached(self.master,
                    self.bucket)
            except Exception as e:
                self.fail('unable to create bucket: {0}'.format(e))
Example #22
 def test_rebalance_in_query_node(self):
     self.run_cbq_query(
         query="PREPARE p1 from select * from default limit 5",
         server=self.servers[0])
     self.sleep(5)
     for i in range(self.nodes_init):
         self.run_cbq_query(query="execute p1", server=self.servers[i])
     services_in = ["n1ql", "index", "data"]
     rebalance = self.cluster.async_rebalance(
         self.servers[:self.nodes_init],
         [self.servers[self.nodes_init + 1]], [],
         services=services_in)
     reached = RestHelper(self.rest).rebalance_reached()
     self.assertTrue(reached, "rebalance failed, stuck or did not complete")
     rebalance.result()
     self.sleep(30)
     for i in range(self.nodes_init + 2):
         self.run_cbq_query(query="execute '[%s:%s]p1'" %
                            (self.servers[0].ip, self.servers[0].port),
                            server=self.servers[i])
Example #23
 def add_node_and_rebalance(self, master, servers):
     ClusterOperationHelper.add_all_nodes_or_assert(master, servers, self.input.membase_settings, self)
     rest = RestConnection(master)
     nodes = rest.node_statuses()
     otpNodeIds = []
     for node in nodes:
         otpNodeIds.append(node.id)
     rebalanceStarted = rest.rebalance(otpNodeIds, [])
     self.assertTrue(rebalanceStarted,
                     "unable to start rebalance on master node {0}".format(master.ip))
     self.log.info('started rebalance operation on master node {0}'.format(master.ip))
     rebalanceSucceeded = rest.monitorRebalance()
     self.assertTrue(rebalanceSucceeded,
                     "rebalance operation for nodes: {0} was not successful".format(otpNodeIds))
     self.log.info('rebalance operation succeeded for nodes: {0}'.format(otpNodeIds))
     # make sure the cluster is rebalanced and node statuses are healthy
     helper = RestHelper(rest)
     self.assertTrue(helper.is_cluster_healthy(), "cluster status is not healthy")
     self.assertTrue(helper.is_cluster_rebalanced(), "cluster is not balanced")
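
An invocation sketch (assuming the usual testrunner master/servers split):

    # Add every server to the cluster anchored at the master, then rebalance.
    self.add_node_and_rebalance(master=self.servers[0], servers=self.servers)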
Example #24
    def failover(self, howmany):
        # check that all nodes are part of the cluster
        rest = RestConnection(self.servers[0])
        nodes = rest.node_statuses()
        if len(nodes) != len(self.servers):
            self.test.fail(
                num_nodes_mismatch.format(len(self.servers), len(nodes)))
        if len(nodes) - howmany < 2:
            self.test.fail(num_nodes_mismatch.format(len(nodes), howmany))
        master_node = rest.get_nodes_self()
        # when selecting, make sure we don't pick the master node
        selection = [n for n in nodes if n.id != master_node.id]

        shuffle(selection)
        failed = selection[0:howmany]
        for f in failed:
            self.log.info("will fail over node : {0}".format(f.id))

        if len(nodes) // (1 + howmany) >= 1:
            self.test.assertTrue(
                RebalanceHelper.wait_for_replication(rest.get_nodes(),
                                                     timeout=900),
                msg="replication did not finish after 15 minutes")
            for f in failed:
                self._stop_server(f)
                self.log.info(
                    "10 seconds delay to wait for membase-server to shutdown")
            #wait for 5 minutes until node is down

            for f in failed:
                if f.port == 8091:
                    self.test.assertTrue(
                        RestHelper(rest).wait_for_node_status(
                            f, "unhealthy", 300),
                        msg=
                        "node status is not unhealthy even after waiting for 5 minutes"
                    )
                self.test.assertTrue(rest.fail_over(f.id),
                                     msg="failover did not complete")
                self.log.info("failed over node : {0}".format(f.id))
        return failed
Example #25
    def _common_test_body(self):
        master = self.servers[0]
        rest = RestConnection(master)
        creds = self.input.membase_settings
        bucket_data = RebalanceBaseTest.bucket_data_init(rest)

        ClusterHelper.add_all_nodes_or_assert(master, self.servers, creds, self)
        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()], ejectedNodes=[])
        self.assertTrue(rest.monitorRebalance(),
            msg="rebalance operation failed after adding nodes")

        nodes = rest.node_statuses()

        # don't rebalance out the current node
        while len(nodes) > 1:
            #pick a node that is not the master node
            toBeEjectedNode = RebalanceHelper.pick_node(master)
            distribution = RebalanceBaseTest.get_distribution(self.load_ratio)
            RebalanceBaseTest.load_data_for_buckets(rest, self.load_ratio, distribution, [master], bucket_data, self)
            self.log.info("current nodes : {0}".format([node.id for node in rest.node_statuses()]))
            # let's start/stop rebalance three times
            self.log.info("removing node {0} and rebalance afterwards".format(toBeEjectedNode.id))
            rest.fail_over(toBeEjectedNode.id)
            self.log.info("failed over {0}".format(toBeEjectedNode.id))
            time.sleep(10)
            rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
                ejectedNodes=[toBeEjectedNode.id])
            expected_progress = 30
            reached = RestHelper(rest).rebalance_reached(expected_progress)
            self.assertTrue(reached, "rebalance failed or did not reach {0}%".format(expected_progress))
            stopped = rest.stop_rebalance()
            self.assertTrue(stopped, msg="unable to stop rebalance")
            time.sleep(20)
            RebalanceBaseTest.replication_verification(master, bucket_data, self.replica, self)
            rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()], ejectedNodes=[toBeEjectedNode.id])
            self.assertTrue(rest.monitorRebalance(),
                msg="rebalance operation failed after adding node {0}".format(toBeEjectedNode.id))
            time.sleep(20)

            RebalanceBaseTest.replication_verification(master, bucket_data, self.replica, self)
            nodes = rest.node_statuses()
Example #26
    def common_setup(self, replica):
        self._input = TestInputSingleton.input
        self._servers = self._input.servers
        first = self._servers[0]
        self.log = logger.Logger().get_logger()
        self.log.info(self._input)
        rest = RestConnection(first)
        for server in self._servers:
            RestHelper(RestConnection(server)).is_ns_server_running()

        ClusterOperationHelper.cleanup_cluster(self._servers)
        BucketOperationHelper.delete_all_buckets_or_assert(self._servers, self)
        ClusterOperationHelper.add_all_nodes_or_assert(
            self._servers[0], self._servers, self._input.membase_settings,
            self)
        nodes = rest.node_statuses()
        otpNodeIds = []
        for node in nodes:
            otpNodeIds.append(node.id)
        info = rest.get_nodes_self()
        bucket_ram = info.mcdMemoryReserved * 3 / 4
        rest.create_bucket(bucket="default",
                           ramQuotaMB=int(bucket_ram),
                           replicaNumber=replica,
                           proxyPort=rest.get_nodes_self().moxi)
        msg = "wait_for_memcached fails"
        ready = BucketOperationHelper.wait_for_memcached(first, "default"),
        self.assertTrue(ready, msg)
        rebalanceStarted = rest.rebalance(otpNodeIds, [])
        self.assertTrue(
            rebalanceStarted,
            "unable to start rebalance on master node {0}".format(first.ip))
        self.log.info('started rebalance operation on master node {0}'.format(
            first.ip))
        rebalanceSucceeded = rest.monitorRebalance()
        # without a bucket this seems to fail
        self.assertTrue(
            rebalanceSucceeded,
            "rebalance operation for nodes: {0} was not successful".format(
                otpNodeIds))
        self.awareness = VBucketAwareMemcached(rest, "default")
Example #27
 def load_some_data(serverInfo,
                fill_ram_percentage=10.0,
                bucket_name='default',
                test=None):
     log = logger.Logger.get_logger()
     if fill_ram_percentage <= 0.0:
         fill_ram_percentage = 5.0
     client = MemcachedClientHelper.direct_client(serverInfo, bucket_name)
     # populate keys
     rest = RestConnection(serverInfo)
     RestHelper(rest).vbucket_map_ready(bucket_name, 60)
     vbucket_count = len(rest.get_vbuckets(bucket_name))
     testuuid = uuid.uuid4()
     info = rest.get_bucket(bucket_name)
     emptySpace = info.stats.ram - info.stats.memUsed
     log.info('emptySpace : {0} fill_ram_percentage : {1}'.format(emptySpace, fill_ram_percentage))
     fill_space = (emptySpace * fill_ram_percentage) / 100.0
     log.info("fill_space {0}".format(fill_space))
     # each packet can be 10 KB
     packetSize = int(10 * 1024)
     number_of_keys = int(fill_space) / packetSize
     log.info('packetSize: {0}'.format(packetSize))
     log.info('memory usage before key insertion : {0}'.format(info.stats.memUsed))
     log.info('inserting {0} new keys to memcached @ {1}'.format(number_of_keys, serverInfo.ip))
     keys = ["key_%s_%d" % (testuuid, i) for i in range(number_of_keys)]
     inserted_keys = []
     for key in keys:
         vbucketId = crc32.crc32_hash(key) & (vbucket_count - 1)
         client.vbucketId = vbucketId
         try:
             client.set(key, 0, 0, key)
             inserted_keys.append(key)
         except mc_bin_client.MemcachedError as error:
             log.error(error)
             client.close()
             log.error("unable to push key : {0} to vbucket : {1}".format(key, client.vbucketId))
             if test:
                 test.fail("unable to push key : {0} to vbucket : {1}".format(key, client.vbucketId))
             else:
                 break
     client.close()
     return inserted_keys
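
A usage sketch (assuming a running "default" bucket on `serverInfo`):

    # Fill roughly 10% of the bucket's free RAM with 10 KB items.
    inserted_keys = load_some_data(serverInfo, fill_ram_percentage=10.0, bucket_name='default')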
Example #28
 def test_node_memcached_failure(self):
     timeout = self.timeout / 2
     status = self.rest.update_autoreprovision_settings(True, 1)
     if not status:
         self.fail('failed to change autoreprovision_settings!')
     self.sleep(5)
     self._pause_couchbase(self.server_fail)
     self.sleep(5)
     AutoReprovisionBaseTest.wait_for_warmup_or_assert(self.master, 1,
                                                       timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME,
                                                       self)
     RemoteUtilHelper.common_basic_setup([self.server_fail])
     AutoReprovisionBaseTest.wait_for_failover_or_assert(self.master, 0,
                                                         timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME,
                                                         self)
     helper = RestHelper(self.rest)
     self.assertTrue(helper.is_cluster_healthy(), "cluster status is not healthy")
     self.assertTrue(helper.is_cluster_rebalanced(), "cluster is not balanced")
     buckets = self.rest.get_buckets()
     for bucket in buckets:
         self.verify_loaded_data(self.master, bucket.name, self.loaded_items[bucket.name])
Example #29
 def test_setting_propogation_rebalance_in(self):
     expected_curl = self.set_tmpspace()
     self.assertEqual(expected_curl['queryTmpSpaceSize'], self.tmp_size)
     expected_dir = self.set_directory()
     self.assertEqual(expected_dir['queryTmpSpaceDir'], self.directory_path)
     services_in = ["n1ql", "index", "data"]
     rebalance = self.cluster.async_rebalance(
         self.servers[:self.nodes_init], [self.servers[self.nodes_init]],
         [],
         services=services_in)
     reached = RestHelper(self.rest).rebalance_reached()
     self.assertTrue(reached, "rebalance failed, stuck or did not complete")
     self.sleep(1)
     curl_url = "http://%s:%s/settings/querySettings" % (self.servers[
         self.nodes_init].ip, self.servers[self.nodes_init].port)
     curl_output = self.shell.execute_command(
         "%s -u Administrator:password %s" % (self.curl_path, curl_url))
     expected_curl = self.convert_list_to_json(curl_output[0])
     self.assertEqual(expected_curl['queryTmpSpaceSize'], self.tmp_size)
     self.assertEqual(expected_curl['queryTmpSpaceDir'],
                      self.directory_path)
Example #30
 def rebalance_in_with_cluster_password_change(self):
     new_password = self.input.param("new_password", "new_pass")
     servs_result = self.servers[:self.nodes_init + self.nodes_in]
     rebalance = self.cluster.async_rebalance(self.servers[:self.nodes_init],
                                              self.servers[self.nodes_init:self.nodes_init + self.nodes_in],
                                              [])
     old_pass = self.master.rest_password
     self.sleep(10, "Wait for rebalance have some progress")
     self.change_password(new_password=new_password)
     try:
         rebalance.result()
         self.log.exception("rebalance should be failed when password is changing")
         self.verify_unacked_bytes_all_buckets()
     except Exception as ex:
         self.sleep(10, "wait for rebalance failed")
         rest = RestConnection(self.master)
         self.log.info("Latest logs from UI:")
         for i in rest.get_logs():
             self.log.error(i)
         self.assertFalse(RestHelper(rest).is_cluster_rebalanced())
     finally:
         self.change_password(new_password=old_pass)