示例#1
0
 def test_node_reboot(self):
     """Reboot ``self.server_fail`` and check that the cluster recovers.

     Steps performed: update the auto-reprovision settings, reboot the
     node over a remote shell, sleep a fixed time (five times longer on
     Windows), disable the node's firewall, wait on
     ``wait_for_failover_or_assert`` with an expected count of 0, assert
     the cluster is healthy but not rebalanced, rebalance, and verify the
     loaded data of every bucket.

     NOTE(review): ``update_autoreprovision_settings(True, 1)`` presumably
     means "enabled, max_nodes=1" -- confirm against the REST helper.
     """
     wait_timeout = 120
     timeout = self.timeout / 2
     status = self.rest.update_autoreprovision_settings(True, 1)
     if not status:
         self.fail('failed to change autoreprovision_settings!')
     self.sleep(5)
     shell = RemoteMachineShellConnection(self.server_fail)
     # Ask for the OS type once, before the reboot command is issued, so
     # the node is never queried while it is going down.
     os_type = shell.extract_remote_info().type.lower()
     if os_type == 'windows':
         o, r = shell.execute_command("shutdown -r -f -t 0")
     elif os_type == 'linux':
         o, r = shell.execute_command("reboot")
     else:
         # Without this branch ``o`` and ``r`` would be unbound below and
         # the test would die with an UnboundLocalError.
         self.fail("cannot reboot node: unsupported remote OS type '%s'"
                   % os_type)
     shell.log_command_output(o, r)
     if os_type == 'windows':
         time.sleep(wait_timeout * 5)
     else:
         time.sleep(wait_timeout)
     # disable firewall on the node (fresh connection after the reboot)
     shell = RemoteMachineShellConnection(self.server_fail)
     shell.disable_firewall()
     AutoReprovisionBaseTest.wait_for_failover_or_assert(
         self.master, 0,
         timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME, self)
     helper = RestHelper(self.rest)
     self.assertTrue(helper.is_cluster_healthy(),
                     "cluster status is not healthy")
     self.assertFalse(helper.is_cluster_rebalanced(), "cluster is balanced")
     self.rest.rebalance(
         otpNodes=[node.id for node in self.rest.node_statuses()],
         ejectedNodes=[])
     self.assertTrue(self.rest.monitorRebalance())
     buckets = self.rest.get_buckets()
     for bucket in buckets:
         self.verify_loaded_data(self.master, bucket.name,
                                 self.loaded_items[bucket.name])
 def test_node_reboot(self):
     """Reboot ``self.server_fail`` and check that the cluster recovers.

     Steps performed: update the auto-reprovision settings, reboot the
     node over a remote shell, sleep a fixed time (five times longer on
     Windows), disable the node's firewall, wait on
     ``wait_for_failover_or_assert`` with an expected count of 0, assert
     the cluster is healthy but not rebalanced, rebalance, and verify the
     loaded data of every bucket.

     NOTE(review): ``update_autoreprovision_settings(True, 1)`` presumably
     means "enabled, max_nodes=1" -- confirm against the REST helper.
     """
     wait_timeout = 120
     timeout = self.timeout / 2
     status = self.rest.update_autoreprovision_settings(True, 1)
     if not status:
         self.fail('failed to change autoreprovision_settings!')
     self.sleep(5)
     shell = RemoteMachineShellConnection(self.server_fail)
     # Ask for the OS type once, before the reboot command is issued, so
     # the node is never queried while it is going down.
     os_type = shell.extract_remote_info().type.lower()
     if os_type == 'windows':
         o, r = shell.execute_command("shutdown -r -f -t 0")
     elif os_type == 'linux':
         o, r = shell.execute_command("reboot")
     else:
         # Without this branch ``o`` and ``r`` would be unbound below and
         # the test would die with an UnboundLocalError.
         self.fail("cannot reboot node: unsupported remote OS type '%s'"
                   % os_type)
     shell.log_command_output(o, r)
     if os_type == 'windows':
         time.sleep(wait_timeout * 5)
     else:
         time.sleep(wait_timeout)
     # disable firewall on the node (fresh connection after the reboot)
     shell = RemoteMachineShellConnection(self.server_fail)
     shell.disable_firewall()
     AutoReprovisionBaseTest.wait_for_failover_or_assert(
         self.master, 0,
         timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME, self)
     helper = RestHelper(self.rest)
     self.assertTrue(helper.is_cluster_healthy(),
                     "cluster status is not healthy")
     self.assertFalse(helper.is_cluster_rebalanced(), "cluster is balanced")
     self.rest.rebalance(
         otpNodes=[node.id for node in self.rest.node_statuses()],
         ejectedNodes=[])
     self.assertTrue(self.rest.monitorRebalance())
     buckets = self.rest.get_buckets()
     for bucket in buckets:
         self.verify_loaded_data(self.master, bucket.name,
                                 self.loaded_items[bucket.name])
示例#3
0
 def test_node_cb_restart(self):
     """Restart Couchbase on ``self.server_fail`` and check recovery.

     Steps performed: update the auto-reprovision settings, restart
     Couchbase on the node over a remote shell, wait on
     ``wait_for_failover_or_assert`` first with an expected count of 1 and
     then 0, assert the cluster is healthy but not rebalanced, rebalance,
     and verify the loaded data of every bucket.

     NOTE(review): ``update_autoreprovision_settings(True, 1)`` presumably
     means "enabled, max_nodes=1" -- confirm against the REST helper.
     """
     timeout = self.timeout / 2
     status = self.rest.update_autoreprovision_settings(True, 1)
     if not status:
         self.fail('failed to change autoreprovision_settings!')
     self.sleep(5)
     shell = RemoteMachineShellConnection(self.server_fail)
     shell.restart_couchbase()
     AutoReprovisionBaseTest.wait_for_failover_or_assert(
         self.master, 1,
         timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME, self)
     AutoReprovisionBaseTest.wait_for_failover_or_assert(
         self.master, 0,
         timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME, self)
     self.sleep(5)
     helper = RestHelper(self.rest)
     self.assertTrue(helper.is_cluster_healthy(),
                     "cluster status is not healthy")
     # assertFalse fails when the cluster IS balanced, so the failure
     # message has to say exactly that.
     self.assertFalse(helper.is_cluster_rebalanced(),
                      "cluster is balanced")
     self.rest.rebalance(
         otpNodes=[node.id for node in self.rest.node_statuses()],
         ejectedNodes=[])
     self.assertTrue(self.rest.monitorRebalance())
     buckets = self.rest.get_buckets()
     for bucket in buckets:
         self.verify_loaded_data(self.master, bucket.name,
                                 self.loaded_items[bucket.name])
示例#4
0
    def test_two_failed_nodes(self):
        """Stop two servers one after the other, then bring both back.

        Steps performed: update the auto-reprovision settings, stop
        ``self.servers[1]`` and wait for an expected count of 1, stop
        ``self.servers[2]`` and wait for an expected count of 2, assert
        the cluster is unhealthy yet still reported as rebalanced, start
        the servers again in the same order (waiting for counts 1 and 0),
        assert the cluster is healthy but not rebalanced, and rebalance.

        NOTE(review): ``update_autoreprovision_settings(True, 1)``
        presumably means "enabled, max_nodes=1" -- confirm against the
        REST helper.
        """
        timeout = self.timeout / 2
        server_fail1 = self.servers[1]
        server_fail2 = self.servers[2]
        status = self.rest.update_autoreprovision_settings(True, 1)
        if not status:
            self.fail('failed to change autoreprovision_settings!')
        self.sleep(5)
        self.log.info("stopping the first server")
        self._stop_couchbase(server_fail1)
        AutoReprovisionBaseTest.wait_for_failover_or_assert(
            self.master, 1,
            timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME, self)

        self.log.info("stopping the second server")
        self._stop_couchbase(server_fail2)
        AutoReprovisionBaseTest.wait_for_failover_or_assert(
            self.master, 2,
            timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME, self)
        helper = RestHelper(self.rest)
        self.assertFalse(helper.is_cluster_healthy(),
                         "cluster status is healthy")
        self.assertTrue(helper.is_cluster_rebalanced(),
                        "cluster is not balanced")

        # Each server is started exactly once; a second back-to-back start
        # of the first server was a copy-paste duplicate.
        self._start_couchbase(server_fail1)
        AutoReprovisionBaseTest.wait_for_failover_or_assert(
            self.master, 1,
            timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME, self)
        self._start_couchbase(server_fail2)
        AutoReprovisionBaseTest.wait_for_failover_or_assert(
            self.master, 0,
            timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME, self)
        self.sleep(20)
        helper = RestHelper(self.rest)
        # assertTrue fails when the cluster is NOT healthy, so the message
        # has to say exactly that.
        self.assertTrue(helper.is_cluster_healthy(),
                        "cluster status is not healthy")
        self.assertFalse(helper.is_cluster_rebalanced(), "cluster is balanced")
        self.rest.rebalance(
            otpNodes=[node.id for node in self.rest.node_statuses()],
            ejectedNodes=[])
        self.assertTrue(self.rest.monitorRebalance())
    def test_two_failed_nodes(self):
        """Stop two servers one after the other, then bring both back.

        Steps performed: update the auto-reprovision settings, stop
        ``self.servers[1]`` and wait for an expected count of 1, stop
        ``self.servers[2]`` and wait for an expected count of 2, assert
        the cluster is unhealthy yet still reported as rebalanced, start
        the servers again in the same order (waiting for counts 1 and 0),
        assert the cluster is healthy but not rebalanced, and rebalance.

        NOTE(review): ``update_autoreprovision_settings(True, 1)``
        presumably means "enabled, max_nodes=1" -- confirm against the
        REST helper.
        """
        timeout = self.timeout / 2
        server_fail1 = self.servers[1]
        server_fail2 = self.servers[2]
        status = self.rest.update_autoreprovision_settings(True, 1)
        if not status:
            self.fail('failed to change autoreprovision_settings!')
        self.sleep(5)
        self.log.info("stopping the first server")
        self._stop_couchbase(server_fail1)
        AutoReprovisionBaseTest.wait_for_failover_or_assert(
            self.master, 1,
            timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME, self)

        self.log.info("stopping the second server")
        self._stop_couchbase(server_fail2)
        AutoReprovisionBaseTest.wait_for_failover_or_assert(
            self.master, 2,
            timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME, self)
        helper = RestHelper(self.rest)
        self.assertFalse(helper.is_cluster_healthy(),
                         "cluster status is healthy")
        self.assertTrue(helper.is_cluster_rebalanced(),
                        "cluster is not balanced")

        # Each server is started exactly once; a second back-to-back start
        # of the first server was a copy-paste duplicate.
        self._start_couchbase(server_fail1)
        AutoReprovisionBaseTest.wait_for_failover_or_assert(
            self.master, 1,
            timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME, self)
        self._start_couchbase(server_fail2)
        AutoReprovisionBaseTest.wait_for_failover_or_assert(
            self.master, 0,
            timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME, self)
        self.sleep(20)
        helper = RestHelper(self.rest)
        # assertTrue fails when the cluster is NOT healthy, so the message
        # has to say exactly that.
        self.assertTrue(helper.is_cluster_healthy(),
                        "cluster status is not healthy")
        self.assertFalse(helper.is_cluster_rebalanced(), "cluster is balanced")
        self.rest.rebalance(
            otpNodes=[node.id for node in self.rest.node_statuses()],
            ejectedNodes=[])
        self.assertTrue(self.rest.monitorRebalance())
示例#6
0
 def test_node_memcached_failure(self):
     """Pause Couchbase on ``self.server_fail`` and check recovery.

     Steps performed: update the auto-reprovision settings, pause the
     node via ``_pause_couchbase``, wait on ``wait_for_warmup_or_assert``
     with an expected count of 1, run ``common_basic_setup`` on the node,
     wait on ``wait_for_failover_or_assert`` with an expected count of 0,
     assert the cluster is healthy and rebalanced, and verify the loaded
     data of every bucket.
     """
     half_timeout = self.timeout / 2
     if not self.rest.update_autoreprovision_settings(True, 1):
         self.fail('failed to change autoreprovision_settings!')
     self.sleep(5)
     self._pause_couchbase(self.server_fail)
     self.sleep(5)

     # Both waits below share the same time budget.
     wait_budget = (half_timeout +
                    AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME)
     AutoReprovisionBaseTest.wait_for_warmup_or_assert(
         self.master, 1, wait_budget, self)
     RemoteUtilHelper.common_basic_setup([self.server_fail])
     AutoReprovisionBaseTest.wait_for_failover_or_assert(
         self.master, 0, wait_budget, self)

     cluster = RestHelper(self.rest)
     self.assertTrue(cluster.is_cluster_healthy(),
                     "cluster status is not healthy")
     self.assertTrue(cluster.is_cluster_rebalanced(),
                     "cluster is not balanced")
     for bucket in self.rest.get_buckets():
         self.verify_loaded_data(self.master, bucket.name,
                                 self.loaded_items[bucket.name])
 def test_node_cb_restart(self):
     """Restart Couchbase on ``self.server_fail`` and check recovery.

     Steps performed: update the auto-reprovision settings, restart
     Couchbase on the node over a remote shell, wait on
     ``wait_for_failover_or_assert`` first with an expected count of 1 and
     then 0, assert the cluster is healthy but not rebalanced, rebalance,
     and verify the loaded data of every bucket.

     NOTE(review): ``update_autoreprovision_settings(True, 1)`` presumably
     means "enabled, max_nodes=1" -- confirm against the REST helper.
     """
     timeout = self.timeout / 2
     status = self.rest.update_autoreprovision_settings(True, 1)
     if not status:
         self.fail('failed to change autoreprovision_settings!')
     self.sleep(5)
     shell = RemoteMachineShellConnection(self.server_fail)
     shell.restart_couchbase()
     AutoReprovisionBaseTest.wait_for_failover_or_assert(
         self.master, 1,
         timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME, self)
     AutoReprovisionBaseTest.wait_for_failover_or_assert(
         self.master, 0,
         timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME, self)
     self.sleep(5)
     helper = RestHelper(self.rest)
     self.assertTrue(helper.is_cluster_healthy(),
                     "cluster status is not healthy")
     # assertFalse fails when the cluster IS balanced, so the failure
     # message has to say exactly that.
     self.assertFalse(helper.is_cluster_rebalanced(),
                      "cluster is balanced")
     self.rest.rebalance(
         otpNodes=[node.id for node in self.rest.node_statuses()],
         ejectedNodes=[])
     self.assertTrue(self.rest.monitorRebalance())
     buckets = self.rest.get_buckets()
     for bucket in buckets:
         self.verify_loaded_data(self.master, bucket.name,
                                 self.loaded_items[bucket.name])
示例#8
0
    def test_ui_logs(self):
        """Check the log entries written for auto-reprovision actions.

        Steps performed: update the auto-reprovision settings with a
        second argument of 2 and look for the matching "Enabled ..." log
        text, stop ``self.servers[1]``, reset the auto-reprovision count
        and look for the "reset from 0" text, stop ``self.servers[2]``,
        check the settings (count 0), start both servers again, check the
        settings (count 2) and the "disabled as maximum number" text,
        reset the count again and look for the "reset from 2" text, then
        rebalance and check the final log texts.

        Every log check reads the latest entries via
        ``self.rest.get_logs(5)`` and compares against their ``'text'``
        fields.
        """
        timeout = self.timeout / 2
        server_fail1 = self.servers[1]
        server_fail2 = self.servers[2]
        status = self.rest.update_autoreprovision_settings(True, 2)
        if not status:
            self.fail('failed to change autoreprovision_settings!')
        self.sleep(5)
        logs = self.rest.get_logs(5)
        # assertIn/assertNotIn show the searched list on failure, unlike
        # assertTrue(x in list) which only reports "False is not true".
        self.assertIn(
            u'Enabled auto-reprovision config with max_nodes set to 2',
            [entry['text'] for entry in logs])

        self.log.info("stopping the first server")
        self._stop_couchbase(server_fail1)
        AutoReprovisionBaseTest.wait_for_failover_or_assert(
            self.master, 1,
            timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME, self)

        self.log.info("resetting the autoreprovision count")
        if not self.rest.reset_autoreprovision():
            self.fail('failed to reset autoreprovision count!')
        logs = self.rest.get_logs(5)
        self.assertIn(u'auto-reprovision count reset from 0',
                      [entry['text'] for entry in logs])

        self.log.info("stopping the second server")
        self._stop_couchbase(server_fail2)
        AutoReprovisionBaseTest.wait_for_failover_or_assert(
            self.master, 2,
            timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME, self)
        settings = self.rest.get_autoreprovision_settings()
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(settings.enabled, True)
        self.assertEqual(settings.max_nodes, 2)
        self.assertEqual(settings.count, 0)
        self._start_couchbase(server_fail2)
        self._start_couchbase(server_fail1)
        self.sleep(30)
        settings = self.rest.get_autoreprovision_settings()
        self.assertEqual(settings.enabled, True)
        self.assertEqual(settings.max_nodes, 2)
        self.assertEqual(settings.count, 2)
        logs = self.rest.get_logs(5)
        self.assertIn(
            u'auto-reprovision is disabled as maximum number of nodes (2) '
            u'that can be auto-reprovisioned has been reached.',
            [entry['text'] for entry in logs])

        self.log.info("resetting the autoreprovision count")
        if not self.rest.reset_autoreprovision():
            self.fail('failed to reset autoreprovision count!')
        settings = self.rest.get_autoreprovision_settings()
        self.assertEqual(settings.enabled, True)
        self.assertEqual(settings.max_nodes, 2)
        self.assertEqual(settings.count, 0)
        logs = self.rest.get_logs(5)
        self.assertIn(u'auto-reprovision count reset from 2',
                      [entry['text'] for entry in logs])

        helper = RestHelper(self.rest)
        self.assertTrue(helper.is_cluster_healthy(),
                        "cluster status is not healthy")
        self.assertFalse(helper.is_cluster_rebalanced(), "cluster is balanced")
        self.rest.rebalance(
            otpNodes=[node.id for node in self.rest.node_statuses()],
            ejectedNodes=[])
        self.assertTrue(self.rest.monitorRebalance())
        logs = self.rest.get_logs(5)
        log_texts = [entry['text'] for entry in logs]
        # https://issues.couchbase.com/browse/MB-24520
        self.assertNotIn(u'Reset auto-failover count', log_texts)
        self.assertIn(u'Rebalance completed successfully.', log_texts)
示例#9
0
 def test_node_memcached_failure_in_series(self):
     """Disrupt every server in turn with a random operation and verify.

     Walks ``self.servers`` from last to first.  For each server one of
     stop / memcached_failure / restart / failover / reboot is picked at
     random and applied; the test then waits for the expected counts,
     asserts the cluster is healthy, rebalances when the cluster is
     expected to be unbalanced, and verifies the loaded data of every
     bucket unless data loss is expected (``self.replicas == 0`` and the
     operation was a restart or a reboot).  Once data loss has been
     flagged, verification is skipped for all remaining servers as well.
     """
     timeout = self.timeout / 2
     status = self.rest.update_autoreprovision_settings(True, 1)
     if not status:
         self.fail('failed to change autoreprovision_settings!')
     self.sleep(5)
     # Loop-invariant time budget shared by every wait below.
     detect_timeout = timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME
     operations = ['stop', 'memcached_failure', 'restart', 'failover',
                   'reboot']
     data_lost = False
     for i, server in reversed(list(enumerate(self.servers))):
         # Single-argument print(...) behaves the same under the Python 2
         # print statement and the Python 3 print function.
         print(server)
         operation = random.choice(operations)
         shell = RemoteMachineShellConnection(server)
         print("operation %s" % operation)
         if i == 0:
             # servers[0] is the node about to be disrupted, so the waits
             # are driven through servers[1] from here on.
             # NOTE(review): self.rest is not re-pointed here; confirm it
             # does not depend on servers[0] being reachable.
             self.master = self.servers[1]
         if operation == 'stop':
             self._stop_couchbase(server)
         elif operation == 'memcached_failure':
             self._pause_couchbase(server)
         elif operation == 'restart':
             shell.restart_couchbase()
         elif operation == 'failover':
             RemoteUtilHelper.enable_firewall(server)
         elif operation == 'reboot':
             os_type = shell.extract_remote_info().type.lower()
             if os_type == 'windows':
                 o, r = shell.execute_command("shutdown -r -f -t 0")
                 self.sleep(200)
             elif os_type == 'linux':
                 o, r = shell.execute_command("reboot")
             else:
                 # Without this branch ``o`` and ``r`` would be unbound
                 # below and the test would die with UnboundLocalError.
                 self.fail("cannot reboot node: unsupported remote OS "
                           "type '%s'" % os_type)
             shell.log_command_output(o, r)
             self.sleep(60)
         self.sleep(40)
         if operation == 'memcached_failure':
             AutoReprovisionBaseTest.wait_for_warmup_or_assert(
                 self.master, 1, detect_timeout, self)
         if operation not in ('restart', 'memcached_failure', 'reboot'):
             AutoReprovisionBaseTest.wait_for_failover_or_assert(
                 self.master, 1, detect_timeout, self)
         if operation != 'restart':
             RemoteUtilHelper.common_basic_setup([server])
         AutoReprovisionBaseTest.wait_for_failover_or_assert(
             self.master, 0, detect_timeout, self)
         helper = RestHelper(RestConnection(self.master))
         self.assertTrue(helper.is_cluster_healthy(),
                         "cluster status is not healthy")
         self.sleep(40)
         if operation in ('memcached_failure', 'failover'):
             self.assertTrue(helper.is_cluster_rebalanced(),
                             "cluster is not balanced")
         elif 'kv' in server.services and self.replicas > 0:
             self.assertFalse(helper.is_cluster_rebalanced(),
                              "cluster is balanced")
             self.rest.rebalance(
                 otpNodes=[node.id for node in self.rest.node_statuses()],
                 ejectedNodes=[])
             self.assertTrue(self.rest.monitorRebalance())
         else:
             self.assertTrue(helper.is_cluster_rebalanced(),
                             "cluster is not balanced")
         buckets = self.rest.get_buckets()
         if self.replicas == 0 and operation in ('restart', 'reboot'):
             data_lost = True
         if not data_lost:
             for bucket in buckets:
                 self.verify_loaded_data(self.master, bucket.name,
                                         self.loaded_items[bucket.name])
示例#10
0
    def test_ui_logs(self):
        """Check the log entries written for auto-reprovision actions.

        Steps performed: update the auto-reprovision settings with a
        second argument of 2 and look for the matching "Enabled ..." log
        text, stop ``self.servers[1]``, reset the auto-reprovision count
        and look for the "reset from 0" text, stop ``self.servers[2]``,
        check the settings (count 0), start both servers again, check the
        settings (count 2) and the "disabled as maximum number" text,
        reset the count again and look for the "reset from 2" text, then
        rebalance and check the final log texts.

        Every log check reads the latest entries via
        ``self.rest.get_logs(5)`` and compares against their ``'text'``
        fields.
        """
        timeout = self.timeout / 2
        server_fail1 = self.servers[1]
        server_fail2 = self.servers[2]
        status = self.rest.update_autoreprovision_settings(True, 2)
        if not status:
            self.fail('failed to change autoreprovision_settings!')
        self.sleep(5)
        logs = self.rest.get_logs(5)
        # assertIn/assertNotIn show the searched list on failure, unlike
        # assertTrue(x in list) which only reports "False is not true".
        self.assertIn(
            u'Enabled auto-reprovision config with max_nodes set to 2',
            [entry['text'] for entry in logs])

        self.log.info("stopping the first server")
        self._stop_couchbase(server_fail1)
        AutoReprovisionBaseTest.wait_for_failover_or_assert(
            self.master, 1,
            timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME, self)

        self.log.info("resetting the autoreprovision count")
        if not self.rest.reset_autoreprovision():
            self.fail('failed to reset autoreprovision count!')
        logs = self.rest.get_logs(5)
        self.assertIn(u'auto-reprovision count reset from 0',
                      [entry['text'] for entry in logs])

        self.log.info("stopping the second server")
        self._stop_couchbase(server_fail2)
        AutoReprovisionBaseTest.wait_for_failover_or_assert(
            self.master, 2,
            timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME, self)
        settings = self.rest.get_autoreprovision_settings()
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(settings.enabled, True)
        self.assertEqual(settings.max_nodes, 2)
        self.assertEqual(settings.count, 0)
        self._start_couchbase(server_fail2)
        self._start_couchbase(server_fail1)
        self.sleep(30)
        settings = self.rest.get_autoreprovision_settings()
        self.assertEqual(settings.enabled, True)
        self.assertEqual(settings.max_nodes, 2)
        self.assertEqual(settings.count, 2)
        logs = self.rest.get_logs(5)
        self.assertIn(
            u'auto-reprovision is disabled as maximum number of nodes (2) '
            u'that can be auto-reprovisioned has been reached.',
            [entry['text'] for entry in logs])

        self.log.info("resetting the autoreprovision count")
        if not self.rest.reset_autoreprovision():
            self.fail('failed to reset autoreprovision count!')
        settings = self.rest.get_autoreprovision_settings()
        self.assertEqual(settings.enabled, True)
        self.assertEqual(settings.max_nodes, 2)
        self.assertEqual(settings.count, 0)
        logs = self.rest.get_logs(5)
        self.assertIn(u'auto-reprovision count reset from 2',
                      [entry['text'] for entry in logs])

        helper = RestHelper(self.rest)
        self.assertTrue(helper.is_cluster_healthy(),
                        "cluster status is not healthy")
        self.assertFalse(helper.is_cluster_rebalanced(), "cluster is balanced")
        self.rest.rebalance(
            otpNodes=[node.id for node in self.rest.node_statuses()],
            ejectedNodes=[])
        self.assertTrue(self.rest.monitorRebalance())
        logs = self.rest.get_logs(5)
        log_texts = [entry['text'] for entry in logs]
        # https://issues.couchbase.com/browse/MB-24520
        self.assertNotIn(u'Reset auto-failover count', log_texts)
        self.assertIn(u'Rebalance completed successfully.', log_texts)
示例#11
0
 def test_node_memcached_failure_in_series(self):
     """Disrupt every server in turn with a random operation and verify.

     Walks ``self.servers`` from last to first.  For each server one of
     stop / memcached_failure / restart / failover / reboot is picked at
     random and applied; the test then waits for the expected counts,
     asserts the cluster is healthy, rebalances when the cluster is
     expected to be unbalanced, and verifies the loaded data of every
     bucket unless data loss is expected (``self.replicas == 0`` and the
     operation was a restart or a reboot).  Once data loss has been
     flagged, verification is skipped for all remaining servers as well.
     """
     timeout = self.timeout / 2
     status = self.rest.update_autoreprovision_settings(True, 1)
     if not status:
         self.fail('failed to change autoreprovision_settings!')
     self.sleep(5)
     # Loop-invariant time budget shared by every wait below.
     detect_timeout = timeout + AutoReprovisionBaseTest.MAX_FAIL_DETECT_TIME
     operations = ['stop', 'memcached_failure', 'restart', 'failover',
                   'reboot']
     data_lost = False
     for i, server in reversed(list(enumerate(self.servers))):
         # Single-argument print(...) behaves the same under the Python 2
         # print statement and the Python 3 print function.
         print(server)
         operation = random.choice(operations)
         shell = RemoteMachineShellConnection(server)
         print("operation %s" % operation)
         if i == 0:
             # servers[0] is the node about to be disrupted, so the waits
             # are driven through servers[1] from here on.
             # NOTE(review): self.rest is not re-pointed here; confirm it
             # does not depend on servers[0] being reachable.
             self.master = self.servers[1]
         if operation == 'stop':
             self._stop_couchbase(server)
         elif operation == 'memcached_failure':
             self._pause_couchbase(server)
         elif operation == 'restart':
             shell.restart_couchbase()
         elif operation == 'failover':
             RemoteUtilHelper.enable_firewall(server)
         elif operation == 'reboot':
             os_type = shell.extract_remote_info().type.lower()
             if os_type == 'windows':
                 o, r = shell.execute_command("shutdown -r -f -t 0")
                 self.sleep(200)
             elif os_type == 'linux':
                 o, r = shell.execute_command("reboot")
             else:
                 # Without this branch ``o`` and ``r`` would be unbound
                 # below and the test would die with UnboundLocalError.
                 self.fail("cannot reboot node: unsupported remote OS "
                           "type '%s'" % os_type)
             shell.log_command_output(o, r)
             self.sleep(60)
         self.sleep(40)
         if operation == 'memcached_failure':
             AutoReprovisionBaseTest.wait_for_warmup_or_assert(
                 self.master, 1, detect_timeout, self)
         if operation not in ('restart', 'memcached_failure', 'reboot'):
             AutoReprovisionBaseTest.wait_for_failover_or_assert(
                 self.master, 1, detect_timeout, self)
         if operation != 'restart':
             RemoteUtilHelper.common_basic_setup([server])
         AutoReprovisionBaseTest.wait_for_failover_or_assert(
             self.master, 0, detect_timeout, self)
         helper = RestHelper(RestConnection(self.master))
         self.assertTrue(helper.is_cluster_healthy(),
                         "cluster status is not healthy")
         self.sleep(40)
         if operation in ('memcached_failure', 'failover'):
             self.assertTrue(helper.is_cluster_rebalanced(),
                             "cluster is not balanced")
         elif 'kv' in server.services and self.replicas > 0:
             self.assertFalse(helper.is_cluster_rebalanced(),
                              "cluster is balanced")
             self.rest.rebalance(
                 otpNodes=[node.id for node in self.rest.node_statuses()],
                 ejectedNodes=[])
             self.assertTrue(self.rest.monitorRebalance())
         else:
             self.assertTrue(helper.is_cluster_rebalanced(),
                             "cluster is not balanced")
         buckets = self.rest.get_buckets()
         if self.replicas == 0 and operation in ('restart', 'reboot'):
             data_lost = True
         if not data_lost:
             for bucket in buckets:
                 self.verify_loaded_data(self.master, bucket.name,
                                         self.loaded_items[bucket.name])