def ceph_partitions_repetitive_cold_restart(self):
        """Ceph-osd partitions on 30% ~start rally~ repetitive cold restart

        Scenario:
            1. Revert snapshot 'prepare_load_ceph_ha'
            2. Wait until MySQL Galera is UP on some controller
            3. Check Ceph status
            4. Run ostf
            5. Fill ceph partitions on all nodes up to 30%
            6. Check Ceph status
            7. Disable UMM
            8. Run RALLY
            9. Repeat the following cycle 100 times:
            10. Cold restart of all nodes
            11. Wait for HA services ready
            12. Wait until MySQL Galera is UP on some controller
            13. Run ostf

        Duration 1700m
        Snapshot ceph_partitions_repetitive_cold_restart
        """
        self.show_step(1, initialize=True)
        self.env.revert_snapshot("prepare_load_ceph_ha")

        self.show_step(2)
        primary_controller = self.fuel_web.get_nailgun_primary_node(
            self.env.d_env.nodes().slaves[0])
        self.fuel_web.wait_mysql_galera_is_up([primary_controller.name])
        cluster_id = self.fuel_web.get_last_created_cluster()

        self.show_step(3)
        self.fuel_web.check_ceph_status(cluster_id)

        self.show_step(4)
        self.fuel_web.run_ostf(cluster_id=cluster_id)

        self.show_step(5)
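        # For every Ceph OSD node, find the first mounted Ceph partition and
        # fill it. fill_space is assumed to write a file of the given size
        # (here 30 * 1024, presumably MB) into the target directory so the
        # partition reaches roughly 30% usage.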
        ceph_nodes = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
            cluster_id, ['ceph-osd'])
        for node in ceph_nodes:
            ip = node['ip']
            file_dir = self.ssh_manager.execute_on_remote(
                ip=ip,
                cmd="mount | grep -m 1 ceph | awk '{printf($3)}'")['stdout'][0]
            fill_space(ip, file_dir, 30 * 1024)

        self.show_step(6)
        self.fuel_web.check_ceph_status(cluster_id)

        self.show_step(7)
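        # Disable UMM (unattended maintenance mode) on every node so the cold
        # restarts below do not drop controllers into maintenance mode.
        # change_config is assumed to rewrite UMM.CONF on the node over SSH.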

        for node in self.fuel_web.client.list_cluster_nodes(cluster_id):
            change_config(node['ip'], umm=False)

        self.show_step(8)
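        # Start one Rally benchmark container per configured tag; the results
        # are kept in benchmark_results and written to the debug log.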
        assert_true(settings.PATCHING_RUN_RALLY,
                    'PATCHING_RUN_RALLY is not set to True')
        rally_benchmarks = {}
        benchmark_results = {}
        for tag in set(settings.RALLY_TAGS):
            rally_benchmarks[tag] = RallyBenchmarkTest(
                container_repo=settings.RALLY_DOCKER_REPO,
                environment=self.env,
                cluster_id=cluster_id,
                test_type=tag
            )
            benchmark_results[tag] = rally_benchmarks[tag].run()
            logger.debug(benchmark_results[tag].show())

        self.show_step(9)
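        # Each iteration cold-restarts slave-01..slave-05, waits for HA and OS
        # services plus Galera to recover, re-checks volume attachment (with a
        # single retry after 180 seconds), and then runs the full OSTF suite.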
        for i in xrange(settings.RESTART_COUNT):
            self.show_step(10, 'number {}'.format(i + 1), initialize=True)
            self.fuel_web.cold_restart_nodes(
                self.env.d_env.get_nodes(name__in=[
                    'slave-01',
                    'slave-02',
                    'slave-03',
                    'slave-04',
                    'slave-05']))

            self.show_step(11)
            self.fuel_web.assert_ha_services_ready(cluster_id)

            self.fuel_web.assert_os_services_ready(cluster_id)

            self.show_step(12)
            self.fuel_web.wait_mysql_galera_is_up([primary_controller.name])

            try:
                self.fuel_web.run_single_ostf_test(
                    cluster_id, test_sets=['smoke'],
                    test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
                        'Create volume and attach it to instance'))
            except AssertionError:
                logger.debug("Test failed from first probe,"
                             " we sleep 180 seconds and try one more time "
                             "and if it fails again - test will fail ")
                time.sleep(180)
                self.fuel_web.run_single_ostf_test(
                    cluster_id, test_sets=['smoke'],
                    test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
                        'Create volume and attach it to instance'))
            self.show_step(13)
            # LB 1519018
            self.fuel_web.run_ostf(cluster_id=cluster_id)
            self.env.make_snapshot("ceph_partitions_repetitive_cold_restart")
    def auto_cic_maintenance_mode(self):
        """Check auto maintenance mode for controller

        Scenario:
            1. Revert snapshot
            2. Unexpected reboot
            3. Wait until the controller switches into maintenance mode
            4. Exit maintenance mode
            5. Check that the controller becomes available

        Duration 155m
        """
        self.env.revert_snapshot('cic_maintenance_mode')

        cluster_id = self.fuel_web.get_last_created_cluster()

        # Select a non-primary controller
        regular_ctrl = self.fuel_web.get_nailgun_node_by_name("slave-02")
        dregular_ctrl = self.fuel_web.get_devops_node_by_nailgun_node(
            regular_ctrl)
        _ip = regular_ctrl['ip']
        _id = regular_ctrl['id']
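        # check_available_mode / check_auto_mode are assumed to return the UMM
        # status of the node as a string, hence the "'True' in ..." checks below.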

        asserts.assert_true('True' in check_available_mode(_ip),
                            "Maintenance mode is not available")

        change_config(_ip, reboot_count=0)

        logger.info('Change UMM.CONF on node-{0}'.format(_id))

        logger.info('Unexpected reboot on node-{0}'.format(_id))

        command = 'reboot --force >/dev/null & '

        self.ssh_manager.execute_on_remote(ip=_ip, cmd=command)

        wait(
            lambda: not checkers.check_ping(self.env.get_admin_node_ip(), _ip),
            timeout=60 * 10,
            timeout_msg='Node {} still responds to ping'.format(
                dregular_ctrl.name))

        self.fuel_web.wait_node_is_offline(dregular_ctrl)

        logger.info('Check that node-{0} is in maintenance mode after'
                    ' unexpected reboot'.format(_id))
        asserts.assert_true(
            checkers.check_ping(self.env.get_admin_node_ip(),
                                _ip,
                                deadline=600),
            "Host {0} is not reachable by ping during 600 sec".format(_ip))

        asserts.assert_true('True' in check_auto_mode(_ip),
                            "Maintenance mode is not switched on")

        logger.info('Turn off maintenance mode')
        self.ssh_manager.execute_on_remote(ip=_ip, cmd="umm off")
        time.sleep(30)
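        # Calling change_config without extra arguments is assumed to restore
        # the default UMM.CONF so the node behaves normally after leaving
        # maintenance mode.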

        change_config(_ip)

        self.fuel_web.wait_node_is_online(dregular_ctrl)

        # Wait until MySQL Galera is UP on some controller
        self.fuel_web.wait_mysql_galera_is_up([dregular_ctrl.name])

        # Wait until Cinder services UP on a controller
        self.fuel_web.wait_cinder_is_up([dregular_ctrl.name])

        # Wait until RabbitMQ cluster is UP
        wait_pass(lambda: self.fuel_web.run_single_ostf_test(
            cluster_id,
            test_sets=['ha'],
            test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
                'RabbitMQ availability')),
                  timeout=1500)
        logger.info('RabbitMQ cluster is available')

        # Wait until all Openstack services are UP
        wait_pass(lambda: self.fuel_web.run_single_ostf_test(
            cluster_id,
            test_sets=['sanity'],
            test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
                'Check that required services are running')),
                  timeout=1500)
        logger.info("Required services are running")

        try:
            self.fuel_web.run_ostf(cluster_id,
                                   test_sets=['smoke', 'sanity', 'ha'])
        except AssertionError:
            logger.debug("Test failed from first probe,"
                         " we sleep 600 second try one more time"
                         " and if it fails again - test will fails ")
            time.sleep(600)
            self.fuel_web.run_ostf(cluster_id,
                                   test_sets=['smoke', 'sanity', 'ha'])
    def negative_auto_cic_maintenance_mode(self):
        """Check negative scenario for auto maintenance mode

        Scenario:
            1. Revert snapshot
            2. Disable UMM
            3. Change UMM.CONF
            4. Unexpected reboot
            5. Check that the controller does not switch into maintenance mode
            6. Check that the controller becomes available

        Duration 85m
        """
        self.env.revert_snapshot('cic_maintenance_mode')

        cluster_id = self.fuel_web.get_last_created_cluster()

        # Select a non-primary controller
        regular_ctrl = self.fuel_web.get_nailgun_node_by_name("slave-02")
        dregular_ctrl = self.fuel_web.get_devops_node_by_nailgun_node(
            regular_ctrl)
        _ip = regular_ctrl['ip']
        _id = regular_ctrl['id']

        asserts.assert_true('True' in check_available_mode(_ip),
                            "Maintenance mode is not available")
        logger.info('Disable UMM on node-{0}'.format(_id))

        change_config(_ip, umm=False, reboot_count=0)

        asserts.assert_false('True' in check_available_mode(_ip),
                             "Maintenance mode should not be available")

        logger.info('Unexpected reboot on node-{0}'.format(_id))

        self.ssh_manager.check_call(ip=_ip, command='reboot >/dev/null & ')

        wait(
            lambda: not checkers.check_ping(self.env.get_admin_node_ip(), _ip),
            timeout=60 * 10,
            timeout_msg='Node {} still responds to ping'.format(
                dregular_ctrl.name))

        # The node may not have enough time to be marked offline after the
        # reboot, so just wait until it answers ping again

        asserts.assert_true(
            checkers.check_ping(self.env.get_admin_node_ip(),
                                _ip,
                                deadline=600),
            "Host {0} is not reachable by ping during 600 sec".format(_ip))

        self.fuel_web.wait_node_is_online(dregular_ctrl)

        logger.info('Check that node-{0} is not in maintenance mode after'
                    ' unexpected reboot'.format(_id))

        wait(lambda: tcp_ping(_ip, 22),
             timeout=60 * 10,
             timeout_msg='Node {} is still not reachable via SSH'.format(
                 dregular_ctrl.name))

        asserts.assert_false('True' in check_auto_mode(_ip),
                             "Maintenance mode should not switched")

        # Wait until MySQL Galera is UP on some controller
        self.fuel_web.wait_mysql_galera_is_up([dregular_ctrl.name])

        # Wait until Cinder services UP on a controller
        self.fuel_web.wait_cinder_is_up([dregular_ctrl.name])

        # Wait until RabbitMQ cluster is UP
        wait_pass(lambda: self.fuel_web.run_single_ostf_test(
            cluster_id,
            test_sets=['ha'],
            test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
                'RabbitMQ availability')),
                  timeout=1500)
        logger.info('RabbitMQ cluster is available')

        # TODO(astudenov): add timeout_msg
        wait_pass(lambda: self.fuel_web.run_single_ostf_test(
            cluster_id,
            test_sets=['sanity'],
            test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
                'Check that required services are running')),
                  timeout=1500)
        logger.info("Required services are running")

        try:
            self.fuel_web.run_ostf(cluster_id,
                                   test_sets=['smoke', 'sanity', 'ha'])
        except AssertionError:
            logger.debug("Test failed from first probe,"
                         " we sleep 600 second try one more time"
                         " and if it fails again - test will fails ")
            time.sleep(600)
            self.fuel_web.run_ostf(cluster_id,
                                   test_sets=['smoke', 'sanity', 'ha'])
    def negative_auto_cic_maintenance_mode(self):
        """Check negative scenario for auto maintenance mode

        Scenario:
            1. Revert snapshot
            2. Disable UMM
            3. Change UMM.CONF
            4. Unexpected reboot
            5. Check that the controller does not switch into maintenance mode
            6. Check that the controller becomes available

        Duration 85m
        """
        self.env.revert_snapshot('cic_maintenance_mode')

        cluster_id = self.fuel_web.get_last_created_cluster()

        # Select a non-primary controller
        regular_ctrl = self.fuel_web.get_nailgun_node_by_name("slave-02")
        dregular_ctrl = self.fuel_web.get_devops_node_by_nailgun_node(
            regular_ctrl)
        _ip = regular_ctrl['ip']
        _id = regular_ctrl['id']

        asserts.assert_true('True' in check_available_mode(_ip),
                            "Maintenance mode is not available")
        logger.info('Disable UMM on node-{0}'.format(_id))

        change_config(_ip, umm=False, reboot_count=0)

        asserts.assert_false('True' in check_available_mode(_ip),
                             "Maintenance mode should not be available")

        command = 'reboot --force >/dev/null & '

        logger.info('Unexpected reboot on node-{0}'
                    .format(_id))

        self.ssh_manager.execute_on_remote(
            ip=_ip,
            cmd=command)

        wait(lambda:
             not checkers.check_ping(self.env.get_admin_node_ip(),
                                     _ip),
             timeout=60 * 10,
             timeout_msg='Node {} still responds to ping'.format(
                 dregular_ctrl.name))

        # The node may not have enough time to be marked offline after the
        # forced reboot, so just wait until it answers ping again

        asserts.assert_true(
            checkers.check_ping(self.env.get_admin_node_ip(),
                                _ip,
                                deadline=600),
            "Host {0} is not reachable by ping during 600 sec"
            .format(_ip))

        self.fuel_web.wait_node_is_online(dregular_ctrl)

        logger.info('Check that node-{0} is not in maintenance mode after'
                    ' unexpected reboot'.format(_id))

        wait(lambda: tcp_ping(_ip, 22),
             timeout=60 * 10,
             timeout_msg='Node {} is still not reachable via SSH'.format(
                 dregular_ctrl.name))

        asserts.assert_false('True' in check_auto_mode(_ip),
                             "Maintenance mode should not switched")

        # Wait until MySQL Galera is UP on some controller
        self.fuel_web.wait_mysql_galera_is_up(
            [dregular_ctrl.name])

        # Wait until Cinder services UP on a controller
        self.fuel_web.wait_cinder_is_up(
            [dregular_ctrl.name])

        # Wait until RabbitMQ cluster is UP
        wait_pass(lambda:
                  self.fuel_web.run_single_ostf_test(
                      cluster_id, test_sets=['ha'],
                      test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
                          'RabbitMQ availability')),
                  timeout=1500)
        logger.info('RabbitMQ cluster is available')

        # TODO(astudenov): add timeout_msg
        wait_pass(lambda:
                  self.fuel_web.run_single_ostf_test(
                      cluster_id, test_sets=['sanity'],
                      test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
                          'Check that required services are running')),
                  timeout=1500)
        logger.info("Required services are running")

        try:
            self.fuel_web.run_ostf(cluster_id,
                                   test_sets=['smoke', 'sanity', 'ha'])
        except AssertionError:
            logger.debug("Test failed from first probe,"
                         " we sleep 600 second try one more time"
                         " and if it fails again - test will fails ")
            time.sleep(600)
            self.fuel_web.run_ostf(cluster_id,
                                   test_sets=['smoke', 'sanity', 'ha'])