def baremetal_deploy_virt_nodes_on_different_computes(self):
    """Baremetal deployment of a cluster with virtual nodes in HA
    mode; each virtual node on a separate compute

    Scenario:
        1. Create cluster
        2. Assign compute and virt roles to three slave nodes
        3. Upload VM configuration for one VM to each slave node
        4. Apply network template for the env and spawn the VMs
        5. Assign controller role to VMs
        6. Deploy cluster
        7. Run OSTF
        8. Mark 'mysql' partition to be preserved on one of controllers
        9. Reinstall the controller
        10. Verify that the reinstalled controller joined the Galera
            cluster and synced its state
        11. Run OSTF
        12. Gracefully reboot one controller using "reboot" command
            and wait till it comes up
        13. Run OSTF
        14. Forcefully reboot one controller using "reboot -f" command
            and wait till it comes up
        15. Run OSTF
        16. Gracefully reboot one compute using "reboot" command
            and wait till compute and controller come up
        17. Run OSTF
        18. Forcefully reboot one compute using "reboot -f" command
            and wait till compute and controller come up
        19. Run OSTF

    Duration: 360m
    """
    self.env.revert_snapshot("ready_with_3_slaves")

    self.show_step(1)
    checkers.enable_feature_group(self.env, "advanced")
    cluster_id = self.fuel_web.create_cluster(
        name=self.__class__.__name__,
        mode=settings.DEPLOYMENT_MODE_HA,
        settings={
            'net_provider': 'neutron',
            'net_segment_type': settings.NEUTRON_SEGMENT['vlan']
        })

    self.show_step(2)
    self.fuel_web.update_nodes(
        cluster_id,
        {
            'slave-01': ['compute', 'virt'],
            'slave-02': ['compute', 'virt'],
            'slave-03': ['compute', 'virt']
        })

    self.show_step(3)
    # One 2-CPU/2GB VM per baremetal slave, so each virtual controller
    # ends up on a separate compute node.
    for node in self.fuel_web.client.list_cluster_nodes(cluster_id):
        self.fuel_web.client.create_vm_nodes(
            node['id'],
            [{
                "id": 1,
                "mem": 2,
                "cpu": 2,
                "vda_size": "100G"
            }])

    self.show_step(4)
    self.update_virt_vm_template()
    net_template = get_network_template("baremetal_rf_ha")
    self.fuel_web.client.upload_network_template(cluster_id, net_template)
    self.fuel_web.spawn_vms_wait(cluster_id)
    # 3 baremetal slaves + 3 spawned VMs must register in nailgun.
    # FIX: the message used to keep a literal, never-formatted "{0}"
    # (built with "+" instead of .format) and claimed "2" nodes while
    # the condition waits for 6.
    wait(lambda: len(self.fuel_web.client.list_nodes()) == 6,
         timeout=60 * 60,
         timeout_msg=("Timeout waiting 6 available nodes, "
                      "current nodes: \n{0}".format('\n'.join(
                          'Name: {0}, status: {1}, online: {2}'.format(
                              i['name'], i['status'], i['online'])
                          for i in self.fuel_web.client.list_nodes()))))

    self.show_step(5)
    virt_nodes = {
        'vslave-01': ['controller'],
        'vslave-02': ['controller'],
        'vslave-03': ['controller']
    }
    self.update_virtual_nodes(cluster_id, virt_nodes)

    self.show_step(6)
    self.deploy_cluster_wait(cluster_id)

    self.show_step(7)
    self.fuel_web.run_ostf(cluster_id=cluster_id)

    self.show_step(8)
    virt_nodes = [n for n in self.fuel_web.client.list_nodes()
                  if n['name'].startswith('vslave')]
    ctrl = virt_nodes[0]
    with self.ssh_manager.get_remote(self.ssh_manager.admin_ip) as admin:
        preserve_partition(admin, ctrl['id'], "mysql")

    self.show_step(9)
    task = self.fuel_web.client.provision_nodes(
        cluster_id, [str(ctrl['id'])])
    self.fuel_web.assert_task_success(task)
    task = self.fuel_web.client.deploy_nodes(
        cluster_id, [str(ctrl['id'])])
    self.fuel_web.assert_task_success(task)

    self.show_step(10)
    # First wait until mysql answers wsrep queries at all, then until
    # the node reports the "Synced" local state.
    cmd = "mysql --connect_timeout=5 -sse \"SHOW STATUS LIKE 'wsrep%';\""
    with self.ssh_manager.get_remote(self.ssh_manager.admin_ip) as admin:
        err_msg = ("Galera isn't ready on {0} node".format(
            ctrl['hostname']))
        wait(
            lambda: admin.execute_through_host(
                ctrl['ip'], cmd,
                auth=self.ssh_auth)['exit_code'] == 0,
            timeout=10 * 60, timeout_msg=err_msg)

        cmd = ("mysql --connect_timeout=5 -sse \"SHOW STATUS LIKE "
               "'wsrep_local_state_comment';\"")
        err_msg = ("The reinstalled node {0} is not synced with the "
                   "Galera cluster".format(ctrl['hostname']))
        wait(
            # pylint: disable=no-member
            lambda: admin.execute_through_host(
                ctrl['ip'], cmd,
                auth=self.ssh_auth)['stdout'][0].split()[1] == "Synced",
            # pylint: enable=no-member
            timeout=10 * 60, timeout_msg=err_msg)

    self.show_step(11)
    self.fuel_web.run_ostf(cluster_id=cluster_id)

    self.show_step(12)
    self.show_step(13)
    self.show_step(14)
    self.show_step(15)
    # Graceful first, then forceful reboot of the same controller VM;
    # OSTF after each reboot (steps 13 and 15).
    cmds = {"reboot": "gracefully", "reboot -f >/dev/null &": "forcefully"}
    for cmd in cmds:
        with self.ssh_manager.get_remote(self.ssh_manager.admin_ip) as \
                admin:
            # FIX: added the missing space before "node" in the message.
            asserts.assert_true(
                admin.execute_through_host(
                    virt_nodes[1]['ip'], cmd,
                    auth=self.ssh_auth,
                    timeout=60)['exit_code'] == 0,
                "Failed to {0} reboot {1} controller "
                "node".format(cmds[cmd], virt_nodes[1]['name']))
        self.wait_for_slave(virt_nodes[1])
        self.fuel_web.run_ostf(cluster_id=cluster_id)

    self.show_step(16)
    self.show_step(17)
    self.show_step(18)
    self.show_step(19)
    compute = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
        cluster_id, ['compute'])[0]
    for cmd in cmds:
        with self.ssh_manager.get_remote(self.ssh_manager.admin_ip) as \
                admin:
            asserts.assert_true(
                admin.execute_through_host(
                    compute['ip'], cmd,
                    auth=self.ssh_auth,
                    timeout=60)['exit_code'] == 0,
                "Failed to {0} reboot {1} compute "
                "node".format(cmds[cmd], compute['name']))
        self.wait_for_slave(compute)
        # Virtual controllers live on the computes, so they must come
        # back up as well before OSTF.
        for vm in virt_nodes:
            self.wait_for_slave(vm)
        self.fuel_web.run_ostf(cluster_id=cluster_id)
def compute_stop_reinstallation(self):
    """Verify stop reinstallation of compute.

    Scenario:
        1. Revert the snapshot
        2. Create an OS volume and OS instance
        3. Mark 'cinder' and 'vm' partitions to be preserved
        4. Stop reinstallation process of compute
        5. Start the reinstallation process again
        6. Run network verification
        7. Run OSTF
        8. Verify that the volume is present and has 'available'
           status after the node reinstallation
        9. Verify that the VM is available and pingable
           after the node reinstallation

    Duration: 115m
    """
    self.env.revert_snapshot("node_reinstallation_env")

    cluster_id = self.fuel_web.get_last_created_cluster()

    # Create an OS volume
    os_conn = os_actions.OpenStackActions(
        self.fuel_web.get_public_vip(cluster_id))

    volume = os_conn.create_volume()

    # Create an OS instance pinned (via availability zone) to the
    # first hypervisor, so we know which compute hosts the workload.
    cmp_host = os_conn.get_hypervisors()[0]

    net_label = self.fuel_web.get_cluster_predefined_networks_name(
        cluster_id)['private_net']

    vm = os_conn.create_server_for_migration(
        neutron=True,
        availability_zone="nova:{0}".format(
            cmp_host.hypervisor_hostname), label=net_label)
    vm_floating_ip = os_conn.assign_floating_ip(vm)
    # The instance is considered ready once its SSH port answers.
    devops_helpers.wait(
        lambda: devops_helpers.tcp_ping(vm_floating_ip.ip, 22),
        timeout=120)

    cmp_nailgun = self.fuel_web.get_nailgun_node_by_fqdn(
        cmp_host.hypervisor_hostname)

    # Mark 'cinder' and 'vm' partitions to be preserved, so the volume
    # and instance data survive the node reinstallation.
    with self.env.d_env.get_admin_remote() as remote:
        preserve_partition(remote, cmp_nailgun['id'], "cinder")
        preserve_partition(remote, cmp_nailgun['id'], "vm")

    slave_nodes = self.fuel_web.client.list_cluster_nodes(cluster_id)
    devops_nodes = self.fuel_web.get_devops_nodes_by_nailgun_nodes(
        slave_nodes)

    # Interrupt the reinstallation mid-way (step 4), then restart it
    # from scratch (step 5).
    logger.info('Stop reinstallation process')
    self._stop_reinstallation(self.fuel_web, cluster_id,
                              [str(cmp_nailgun['id'])], devops_nodes)

    self.fuel_web.verify_network(cluster_id)
    logger.info('Start the reinstallation process again')
    NodeReinstallationEnv.reinstall_nodes(
        self.fuel_web, cluster_id, [str(cmp_nailgun['id'])])

    self.fuel_web.verify_network(cluster_id)
    self.fuel_web.run_ostf(cluster_id, test_sets=['ha', 'smoke', 'sanity'])

    # Verify that the created volume is still available
    try:
        volume = os_conn.cinder.volumes.get(volume.id)
    except NotFound:
        raise AssertionError(
            "{0} volume is not available after its {1} hosting node "
            "reinstallation".format(volume.id, cmp_nailgun['fqdn']))
    expected_status = "available"
    assert_equal(
        expected_status,
        volume.status,
        "{0} volume status is {1} after its {2} hosting node "
        "reinstallation. Expected status is {3}.".format(
            volume.id, volume.status, cmp_nailgun['fqdn'], expected_status)
    )

    # Verify that the VM is still available and reachable over SSH
    try:
        os_conn.verify_instance_status(vm, 'ACTIVE')
    except AssertionError:
        raise AssertionError(
            "{0} VM is not available after its {1} hosting node "
            "reinstallation".format(vm.name, cmp_host.hypervisor_hostname))
    assert_true(devops_helpers.tcp_ping(vm_floating_ip.ip, 22),
                "{0} VM is not accessible via its {1} floating "
                "ip".format(vm.name, vm_floating_ip))
def mongo_mysql_partition_preservation(self):
    """Verify partition preservation of Ceilometer and mysql data.

    Scenario:
        1. Revert the snapshot
        2. Create a ceilometer alarm
        3. Mark 'mongo' and 'mysql' partitions to be preserved
           on one of controllers
        4. Reinstall the controller
        5. Verify that the alarm is present after the node
           reinstallation
        6. Verify that the reinstalled node joined the Galera cluster
           and synced its state
        7. Run network verification
        8. Run OSTF

    Duration: 110m
    """
    self.env.revert_snapshot("node_reinstallation_env")
    cluster_id = self.fuel_web.get_last_created_cluster()

    # Create a ceilometer alarm and remember the alarm list to compare
    # it with the list after the reinstallation (step 5).
    with self.fuel_web.get_ssh_for_node("slave-01") as remote:
        alarm_name = "test_alarm"
        res = remote.execute(
            "source openrc; "
            "ceilometer alarm-threshold-create "
            "--name {0} "
            "-m {1} "
            "--threshold {2}".format(alarm_name, "cpu_util", "80.0")
        )
        assert_equal(0, res['exit_code'],
                     "Creating alarm via ceilometer CLI failed.")
        initial_alarms = remote.execute(
            "source openrc; ceilometer alarm-list")

    mongo_nailgun = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
        cluster_id, ['mongo'])[0]

    # Mark 'mongo' and 'mysql' partitions to be preserved
    with self.env.d_env.get_admin_remote() as remote:
        preserve_partition(remote, mongo_nailgun['id'], "mongo")
        preserve_partition(remote, mongo_nailgun['id'], "mysql")

    NodeReinstallationEnv.reinstall_nodes(
        self.fuel_web, cluster_id, [str(mongo_nailgun['id'])])

    with self.fuel_web.get_ssh_for_nailgun_node(mongo_nailgun) as rmt:
        # The alarm list must be unchanged by the reinstallation.
        alarms = rmt.execute("source openrc; ceilometer alarm-list")
        assert_equal(
            initial_alarms['stdout'],
            alarms['stdout'],
            "{0} alarm is not available in mongo after reinstallation "
            "of the controllers".format(alarm_name))

        # First wait until mysql answers wsrep status queries at all...
        cmd = ("mysql --connect_timeout=5 -sse "
               "\"SHOW STATUS LIKE 'wsrep%';\"")
        err_msg = ("Galera isn't ready on {0} "
                   "node".format(mongo_nailgun['hostname']))
        devops_helpers.wait(
            lambda: rmt.execute(cmd)['exit_code'] == 0,
            timeout=10 * 60,
            timeout_msg=err_msg)

        # ...then until the node reports the "Synced" wsrep local state.
        cmd = ("mysql --connect_timeout=5 -sse \"SHOW STATUS LIKE "
               "'wsrep_local_state_comment';\"")
        err_msg = ("The reinstalled node {0} is not synced with the "
                   "Galera cluster".format(mongo_nailgun['hostname']))
        devops_helpers.wait(
            lambda: rmt.execute(cmd)['stdout'][0].split()[1] == "Synced",
            timeout=10 * 60,
            timeout_msg=err_msg)

    self.fuel_web.verify_network(cluster_id)
    self.fuel_web.run_ostf(cluster_id, test_sets=['ha', 'smoke', 'sanity'])
def baremetal_deploy_virt_nodes_on_different_computes(self):
    """Baremetal deployment of a cluster with virtual nodes in HA
    mode; each virtual node on a separate compute

    Scenario:
        1. Create cluster
        2. Assign compute and virt roles to three slave nodes
        3. Upload VM configuration for one VM to each slave node
        4. Apply network template for the env and spawn the VMs
        5. Assign controller role to VMs
        6. Deploy cluster
        7. Run OSTF
        8. Mark 'mysql' partition to be preserved on one of controllers
        9. Reinstall the controller
        10. Verify that the reinstalled controller joined the Galera
            cluster and synced its state
        11. Run OSTF
        12. Gracefully reboot one controller using "reboot" command
            and wait till it comes up
        13. Run OSTF
        14. Forcefully reboot one controller using "reboot -f" command
            and wait till it comes up
        15. Run OSTF
        16. Gracefully reboot one compute using "reboot" command
            and wait till compute and controller come up
        17. Run OSTF
        18. Forcefully reboot one compute using "reboot -f" command
            and wait till compute and controller come up
        19. Run OSTF

    Duration: 360m
    """
    self.env.revert_snapshot("ready_with_3_slaves")

    self.show_step(1)
    checkers.enable_feature_group(self.env, "advanced")
    cluster_id = self.fuel_web.create_cluster(
        name=self.__class__.__name__,
        mode=settings.DEPLOYMENT_MODE_HA,
        settings={
            "net_provider": "neutron",
            "net_segment_type": settings.NEUTRON_SEGMENT["vlan"],
        })

    self.show_step(2)
    self.fuel_web.update_nodes(
        cluster_id,
        {
            "slave-01": ["compute", "virt"],
            "slave-02": ["compute", "virt"],
            "slave-03": ["compute", "virt"],
        })

    self.show_step(3)
    # One 2-CPU/2GB VM per baremetal slave, so each virtual controller
    # ends up on a separate compute node.
    for node in self.fuel_web.client.list_cluster_nodes(cluster_id):
        self.fuel_web.client.create_vm_nodes(node["id"], [{
            "id": 1,
            "mem": 2,
            "cpu": 2,
            "vda_size": "100G",
        }])

    self.show_step(4)
    self.update_virt_vm_template()
    net_template = get_network_template("baremetal_rf_ha")
    self.fuel_web.client.upload_network_template(cluster_id, net_template)
    self.fuel_web.spawn_vms_wait(cluster_id)
    # 3 baremetal slaves + 3 spawned VMs must register in nailgun.
    # FIX: the message used to keep a literal, never-formatted "{0}"
    # (built with "+" instead of .format) and claimed "2" nodes while
    # the condition waits for 6.
    wait(lambda: len(self.fuel_web.client.list_nodes()) == 6,
         timeout=60 * 60,
         timeout_msg=("Timeout waiting 6 available nodes, "
                      "current nodes: \n{0}".format("\n".join(
                          "Name: {0}, status: {1}, online: {2}".format(
                              i["name"], i["status"], i["online"])
                          for i in self.fuel_web.client.list_nodes()))))

    self.show_step(5)
    virt_nodes = {
        "vslave-01": ["controller"],
        "vslave-02": ["controller"],
        "vslave-03": ["controller"],
    }
    self.update_virtual_nodes(cluster_id, virt_nodes)

    self.show_step(6)
    self.deploy_cluster_wait(cluster_id)

    self.show_step(7)
    self.fuel_web.run_ostf(cluster_id=cluster_id)

    self.show_step(8)
    virt_nodes = [
        n for n in self.fuel_web.client.list_nodes()
        if n["name"].startswith("vslave")
    ]
    ctrl = virt_nodes[0]
    with self.ssh_manager.get_remote(self.ssh_manager.admin_ip) as admin:
        preserve_partition(admin, ctrl["id"], "mysql")

    self.show_step(9)
    task = self.fuel_web.client.provision_nodes(cluster_id,
                                                [str(ctrl["id"])])
    self.fuel_web.assert_task_success(task)
    task = self.fuel_web.client.deploy_nodes(cluster_id, [str(ctrl["id"])])
    self.fuel_web.assert_task_success(task)

    self.show_step(10)
    # First wait until mysql answers wsrep queries at all, then until
    # the node reports the "Synced" local state.
    cmd = "mysql --connect_timeout=5 -sse \"SHOW STATUS LIKE 'wsrep%';\""
    with self.ssh_manager.get_remote(self.ssh_manager.admin_ip) as admin:
        err_msg = ("Galera isn't ready on {0} node".format(
            ctrl["hostname"]))
        wait(lambda: admin.execute_through_host(
            ctrl["ip"], cmd, auth=self.ssh_auth)["exit_code"] == 0,
            timeout=10 * 60,
            timeout_msg=err_msg)

        cmd = ("mysql --connect_timeout=5 -sse \"SHOW STATUS LIKE "
               "'wsrep_local_state_comment';\"")
        err_msg = ("The reinstalled node {0} is not synced with the "
                   "Galera cluster".format(ctrl["hostname"]))
        wait(
            # pylint: disable=no-member
            lambda: admin.execute_through_host(
                ctrl["ip"], cmd,
                auth=self.ssh_auth)["stdout"][0].split()[1] == "Synced",
            # pylint: enable=no-member
            timeout=10 * 60,
            timeout_msg=err_msg)

    self.show_step(11)
    self.fuel_web.run_ostf(cluster_id=cluster_id)

    self.show_step(12)
    self.show_step(13)
    self.show_step(14)
    self.show_step(15)
    # Graceful first, then forceful reboot of the same controller VM;
    # OSTF after each reboot (steps 13 and 15).
    cmds = {"reboot": "gracefully", "reboot -f >/dev/null &": "forcefully"}
    for cmd in cmds:
        with self.ssh_manager.get_remote(self.ssh_manager.admin_ip) as \
                admin:
            # FIX: added the missing space before "node" in the message.
            asserts.assert_true(
                admin.execute_through_host(virt_nodes[1]["ip"],
                                           cmd,
                                           auth=self.ssh_auth,
                                           timeout=60)["exit_code"] == 0,
                "Failed to {0} reboot {1} controller "
                "node".format(cmds[cmd], virt_nodes[1]["name"]))
        self.wait_for_slave(virt_nodes[1])
        self.fuel_web.run_ostf(cluster_id=cluster_id)

    self.show_step(16)
    self.show_step(17)
    self.show_step(18)
    self.show_step(19)
    compute = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
        cluster_id, ["compute"])[0]
    for cmd in cmds:
        with self.ssh_manager.get_remote(self.ssh_manager.admin_ip) as \
                admin:
            asserts.assert_true(
                admin.execute_through_host(compute["ip"],
                                           cmd,
                                           auth=self.ssh_auth,
                                           timeout=60)["exit_code"] == 0,
                "Failed to {0} reboot {1} compute "
                "node".format(cmds[cmd], compute["name"]))
        self.wait_for_slave(compute)
        # Virtual controllers live on the computes, so they must come
        # back up as well before OSTF.
        for vm in virt_nodes:
            self.wait_for_slave(vm)
        self.fuel_web.run_ostf(cluster_id=cluster_id)
def compute_stop_reinstallation(self):
    """Verify stop reinstallation of compute.

    Scenario:
        1. Revert the snapshot
        2. Create an OS volume and OS instance
        3. Mark 'cinder' and 'vm' partitions to be preserved
        4. Stop reinstallation process of compute
        5. Start the reinstallation process again
        6. Run network verification
        7. Run OSTF
        8. Verify that the volume is present and has 'available'
           status after the node reinstallation
        9. Verify that the VM is available and pingable
           after the node reinstallation

    Duration: 115m
    """
    self.env.revert_snapshot("node_reinstallation_env")
    cluster_id = self.fuel_web.get_last_created_cluster()

    # Prepare workload on the compute that will be reinstalled:
    # a cinder volume plus a nova instance pinned to that hypervisor.
    openstack = os_actions.OpenStackActions(
        self.fuel_web.get_public_vip(cluster_id))
    volume = openstack.create_volume()

    hypervisor = openstack.get_hypervisors()[0]
    private_net = self.fuel_web.get_cluster_predefined_networks_name(
        cluster_id)['private_net']
    instance = openstack.create_server_for_migration(
        neutron=True,
        availability_zone="nova:{0}".format(
            hypervisor.hypervisor_hostname),
        label=private_net)
    floating_ip = openstack.assign_floating_ip(instance)
    # The instance counts as up once its SSH port answers.
    devops_helpers.wait(
        lambda: devops_helpers.tcp_ping(floating_ip.ip, 22),
        timeout=120)

    target_node = self.fuel_web.get_nailgun_node_by_fqdn(
        hypervisor.hypervisor_hostname)

    # Keep the partitions holding the volume and instance data intact
    # across the reinstallation.
    with self.env.d_env.get_admin_remote() as remote:
        preserve_partition(remote, target_node['id'], "cinder")
        preserve_partition(remote, target_node['id'], "vm")

    cluster_nodes = self.fuel_web.client.list_cluster_nodes(cluster_id)
    devops_nodes = self.fuel_web.get_devops_nodes_by_nailgun_nodes(
        cluster_nodes)

    # Interrupt the reinstallation mid-way, then restart it cleanly.
    logger.info('Stop reinstallation process')
    self._stop_reinstallation(self.fuel_web, cluster_id,
                              [str(target_node['id'])], devops_nodes)
    self.fuel_web.verify_network(cluster_id)

    logger.info('Start the reinstallation process again')
    NodeReinstallationEnv.reinstall_nodes(
        self.fuel_web, cluster_id, [str(target_node['id'])])
    self.fuel_web.verify_network(cluster_id)
    self.fuel_web.run_ostf(cluster_id, test_sets=['ha', 'smoke', 'sanity'])

    # The preserved cinder partition must still hold the volume.
    try:
        volume = openstack.cinder.volumes.get(volume.id)
    except NotFound:
        raise AssertionError(
            "{0} volume is not available after its {1} hosting node "
            "reinstallation".format(volume.id, target_node['fqdn']))
    expected_status = "available"
    assert_equal(
        expected_status,
        volume.status,
        "{0} volume status is {1} after its {2} hosting node "
        "reinstallation. Expected status is {3}.".format(
            volume.id, volume.status, target_node['fqdn'],
            expected_status))

    # The preserved vm partition must still hold the running instance.
    try:
        openstack.verify_instance_status(instance, 'ACTIVE')
    except AssertionError:
        raise AssertionError(
            "{0} VM is not available after its {1} hosting node "
            "reinstallation".format(
                instance.name, hypervisor.hypervisor_hostname))
    assert_true(
        devops_helpers.tcp_ping(floating_ip.ip, 22),
        "{0} VM is not accessible via its {1} floating "
        "ip".format(instance.name, floating_ip))
def mongo_mysql_partition_preservation(self):
    """Verify partition preservation of Ceilometer and mysql data.

    Scenario:
        1. Revert the snapshot
        2. Create a ceilometer alarm
        3. Mark 'mongo' and 'mysql' partitions to be preserved
           on one of controllers
        4. Reinstall the controller
        5. Verify that the alarm is present after the node
           reinstallation
        6. Verify that the reinstalled node joined the Galera cluster
           and synced its state
        7. Run network verification
        8. Run OSTF

    Duration: 110m
    """
    self.env.revert_snapshot("node_reinstallation_env")
    cluster_id = self.fuel_web.get_last_created_cluster()

    # Create a ceilometer alarm and remember the alarm list to compare
    # it with the list after the reinstallation (step 5).
    with self.fuel_web.get_ssh_for_node("slave-01") as remote:
        alarm_name = "test_alarm"
        res = remote.execute(
            "source openrc; "
            "ceilometer alarm-threshold-create "
            "--name {0} "
            "-m {1} "
            "--threshold {2}".format(alarm_name, "cpu_util", "80.0"))
        assert_equal(0, res["exit_code"],
                     "Creating alarm via ceilometer CLI failed.")
        initial_alarms = remote.execute(
            "source openrc; ceilometer alarm-list")

    mongo_nailgun = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
        cluster_id, ["mongo"])[0]

    # Mark 'mongo' and 'mysql' partitions to be preserved
    with self.env.d_env.get_admin_remote() as remote:
        preserve_partition(remote, mongo_nailgun["id"], "mongo")
        preserve_partition(remote, mongo_nailgun["id"], "mysql")

    NodeReinstallationEnv.reinstall_nodes(
        self.fuel_web, cluster_id, [str(mongo_nailgun["id"])])

    with self.fuel_web.get_ssh_for_nailgun_node(mongo_nailgun) as rmt:
        # The alarm list must be unchanged by the reinstallation.
        alarms = rmt.execute("source openrc; ceilometer alarm-list")
        assert_equal(
            initial_alarms["stdout"],
            alarms["stdout"],
            "{0} alarm is not available in mongo after reinstallation "
            "of the controllers".format(alarm_name))

        # First wait until mysql answers wsrep status queries at all...
        cmd = ("mysql --connect_timeout=5 -sse "
               "\"SHOW STATUS LIKE 'wsrep%';\"")
        err_msg = ("Galera isn't ready on {0} "
                   "node".format(mongo_nailgun["hostname"]))
        devops_helpers.wait(
            lambda: rmt.execute(cmd)["exit_code"] == 0,
            timeout=10 * 60,
            timeout_msg=err_msg)

        # ...then until the node reports the "Synced" wsrep state.
        # FIX: the command string was garbled and had lost its "-sse"
        # flags; without "-e" mysql would treat the SQL text as a
        # database name and this check could never succeed.
        cmd = ("mysql --connect_timeout=5 -sse \"SHOW STATUS LIKE "
               "'wsrep_local_state_comment';\"")
        err_msg = ("The reinstalled node {0} is not synced with the "
                   "Galera cluster".format(mongo_nailgun["hostname"]))
        devops_helpers.wait(
            lambda: rmt.execute(cmd)["stdout"][0].split()[1] == "Synced",
            timeout=10 * 60,
            timeout_msg=err_msg)

    self.fuel_web.verify_network(cluster_id)
    self.fuel_web.run_ostf(cluster_id, test_sets=["ha", "smoke", "sanity"])