def check_alarms(self, alarm_type, filter_value, source, hostname, value,
                 time_interval="now() - 5m"):
    filter_by = "node_role"
    if alarm_type == "service":
        filter_by = "service"
    filters = [
        "time >= {}".format(time_interval),
        "{} = '{}'".format(filter_by, filter_value),
        "value = {}".format(value)
    ]
    if source is not None:
        filters.append("source = '{}'".format(source))
    if hostname is not None:
        filters.append("hostname = '{}'".format(hostname))

    query = "select last(value) from {select_from} where {filters}".format(
        select_from="{}_status".format(alarm_type),
        filters=" and ".join(filters))

    def check_result():
        return len(
            self.do_influxdb_query(query=query).json()['results'][0])

    msg = ("Alarm of type: {}: entity: {}, source: {}, hostname: {}, "
           "value: {} wasn't triggered".format(alarm_type, filter_value,
                                               source, hostname, value))
    utils.wait(check_result, timeout=60 * 5, interval=10, timeout_msg=msg)
def test_glance_notifications(self, os_clients, es_client):
    """Check that Glance notifications are present in Elasticsearch

    Scenario:
        1. Create, update and delete image actions using Glance v2
        2. Check that Glance notifications are present in current
           Elasticsearch index

    Duration 15m
    """
    glance_event_types = [
        "image.create", "image.prepare", "image.upload",
        "image.activate", "image.update", "image.delete"
    ]

    image_name = utils.rand_name("image-")
    client = os_clients.image
    image = client.images.create(name=image_name,
                                 container_format="bare",
                                 disk_format="raw")
    client.images.upload(image.id, "dummy_data")
    wait_for_resource_status(client.images, image.id, "active")

    prop = utils.rand_name("prop")
    value_prop = utils.rand_name("value")
    properties = '{0}: {1}'.format(prop, value_prop)
    image = client.images.update(image.id, group_props=properties)
    assert any(image[key] == properties for key in image)

    client.images.delete(image.id)
    utils.wait(
        lambda: image.id not in [i["id"] for i in client.images.list()])

    es_client.check_notifications(glance_event_types,
                                  query_filter="Logger:glance", size=500)
def test_glance_metrics(self, destructive, prometheus_api, os_clients):
    image_name = utils.rand_name("image-")
    client = os_clients.image

    image = client.images.create(
        name=image_name,
        container_format="bare",
        disk_format="raw",
        visibility="public")
    client.images.upload(image.id, "dummy_data")
    wait_for_resource_status(client.images, image.id, "active")
    destructive.append(lambda: client.images.delete(image.id))

    filter = {"visibility": "public"}
    images_count = len(
        [im for im in client.images.list(filters=filter)])
    images_size = sum(
        [im["size"] for im in client.images.list(filters=filter)])

    count_query = ('{__name__="openstack_glance_images",'
                   'visibility="public",status="active"}')
    err_count_msg = "Incorrect image count in metric {}".format(count_query)
    self.check_openstack_metrics(
        prometheus_api, count_query, images_count, err_count_msg)

    size_query = ('{__name__="openstack_glance_images_size",'
                  'visibility="public", status="active"}')
    error_size_msg = "Incorrect image size in metric {}".format(size_query)
    self.check_openstack_metrics(
        prometheus_api, size_query, images_size, error_size_msg)

    client.images.delete(image.id)
    utils.wait(
        lambda: image.id not in [i["id"] for i in client.images.list()])
def create_basic_server(self, image=None, flavor=None, net=None,
                        availability_zone=None, sec_groups=(),
                        wait_timeout=3 * 60):
    os_conn = self.os_clients
    image = image or self.get_cirros_image()
    flavor = flavor or self.get_micro_flavor()
    net = net or self.get_internal_network()
    kwargs = {}
    if sec_groups:
        kwargs['security_groups'] = sec_groups
    server = os_conn.compute.servers.create(
        utils.rand_name("server-"), image, flavor,
        nics=[{"net-id": net["id"]}],
        availability_zone=availability_zone, **kwargs)
    if wait_timeout:
        utils.wait(
            lambda: os_conn.compute.servers.get(server).status == "ACTIVE",
            timeout=wait_timeout,
            timeout_msg=("Create server {!r} failed by timeout. "
                         "Please, take a look at OpenStack logs".format(
                             server.id)))
    return server
def test_cinder_metrics(self, destructive, prometheus_api, os_clients):
    volume_name = utils.rand_name("volume-")
    expected_volume_status = settings.VOLUME_STATUS

    client = os_clients.volume
    volume = client.volumes.create(size=1, name=volume_name)
    wait_for_resource_status(client.volumes, volume.id,
                             expected_volume_status)
    destructive.append(lambda: client.volumes.delete(volume))

    filter = {'status': expected_volume_status, 'all_tenants': 1}
    volumes_count = len(
        [vol for vol in client.volumes.list(search_opts=filter)])
    volumes_size = sum(
        [vol.size for vol in client.volumes.list(
            search_opts=filter)]) * 10**9

    count_query = ('{{__name__="openstack_cinder_volumes",'
                   'status="{0}"}}'.format(expected_volume_status))
    err_count_msg = "Incorrect volume count in metric {}".format(count_query)
    self.check_openstack_metrics(
        prometheus_api, count_query, volumes_count, err_count_msg)

    size_query = ('{{__name__="openstack_cinder_volumes_size",'
                  'status="{0}"}}'.format(expected_volume_status))
    error_size_msg = "Incorrect volume size in metric {}".format(size_query)
    self.check_openstack_metrics(
        prometheus_api, size_query, volumes_size, error_size_msg)

    client.volumes.delete(volume)
    utils.wait(
        lambda: volume.id not in [v.id for v in client.volumes.list()])
def verify_service_state_change(service_names, action, new_state,
                                service_state_in_influx,
                                down_backends_in_haproxy):
    logger.info("Changing state of service {0}. "
                "New state is {1}".format(service_names[0], new_state))
    for toolchain_node in toolchain_nodes:
        toolchain_node.os.clear_local_mail()
    for node in controller_nodes:
        if action == "stop":
            # Bind the current node as a default argument to avoid the
            # late-binding closure pitfall inside the loop.
            destructive.append(
                lambda node=node: node.os.manage_service(
                    service_names[0], "start"))
        node.os.manage_service(service_names[0], action)
    influxdb_client.check_cluster_status(
        service_names[1], service_state_in_influx)
    if service_names[3]:
        influxdb_client.check_count_of_haproxy_backends(
            service_names[3], expected_count=down_backends_in_haproxy)
    nagios_client.wait_service_state_on_nagios(
        {service_names[2]: new_state})
    msg = ("Mail check failed for service: {} "
           "with new status: {}.".format(service_names[2], new_state))
    utils.wait(lambda: any(
        t_node.os.check_local_mail(service_names[2], new_state)
        for t_node in toolchain_nodes),
        timeout=5 * 60, interval=15, timeout_msg=msg)
def create_stack(self, template, disable_rollback=True, parameters=None,
                 wait_active=True):
    parameters = parameters or {}
    stack_name = utils.rand_name('stack-')
    stack_id = self.os_clients.orchestration.stacks.create(
        stack_name=stack_name,
        template=template,
        parameters=parameters,
        disable_rollback=disable_rollback
    )['stack']['id']
    # self.addCleanup(self.delete_stack, stack_id)
    # The heat client doesn't return stack details after creation,
    # so they need to be requested explicitly.
    stack = self.os_clients.orchestration.stacks.get(stack_id)
    if wait_active:
        utils.wait(
            lambda: self.os_clients.orchestration.stacks.get(
                stack_id).stack_status == "CREATE_COMPLETE",
            interval=10,
            timeout=180,
        )
    return stack
def test_cinder_notifications(self, os_clients, es_client):
    """Check that Cinder notifications are present in Elasticsearch

    Scenario:
        1. Create a volume and update it
        2. Check that Cinder notifications are present in current
           Elasticsearch index

    Duration 15m
    """
    cinder_event_types = ["volume.update.start", "volume.update.end"]
    cinder = os_clients.volume

    logger.info("Create a volume")
    volume = cinder.volumes.create(size=1)
    wait_for_resource_status(
        os_clients.volume.volumes, volume.id, "available")

    logger.info("Update the volume")
    if cinder.version == 1:
        cinder.volumes.update(volume, display_name="updated_volume")
    else:
        cinder.volumes.update(volume, name="updated_volume")
    wait_for_resource_status(
        os_clients.volume.volumes, volume.id, "available")

    logger.info("Delete the volume")
    cinder.volumes.delete(volume)
    utils.wait(
        lambda: volume.id not in [v.id for v in cinder.volumes.list()])

    es_client.check_notifications(
        cinder_event_types,
        query_filter='volume_id:"{}"'.format(volume.id), size=500)
def verify_service_state_change(service_names, action, new_state,
                                service_state_in_influx,
                                down_backends_in_haproxy):
    logger.info("Changing state of service {0}. "
                "New state is {1}".format(service_names[0], new_state))
    for toolchain_node in toolchain_nodes:
        toolchain_node.os.clear_local_mail()
    for node in controller_nodes:
        node.os.manage_service(service_names[0], action)
    self.influxdb_api.check_cluster_status(
        service_names[1], service_state_in_influx)
    if service_names[3]:
        self.influxdb_api.check_count_of_haproxy_backends(
            service_names[3], expected_count=down_backends_in_haproxy)
    utils.wait(lambda: any(
        t_node.os.check_local_mail(service_names[2], new_state)
        for t_node in toolchain_nodes),
        timeout=5 * 60, interval=15)
def test_nova_metrics(self, os_clients, os_actions, influxdb_client):
    """Verify that Nova metrics are being collected.

    Scenario:
        1. Create 3 new instances
        2. Check Nova metrics in InfluxDB

    Duration 5m
    """
    time_started = "{}s".format(int(time.time()))
    check_metrics = influxdb_client.get_instance_creation_time_metrics
    metrics = check_metrics(time_started)

    new_instance_count = 3
    new_servers = []
    for _ in range(new_instance_count):
        new_servers.append(os_actions.create_basic_server())

    total_instances = new_instance_count + len(metrics)
    msg = ("There is a mismatch of instances in Nova metrics, "
           "found less than {}".format(total_instances))
    utils.wait(
        lambda: len(check_metrics(time_started)) == total_instances,
        interval=10, timeout=180, timeout_msg=msg)

    for server in new_servers:
        os_clients.compute.servers.delete(server)
def test_nova_aggregates_memory(self, prometheus_api, prometheus_alerting,
                                os_clients, os_actions, destructive):
    def get_agg_free_ram(a_n, a_id):
        def _get_current_value(q):
            try:
                v = prometheus_api.get_query(q)[0]["value"][1]
            except IndexError:
                v = 0
            return v

        query = ('openstack_nova_aggregate_free_ram{aggregate="' + a_n +
                 '",aggregate_id="' + str(a_id) + '"}')
        utils.wait(lambda: _get_current_value(query) != 0,
                   interval=10, timeout=2 * 60)
        return _get_current_value(query)

    client = os_clients.compute
    aggr_name = "test-aggr"
    az = "test-az"
    host = "cmp01"
    aggr = client.aggregates.create(aggr_name, az)
    client.aggregates.add_host(aggr, host)
    destructive.append(lambda: client.aggregates.remove_host(aggr, host))
    destructive.append(lambda: client.aggregates.delete(aggr.id))

    criteria = {"name": "NovaAggregatesFreeMemoryLow"}
    prometheus_alerting.check_alert_status(
        criteria, is_fired=False, timeout=10 * 60)

    free_ram = get_agg_free_ram(aggr_name, aggr.id)
    image = os_actions.get_cirros_image()
    flavor = os_actions.create_flavor(
        name="test_flavor", ram=int(free_ram) - 100)
    destructive.append(lambda: client.flavors.delete(flavor))

    tenant_id = os_actions.get_admin_tenant().id
    net = os_actions.create_network(tenant_id)
    subnet = os_actions.create_subnet(net, tenant_id, "192.168.100.0/24")
    server = os_actions.create_basic_server(image, flavor, net,
                                            availability_zone=az)
    destructive.append(lambda: client.servers.delete(server))
    destructive.append(lambda: os_clients.network.delete_subnet(
        subnet['id']))
    destructive.append(lambda: os_clients.network.delete_network(
        net['id']))

    prometheus_alerting.check_alert_status(
        criteria, is_fired=True, timeout=10 * 60)

    client.servers.delete(server)
    utils.wait(
        lambda: server.id not in [s.id for s in client.servers.list()])

    prometheus_alerting.check_alert_status(
        criteria, is_fired=False, timeout=10 * 60)

    os_clients.network.delete_subnet(subnet['id'])
    os_clients.network.delete_network(net['id'])
    client.flavors.delete(flavor)
    client.aggregates.remove_host(aggr, host)
    client.aggregates.delete(aggr.id)
def _check_influx_query_last_value(self, query, expected_value):
    def check_status():
        output = self.do_influxdb_query(query)
        result = output.json()['results'][0]
        if not result:
            return False
        return result['series'][0]['values'][0][1] == expected_value

    utils.wait(check_status, timeout=5 * 60)
def test_system_metrics(self, prometheus_api, cluster, target, metrics):
    expected_hostnames = [h.hostname for h in cluster.hosts]
    for hostname in expected_hostnames:
        q = '{{__name__=~"^{}.*", host="{}"}}'.format(target, hostname)
        logger.info("Waiting to get all metrics")
        msg = "Timed out waiting to get all metrics"
        utils.wait(
            lambda: self.verify_notifications(prometheus_api, metrics, q),
            timeout=5 * 60, interval=10, timeout_msg=msg)
def wait_for_resource_status(resource_client, resource, expected_status,
                             timeout=180, interval=10):
    msg = "Timed out waiting for resource to become {}".format(
        expected_status)
    utils.wait(
        lambda: resource_client.get(resource).status == expected_status,
        interval=interval, timeout=timeout, timeout_msg=msg)
def set_rabbitmq_memory_watermark(self, controller, limit, timeout=5 * 60):
    def check_result():
        exit_code, _, _ = controller.os.transport.exec_sync(
            "rabbitmqctl set_vm_memory_high_watermark {}".format(limit))
        return exit_code == 0

    msg = "Failed to set vm_memory_high_watermark to {}".format(limit)
    utils.wait(check_result, timeout=timeout, interval=10, timeout_msg=msg)
def _check_influx_query_last_value(self, query, expected_value):
    def check_status():
        logger.debug("Awaiting value: {}".format(expected_value))
        output = self.do_influxdb_query(query)
        result = output.json()['results'][0]
        if not result or 'series' not in result:
            return False
        return result['series'][0]['values'][0][1] == expected_value

    msg = "There is no such value: {} in results of query: {}".format(
        expected_value, query)
    utils.wait(check_status, timeout=5 * 60, timeout_msg=msg)
def get_agg_free_ram(a_n, a_id):
    def _get_current_value(q):
        try:
            v = prometheus_api.get_query(q)[0]["value"][1]
        except IndexError:
            v = 0
        return v

    query = ('openstack_nova_aggregate_free_ram{aggregate="' + a_n +
             '",aggregate_id="' + str(a_id) + '"}')
    utils.wait(lambda: _get_current_value(query) != 0,
               interval=10, timeout=2 * 60)
    return _get_current_value(query)
def check_openstack_metrics(self, prometheus_api, query, value, msg):
    def _verify_notifications(q, v):
        output = prometheus_api.get_query(q)
        logger.info("Check {} in {}".format(v, output))
        if not output:
            logger.error('Empty results received, '
                         'check a query {0}'.format(q))
            return False
        return v in output[0]["value"]

    utils.wait(
        lambda: _verify_notifications(query, str(value)),
        interval=10, timeout=2 * 60, timeout_msg=msg)
def wait_service_state_on_nagios(self, service_state=None, node_names=None):
    msg = ("Failed to get expected service states for services: {0} "
           "on nodes: {1}")
    msg = msg.format(
        list(service_state) if service_state is not None else "all",
        node_names if node_names is not None else "global-cluster")
    utils.wait(lambda: self.check_service_state_on_nagios(
        service_state, node_names), timeout=60 * 5, timeout_msg=msg)
def check_metric_values(self, query, value, msg=None):
    def _verify_notifications(q, v):
        output = self.get_query(q)
        logger.info("Check '{}' value in {} metric values".format(v, output))
        if not output:
            logger.error('Empty results received, '
                         'check a query {0}'.format(q))
            return False
        return v in output[0]["value"]

    msg = msg if msg else 'Incorrect value in metric {}'.format(query)
    utils.wait(lambda: _verify_notifications(query, str(value)),
               interval=30, timeout=5 * 60, timeout_msg=msg)
def test_system_metrics(self, prometheus_api, salt_actions, target, metrics):
    nodes = salt_actions.ping()
    expected_hostnames = [node.split(".")[0] for node in nodes]
    for hostname in expected_hostnames:
        if "SKIP_NODES" in os.environ:
            if hostname in os.environ['SKIP_NODES']:
                print("Skip {}".format(hostname))
                continue
        q = '{{__name__=~"^{}.*", host="{}"}}'.format(target, hostname)
        logger.info("Waiting to get all metrics")
        msg = "Timed out waiting to get all metrics"
        utils.wait(
            lambda: self.verify_notifications(prometheus_api, metrics, q),
            timeout=5 * 60, interval=10, timeout_msg=msg)
def check_alert_status(self, criteria, is_fired=True, timeout=5 * 60):
    def check():
        logger.debug("Awaiting alert {} is{} fired.".format(
            criteria, " not" if not is_fired else ""))
        status = self.get_alert_status(criteria)
        logger.debug("Alert is{} fired.".format(
            " not" if not status else ""))
        return status == is_fired

    msg = "Alert status was not changed."
    return utils.wait(check, timeout=timeout, timeout_msg=msg)
def test_prometheus_lts(self, prometheus_api, salt_actions):
    def compare_meas(sts_api, lts_api):
        sts_meas = sts_api.get_all_measurements()
        lts_meas = lts_api.get_all_measurements()
        if sts_meas == lts_meas:
            return True
        logger.info("Measurements in Prometheus short term storage "
                    "and NOT in long term storage: {0}\n"
                    "Measurements in Prometheus long term storage "
                    "and NOT in short term storage: {1}".format(
                        sts_meas.difference(lts_meas),
                        lts_meas.difference(sts_meas)))
        return False

    hosts = salt_actions.ping("I@prometheus:relay")
    if not hosts:
        pytest.skip("Prometheus LTS is not used in the cluster")
    address = salt_actions.get_pillar_item(
        hosts[0], '_param:single_address')[0]
    port = salt_actions.get_pillar_item(
        hosts[0], "prometheus:server:bind:port")[0]
    prometheus_lts = PrometheusClient(
        "http://{0}:{1}/".format(address, port))

    logger.info("Checking that target for Prometheus LTS is up")
    q = 'up{job="prometheus_federation"}'
    output = prometheus_lts.get_query(q)
    logger.info('Got {} metrics for {} query'.format(output, q))
    msg = 'There are no metrics for query {}'.format(q)
    assert len(output), msg

    logger.info("Check value '1' for metrics {}".format(q))
    msg = 'Incorrect value in metric {}'
    for metric in output:
        assert '1' in metric['value'], msg.format(metric)

    logger.info("Comparing lists of measurements in Prometheus long term "
                "storage and short term storage")
    timeout_msg = "Measurements in Prometheus STS and LTS are inconsistent"
    utils.wait(lambda: compare_meas(prometheus_api, prometheus_lts),
               interval=30, timeout=2 * 60, timeout_msg=timeout_msg)
def check_notifications(self, expected_notifications, timeout=300,
                        interval=10, **kwargs):
    def _verify_notifications(expected_list):
        output = self.query_elasticsearch(**kwargs)
        got_list = list(set(
            hit["_source"]["event_type"]
            for hit in output["hits"]["hits"]))
        for event_type in expected_list:
            if event_type not in got_list:
                logger.info("{} event type not found in {}".format(
                    event_type, got_list))
                return False
        return True

    logger.info("Waiting to get all notifications")
    msg = "Timed out waiting to get all notifications"
    utils.wait(
        lambda: _verify_notifications(expected_notifications),
        timeout=timeout, interval=interval, timeout_msg=msg)
def check_notifications(self, expected_notifications,
                        index_type="notification", timeout=5 * 60,
                        interval=10, **kwargs):
    def _verify_notifications(expected_list):
        output = self.query_elasticsearch(index_type=index_type, **kwargs)
        got = set(hit["_source"]["event_type"]
                  for hit in output["hits"]["hits"])
        delta = set(expected_list) - got
        if delta:
            logger.info("Event types {} not found in {}".format(delta, got))
            return False
        return True

    logger.info("Waiting to get all notifications")
    msg = "Timed out waiting to get all notifications"
    utils.wait(lambda: _verify_notifications(expected_notifications),
               timeout=timeout, interval=interval, timeout_msg=msg)
def check_status(self, service_type, hostname, value,
                 time_interval="now() - 10s"):
    filters = [
        "time >= {}".format(time_interval),
        "value = {}".format(value)
    ]
    if hostname is not None:
        filters.append("hostname = '{}'".format(hostname))

    query = "select last(value) from {alarm_type} where {filters}".format(
        alarm_type=service_type,
        filters=" and ".join(filters))

    def check_result():
        return len(
            self.do_influxdb_query(query=query).json()['results'][0])

    msg = ("Alarm of type: {}: hostname: {}, "
           "value: {} wasn't triggered".format(service_type, hostname,
                                               value))
    utils.wait(check_result, timeout=60 * 5, interval=10, timeout_msg=msg)
def test_alerta_alerts_consistency(prometheus_native_alerting, alerta_api):
    def check_alerts():
        alerta_alerts = {
            "{0} {1}".format(i.event, i.resource)
            for i in alerta_api.get_alerts({"status": "open"})
        }
        alertmanager_alerts = {
            "{0} {1}".format(i.name, i.instance)
            for i in prometheus_native_alerting.list_alerts()
        }
        if alerta_alerts == alertmanager_alerts:
            return True
        logger.info("Alerts in Alerta and NOT in Alertmanager: {0}\n"
                    "Alerts in Alertmanager and NOT in Alerta: {1}".format(
                        alerta_alerts.difference(alertmanager_alerts),
                        alertmanager_alerts.difference(alerta_alerts)))
        return False

    utils.wait(
        check_alerts, interval=30, timeout=6 * 60,
        timeout_msg="Alerts in Alertmanager and Alerta are inconsistent")
def test_nova_notifications(self, os_clients, os_actions, es_client):
    """Check that Nova notifications are present in Elasticsearch

    Scenario:
        1. Launch, update, rebuild, resize, power-off, power-on,
           snapshot, suspend, shutdown, and delete an instance
        2. Check that Nova notifications are present in current
           Elasticsearch index

    Duration 15m
    """
    nova_event_types = [
        "compute.instance.create.start", "compute.instance.create.end",
        "compute.instance.delete.start", "compute.instance.delete.end",
        "compute.instance.rebuild.start", "compute.instance.rebuild.end",
        # NOTE(rpromyshlennikov):
        # Disabled in favor of compatibility with Mk2x
        # "compute.instance.rebuild.scheduled",
        # "compute.instance.resize.prep.start",
        # "compute.instance.resize.prep.end",
        # "compute.instance.resize.confirm.start",
        # "compute.instance.resize.confirm.end",
        # "compute.instance.resize.revert.start",
        # "compute.instance.resize.revert.end",
        "compute.instance.exists",
        # "compute.instance.update",
        "compute.instance.shutdown.start", "compute.instance.shutdown.end",
        "compute.instance.power_off.start", "compute.instance.power_off.end",
        "compute.instance.power_on.start", "compute.instance.power_on.end",
        "compute.instance.snapshot.start", "compute.instance.snapshot.end",
        # "compute.instance.resize.start", "compute.instance.resize.end",
        # "compute.instance.finish_resize.start",
        # "compute.instance.finish_resize.end",
        "compute.instance.suspend.start", "compute.instance.suspend.end",
        # "scheduler.select_destinations.start",
        # "scheduler.select_destinations.end"
    ]
    instance_event_types = nova_event_types[:-2]
    instance = os_actions.create_basic_server()

    logger.info("Update the instance")
    os_clients.compute.servers.update(instance, name="test-server")
    wait_for_resource_status(os_clients.compute.servers, instance, "ACTIVE")

    image = os_actions.get_cirros_image()
    logger.info("Rebuild the instance")
    os_clients.compute.servers.rebuild(
        instance, image, name="rebuilded_instance")
    wait_for_resource_status(os_clients.compute.servers, instance, "ACTIVE")

    # NOTE(rpromyshlennikov):
    # Disabled in favor of compatibility with Mk2x
    # logger.info("Resize the instance")
    # flavors = os_clients.compute.flavors.list(sort_key="memory_mb")
    # os_clients.compute.servers.resize(instance, flavors[1])
    # wait_for_resource_status(
    #     os_clients.compute.servers, instance, "VERIFY_RESIZE")
    # logger.info("Confirm the resize")
    # os_clients.compute.servers.confirm_resize(instance)
    # wait_for_resource_status(
    #     os_clients.compute.servers, instance, "ACTIVE")
    # logger.info("Resize the instance")
    # os_clients.compute.servers.resize(instance, flavors[2])
    # wait_for_resource_status(
    #     os_clients.compute.servers, instance, "VERIFY_RESIZE")
    # logger.info("Revert the resize")
    # os_clients.compute.servers.revert_resize(instance)
    # wait_for_resource_status(
    #     os_clients.compute.servers, instance, "ACTIVE")

    logger.info("Stop the instance")
    os_clients.compute.servers.stop(instance)
    wait_for_resource_status(os_clients.compute.servers, instance, "SHUTOFF")

    logger.info("Start the instance")
    os_clients.compute.servers.start(instance)
    wait_for_resource_status(os_clients.compute.servers, instance, "ACTIVE")

    logger.info("Suspend the instance")
    os_clients.compute.servers.suspend(instance)
    wait_for_resource_status(
        os_clients.compute.servers, instance, "SUSPENDED")

    logger.info("Resume the instance")
    os_clients.compute.servers.resume(instance)
    wait_for_resource_status(os_clients.compute.servers, instance, "ACTIVE")

    logger.info("Create an instance snapshot")
    snapshot = os_clients.compute.servers.create_image(
        instance, "test-image")
    wait_for_resource_status(os_clients.compute.images, snapshot, "ACTIVE")

    logger.info("Delete the instance")
    os_clients.compute.servers.delete(instance)
    logger.info("Check that the instance was deleted")
    utils.wait(
        lambda: instance.id not in [
            s.id for s in os_clients.compute.servers.list()])

    es_client.check_notifications(
        instance_event_types,
        query_filter='instance_id:"{}"'.format(instance.id), size=500)
    es_client.check_notifications(
        nova_event_types, query_filter="Logger:nova", size=500)
def test_libvirt_metrics(self, prometheus_api, salt_actions, os_clients,
                         os_actions, destructive):
    def wait_for_metrics(inst_id):
        def _get_current_value(q):
            output = prometheus_api.get_query(q)
            logger.info("Got {} libvirt metrics".format(len(output)))
            return output

        query = '{{__name__=~"^libvirt.*", instance_uuid="{}"}}'.format(
            inst_id)
        output = []
        for _ in range(5):
            output = _get_current_value(query)
            if len(output) != 0:
                return output
            time.sleep(5)
        return output

    nodes = salt_actions.ping("I@nova:controller")
    if not nodes:
        pytest.skip("Openstack is not installed in the cluster")

    client = os_clients.compute

    logger.info("Creating a test image")
    image = os_clients.image.images.create(
        name="TestVM", disk_format='qcow2', container_format='bare')
    with file_cache.get_file(settings.CIRROS_QCOW2_URL) as f:
        os_clients.image.images.upload(image.id, f)
    destructive.append(lambda: os_clients.image.images.delete(image.id))

    logger.info("Creating a test flavor")
    flavor = os_actions.create_flavor(name="test_flavor", ram='64')
    destructive.append(lambda: client.flavors.delete(flavor))

    logger.info("Creating test network and subnet")
    project_id = os_clients.auth.projects.find(name='admin').id
    net = os_actions.create_network(project_id)
    subnet = os_actions.create_subnet(net, project_id, "192.168.100.0/24")

    logger.info("Creating a test instance")
    server = os_actions.create_basic_server(image, flavor, net)
    destructive.append(lambda: client.servers.delete(server))
    destructive.append(
        lambda: os_clients.network.delete_subnet(subnet['id']))
    destructive.append(
        lambda: os_clients.network.delete_network(net['id']))
    utils.wait_for_resource_status(client.servers, server, 'ACTIVE')
    logger.info("Created an instance with id {}".format(server.id))

    logger.info("Checking libvirt metrics for the instance")
    metrics = wait_for_metrics(server.id)
    metric_names = list(set(m['metric']['__name__'] for m in metrics))
    logger.info("Got the following list of libvirt metrics: \n{}".format(
        metric_names))

    regexes = [
        'libvirt_domain_block_stats_read*',
        'libvirt_domain_block_stats_write*',
        'libvirt_domain_interface_stats_receive*',
        'libvirt_domain_interface_stats_transmit*',
        'libvirt_domain_info*'
    ]
    for regex in regexes:
        regex = re.compile(r'{}'.format(regex))
        logger.info("Check metrics with mask {}".format(regex.pattern))
        found = [m for m in metric_names if regex.search(m)]
        logger.info("Found {} metrics for mask {}".format(
            found, regex.pattern))
        msg = "Metrics with mask '{}' not found in list {}".format(
            regex.pattern, metric_names)
        assert found, msg

    logger.info("Removing the test instance")
    client.servers.delete(server)
    utils.wait(
        lambda: server.id not in [s.id for s in client.servers.list()])

    logger.info("Removing the test network and subnet")
    os_clients.network.delete_subnet(subnet['id'])
    os_clients.network.delete_network(net['id'])

    logger.info("Removing the test image")
    os_clients.image.images.delete(image.id)

    logger.info("Removing the test flavor")
    client.flavors.delete(flavor)
def test_heat_notifications(self, os_clients, os_actions, es_client):
    """Check that Heat notifications are present in Elasticsearch

    Scenario:
        1. Run Heat platform actions
        2. Check that Heat notifications are present in current
           Elasticsearch index

    Duration 25m
    """
    heat_event_types = [
        # "orchestration.stack.check.start",
        # "orchestration.stack.check.end",
        "orchestration.stack.create.start",
        "orchestration.stack.create.end",
        "orchestration.stack.delete.start",
        "orchestration.stack.delete.end",
        # "orchestration.stack.resume.start",
        # "orchestration.stack.resume.end",
        # "orchestration.stack.rollback.start",
        # "orchestration.stack.rollback.end",
        # "orchestration.stack.suspend.start",
        # "orchestration.stack.suspend.end"
    ]

    name = utils.rand_name("heat-flavor-")
    flavor = os_actions.create_flavor(name)

    filepath = utils.get_fixture("heat_create_neutron_stack_template.yaml",
                                 parent_dirs=("heat",))
    with open(filepath) as template_file:
        template = template_file.read()

    parameters = {
        'InstanceType': flavor.name,
        'ImageId': os_actions.get_cirros_image().id,
        'network': os_actions.get_internal_network()["id"],
    }

    stack = os_actions.create_stack(template, parameters=parameters)

    # os_clients.orchestration.actions.suspend(stack.id)
    # utils.wait(
    #     (lambda:
    #      os_clients.orchestration.stacks.get(
    #          stack.id).stack_status == "SUSPEND_COMPLETE"),
    #     interval=10,
    #     timeout=180,
    # )

    resources = os_clients.orchestration.resources.list(stack.id)
    resource_server = [
        res for res in resources
        if res.resource_type == "OS::Nova::Server"
    ][0]
    # instance = os_clients.compute.servers.get(
    #     resource_server.physical_resource_id)
    # assert instance.status == "SUSPENDED"
    #
    # os_clients.orchestration.actions.resume(stack.id)
    # utils.wait(
    #     (lambda:
    #      os_clients.orchestration.stacks.get(
    #          stack.id).stack_status == "RESUME_COMPLETE"),
    #     interval=10,
    #     timeout=180,
    # )

    instance = os_clients.compute.servers.get(
        resource_server.physical_resource_id)
    assert instance.status == "ACTIVE"

    # os_clients.orchestration.actions.check(stack.id)
    #
    # utils.wait(
    #     (lambda:
    #      os_clients.orchestration.stacks.get(
    #          stack.id).stack_status == "CHECK_COMPLETE"),
    #     interval=10,
    #     timeout=180,
    # )

    os_clients.orchestration.stacks.delete(stack.id)
    os_clients.compute.flavors.delete(flavor.id)

    name = utils.rand_name("heat-flavor-")
    extra_large_flavor = os_actions.create_flavor(name, 1048576)
    parameters['InstanceType'] = extra_large_flavor.name
    stack = os_actions.create_stack(
        template, disable_rollback=False,
        parameters=parameters, wait_active=False)
    assert stack.stack_status == "CREATE_IN_PROGRESS"

    utils.wait(
        (lambda:
         os_clients.orchestration.stacks.get(stack.id).stack_status in
         ("DELETE_COMPLETE", "ROLLBACK_COMPLETE")),
        interval=10,
        timeout=360,
    )
    resources = os_clients.orchestration.resources.list(stack.id)
    resource_servers = [
        res for res in resources
        if res.resource_type == "OS::Nova::Server"
    ]
    assert (not resource_servers or
            resource_servers[0].physical_resource_id == "")

    os_clients.compute.flavors.delete(extra_large_flavor.id)

    es_client.check_notifications(
        heat_event_types, query_filter="Logger:heat", size=500)