def test_scale_down_on_group_threshold_breach():
    """Check that writing test readings on a group member starts a scale.

    Creates a dedicated tenant, deploys nagios from ``nagios-groups.yaml``
    and installs the ``basegroup1`` and ``basegroup2`` deployments. It then
    writes values into the test integer and counter files on ``basegroup1``,
    waits for a ``scale`` execution on that deployment and finally removes
    everything it created.
    """
    tenant = 'test_group_breach_scale_down'
    config = utils.load_config()

    # Tenant management goes through a client bound to the default tenant;
    # everything else uses a client bound to the test tenant.
    admin_client = utils.get_rest_client_using_config(
        config,
        tenant='default_tenant',
    )
    admin_client.tenants.create(tenant)
    tenant_client = utils.get_rest_client_using_config(config, tenant=tenant)

    utils.upload_config_secrets(config, tenant_client)
    plugins = utils.upload_required_plugins(tenant_client)

    utils.deploy_nagios(
        utils.get_examples_blueprint_path('nagios-groups.yaml'),
        utils.get_nagios_inputs(config),
        tenant_client,
    )

    # (blueprint file, deployment/blueprint ID) for each monitored group
    monitored = (
        ('basegroup1.yaml', 'basegroup1'),
        ('basegroup2.yaml', 'basegroup2'),
    )
    for blueprint_file, deployment_id in monitored:
        utils.install_blueprint(
            utils.get_examples_blueprint_path(blueprint_file),
            utils.get_monitored_vms_inputs(config),
            deployment_id,
            tenant_client,
        )

    # Write the test integer and a timestamped counter reading
    utils.execute_arbitrary_command(
        'basegroup1',
        'echo 2 > /tmp/cloudifytestinteger',
        tenant_client,
    )
    timestamp = int(time.time())
    utils.execute_arbitrary_command(
        'basegroup1',
        'echo {time_now}:2.0 > /tmp/cloudifytestcounter'.format(
            time_now=timestamp,
        ),
        tenant_client,
    )

    # Confirm the expected workflow runs
    utils.wait_for_execution_on_deployment(
        'scale', 'basegroup1', tenant_client,
    )

    # Teardown, in the reverse order of creation
    for _, deployment_id in monitored:
        utils.remove_deployment(deployment_id, tenant_client)
        utils.delete_blueprint(deployment_id, tenant_client)
    utils.remove_nagios(tenant_client)
    utils.delete_plugins(plugins, tenant_client)
    utils.remove_config_secrets(tenant_client)
    admin_client.tenants.delete(tenant)
def test_ignore_unreachable():
    """Check that a scale still runs while one node has SNMP stopped.

    Creates a dedicated tenant, deploys nagios from
    ``nagios-aggregates.yaml`` and installs ``baseaggregate``. It then stops
    ``snmpd`` on the first ``base_aggregate_host`` instance, writes a
    timestamped ``0`` counter reading, waits for a ``scale`` execution on
    ``baseaggregate`` and finally removes everything it created.
    """
    tenant = 'test_aggregate_ignore_unreachable'
    config = utils.load_config()

    # Tenant management goes through a client bound to the default tenant;
    # everything else uses a client bound to the test tenant.
    admin_client = utils.get_rest_client_using_config(
        config,
        tenant='default_tenant',
    )
    admin_client.tenants.create(tenant)
    tenant_client = utils.get_rest_client_using_config(config, tenant=tenant)

    utils.upload_config_secrets(config, tenant_client)
    plugins = utils.upload_required_plugins(tenant_client)

    nagios_blueprint = utils.get_examples_blueprint_path(
        'nagios-aggregates.yaml',
    )
    utils.deploy_nagios(
        nagios_blueprint,
        utils.get_nagios_inputs(config),
        tenant_client,
    )

    monitored_blueprint = utils.get_examples_blueprint_path(
        'baseaggregate.yaml',
    )
    utils.install_blueprint(
        monitored_blueprint,
        utils.get_monitored_vms_inputs(config),
        'baseaggregate',
        tenant_client,
    )

    # Turn off SNMP on a node
    unreachable_instance = utils.get_first_node_instance(
        'base_aggregate_host', 'baseaggregate', tenant_client,
    )
    utils.execute_arbitrary_command(
        'baseaggregate',
        'sudo service snmpd stop',
        tenant_client,
        unreachable_instance,
    )

    # Then trigger a scale down
    timestamp = int(time.time())
    utils.execute_arbitrary_command(
        'baseaggregate',
        'echo {time_now}:0 > /tmp/cloudifytestcounter'.format(
            time_now=timestamp,
        ),
        tenant_client,
    )

    # Confirm the expected workflow runs
    utils.wait_for_execution_on_deployment(
        'scale',
        'baseaggregate',
        tenant_client,
        max_wait_for_start=120,
    )

    # Teardown, in the reverse order of creation
    utils.remove_deployment('baseaggregate', tenant_client)
    utils.delete_blueprint('baseaggregate', tenant_client)
    utils.remove_nagios(tenant_client)
    utils.delete_plugins(plugins, tenant_client)
    utils.remove_config_secrets(tenant_client)
    admin_client.tenants.delete(tenant)
def test_abort_on_unreachable():
    """Check that an unreachable node yields an UNKNOWN aggregate check.

    Creates a dedicated tenant, deploys nagios from
    ``nagios-aggregates.yaml`` and installs ``baseaggregate``. It then stops
    ``snmpd`` on the first ``base_aggregate_host`` instance and polls the
    tail of ``/var/log/nagios/check_snmp_aggregate.log`` for ``UNKNOWN``.

    The check runs inside ``try``/``finally`` so that a failed assertion
    still removes the deployments, plugins, secrets and tenant instead of
    leaking them on the manager.
    """
    tenant = 'test_aggregate_abort_on_unreachable'
    config = utils.load_config()

    main_client = utils.get_rest_client_using_config(
        config,
        tenant='default_tenant',
    )
    main_client.tenants.create(tenant)

    client = utils.get_rest_client_using_config(
        config,
        tenant=tenant,
    )

    utils.upload_config_secrets(config, client)
    installed_plugins = utils.upload_required_plugins(client)

    utils.deploy_nagios(
        utils.get_examples_blueprint_path('nagios-aggregates.yaml'),
        utils.get_nagios_inputs(config),
        client,
    )

    utils.install_blueprint(
        utils.get_examples_blueprint_path('baseaggregate.yaml'),
        utils.get_monitored_vms_inputs(config),
        'baseaggregate',
        client,
    )

    try:
        # Turn off SNMP on a node
        utils.execute_arbitrary_command(
            'baseaggregate',
            'sudo service snmpd stop',
            client,
            utils.get_first_node_instance('base_aggregate_host',
                                          'baseaggregate',
                                          client))

        # We should see an UNKNOWN check state within 60 checks, made one
        # second apart
        saw_unknown_state = False
        for _ in range(60):
            result = utils.execute_arbitrary_command(
                'nagios',
                'sudo tail -n10 /var/log/nagios/check_snmp_aggregate.log '
                '| grep UNKNOWN',
                client,
            )
            # Status 0 is presumably the pipeline's exit status, i.e. grep
            # found a match - TODO confirm against the utils helper
            if result['status'] == 0:
                saw_unknown_state = True
                break
            time.sleep(1)
        assert saw_unknown_state, (
            'No UNKNOWN state seen in check_snmp_aggregate.log'
        )
    finally:
        # Always clean up, even when the check above failed
        utils.remove_deployment('baseaggregate', client)
        utils.delete_blueprint('baseaggregate', client)
        utils.remove_nagios(client)
        utils.delete_plugins(installed_plugins, client)
        utils.remove_config_secrets(client)
        main_client.tenants.delete(tenant)
def test_external_trap_triggers_heal():
    """Check that a trap carrying a node's address starts a heal.

    Creates a dedicated tenant, deploys nagios from ``nagios-traps.yaml``
    and installs ``basetrap``. It then runs ``snmptrap`` against
    ``localhost`` via the ``nagios`` deployment, with a message containing
    the IP of the first ``base_trap_host`` instance, waits for a ``heal``
    execution on ``basetrap`` and finally removes everything it created.
    """
    tenant = 'test_external_trap_triggers_heal'
    config = utils.load_config()

    # Tenant management goes through a client bound to the default tenant;
    # everything else uses a client bound to the test tenant.
    admin_client = utils.get_rest_client_using_config(
        config,
        tenant='default_tenant',
    )
    admin_client.tenants.create(tenant)
    tenant_client = utils.get_rest_client_using_config(config, tenant=tenant)

    utils.upload_config_secrets(config, tenant_client)
    plugins = utils.upload_required_plugins(tenant_client)

    utils.deploy_nagios(
        utils.get_examples_blueprint_path('nagios-traps.yaml'),
        utils.get_nagios_inputs(config),
        tenant_client,
    )
    utils.install_blueprint(
        utils.get_examples_blueprint_path('basetrap.yaml'),
        utils.get_monitored_vms_inputs(config),
        'basetrap',
        tenant_client,
    )

    # Send the trap; the second varbind carries the monitored node's address
    node_address = utils.get_first_node_instance_ip(
        node='base_trap_host',
        deployment='basetrap',
        client=tenant_client,
    )
    trap_command = (
        'snmptrap -v2c -c testcommunity {ip} "" {oid} '
        '.1.3.6.1.4.1.52312.0.1.1 s "Test message" '
        '.1.3.6.1.4.1.52312.0.1.2 s "The address is {node_address}"'
    ).format(
        ip='localhost',
        oid='.1.3.6.1.4.1.52312.0.0.2',
        node_address=node_address,
    )
    utils.execute_arbitrary_command('nagios', trap_command, tenant_client)

    # Confirm the expected workflow runs
    utils.wait_for_execution_on_deployment('heal', 'basetrap', tenant_client)

    # Teardown, in the reverse order of creation
    utils.remove_deployment('basetrap', tenant_client)
    utils.delete_blueprint('basetrap', tenant_client)
    utils.remove_nagios(tenant_client)
    utils.delete_plugins(plugins, tenant_client)
    utils.remove_config_secrets(tenant_client)
    admin_client.tenants.delete(tenant)
def test_heal_on_threshold_exceeded():
    """Check that writing a test value of 42 starts a heal workflow.

    Creates a dedicated tenant, deploys nagios from ``nagios-values.yaml``
    and installs ``basevalue``. It then writes ``42`` to
    ``/tmp/cloudifytestinteger`` on ``basevalue``, waits for a ``heal``
    execution on that deployment and finally removes everything it created.
    """
    tenant = 'test_value_breach_heal'
    config = utils.load_config()

    # Tenant management goes through a client bound to the default tenant;
    # everything else uses a client bound to the test tenant.
    admin_client = utils.get_rest_client_using_config(
        config,
        tenant='default_tenant',
    )
    admin_client.tenants.create(tenant)
    tenant_client = utils.get_rest_client_using_config(config, tenant=tenant)

    utils.upload_config_secrets(config, tenant_client)
    plugins = utils.upload_required_plugins(tenant_client)

    nagios_blueprint = utils.get_examples_blueprint_path('nagios-values.yaml')
    utils.deploy_nagios(
        nagios_blueprint,
        utils.get_nagios_inputs(config),
        tenant_client,
    )

    monitored_blueprint = utils.get_examples_blueprint_path('basevalue.yaml')
    utils.install_blueprint(
        monitored_blueprint,
        utils.get_monitored_vms_inputs(config),
        'basevalue',
        tenant_client,
    )

    # Write the value that the test expects to trigger the reaction
    utils.execute_arbitrary_command(
        'basevalue',
        'echo 42 > /tmp/cloudifytestinteger',
        tenant_client,
    )

    # Confirm the expected workflow runs
    utils.wait_for_execution_on_deployment('heal', 'basevalue', tenant_client)

    # Teardown, in the reverse order of creation
    utils.remove_deployment('basevalue', tenant_client)
    utils.delete_blueprint('basevalue', tenant_client)
    utils.remove_nagios(tenant_client)
    utils.delete_plugins(plugins, tenant_client)
    utils.remove_config_secrets(tenant_client)
    admin_client.tenants.delete(tenant)
def test_trap_triggers_scale_down():
    """Check that a trap sent from a monitored node starts a scale.

    Creates a dedicated tenant, deploys nagios from ``nagios-traps.yaml``
    and installs ``basetrap``. It then scales ``base_trap_host`` by ``+1``,
    runs ``snmptrap`` against the nagios internal IP via ``basetrap``, waits
    for a ``scale`` execution on ``basetrap`` and finally removes everything
    it created.
    """
    tenant = 'test_trap_triggers_scale_down'
    config = utils.load_config()

    # Tenant management goes through a client bound to the default tenant;
    # everything else uses a client bound to the test tenant.
    admin_client = utils.get_rest_client_using_config(
        config,
        tenant='default_tenant',
    )
    admin_client.tenants.create(tenant)
    tenant_client = utils.get_rest_client_using_config(config, tenant=tenant)

    utils.upload_config_secrets(config, tenant_client)
    plugins = utils.upload_required_plugins(tenant_client)

    utils.deploy_nagios(
        utils.get_examples_blueprint_path('nagios-traps.yaml'),
        utils.get_nagios_inputs(config),
        tenant_client,
    )
    utils.install_blueprint(
        utils.get_examples_blueprint_path('basetrap.yaml'),
        utils.get_monitored_vms_inputs(config),
        'basetrap',
        tenant_client,
    )

    # Add one instance first - presumably so that there is something to
    # scale down when the trap arrives; verify against the blueprint
    scale_up_parameters = {
        'scalable_entity_name': 'base_trap_host',
        'delta': '+1',
        'scale_compute': True,
    }
    utils.execute(
        'basetrap',
        'scale',
        tenant_client,
        parameters=scale_up_parameters,
    )

    # Send the trap
    nagios_ip = utils.get_nagios_internal_ip(tenant_client)
    trap_command = 'snmptrap -v2c -c testcommunity {ip} "" {oid}'.format(
        ip=nagios_ip,
        oid='.1.3.6.1.4.1.52312.0.0.1',
    )
    utils.execute_arbitrary_command('basetrap', trap_command, tenant_client)

    # Confirm the expected workflow runs
    # NOTE(review): a 'scale' execution was already started manually above;
    # confirm that the wait helper only matches the trap-triggered one.
    utils.wait_for_execution_on_deployment('scale', 'basetrap', tenant_client)

    # Teardown, in the reverse order of creation
    utils.remove_deployment('basetrap', tenant_client)
    utils.delete_blueprint('basetrap', tenant_client)
    utils.remove_nagios(tenant_client)
    utils.delete_plugins(plugins, tenant_client)
    utils.remove_config_secrets(tenant_client)
    admin_client.tenants.delete(tenant)
def test_trap_not_triggered_constraints():
    """Check that a trap produces a "No reaction" log entry.

    Creates a dedicated tenant, deploys nagios from ``nagios-traps.yaml``
    and installs ``basetrap``. It then runs ``snmptrap`` against the nagios
    internal IP via ``basetrap`` and polls the tail of
    ``/var/log/nagios/notify_cloudify.log`` for ``No reaction``.

    The check runs inside ``try``/``finally`` so that a failed assertion
    still removes the deployments, plugins, secrets and tenant instead of
    leaking them on the manager.
    """
    tenant = 'test_trap_not_triggering_constraints'
    config = utils.load_config()

    main_client = utils.get_rest_client_using_config(
        config,
        tenant='default_tenant',
    )
    main_client.tenants.create(tenant)

    client = utils.get_rest_client_using_config(
        config,
        tenant=tenant,
    )

    utils.upload_config_secrets(config, client)
    installed_plugins = utils.upload_required_plugins(client)

    utils.deploy_nagios(
        utils.get_examples_blueprint_path('nagios-traps.yaml'),
        utils.get_nagios_inputs(config),
        client,
    )

    utils.install_blueprint(
        utils.get_examples_blueprint_path('basetrap.yaml'),
        utils.get_monitored_vms_inputs(config),
        'basetrap',
        client,
    )

    try:
        # Send the trap
        utils.execute_arbitrary_command(
            'basetrap',
            'snmptrap -v2c -c testcommunity {ip} "" {oid}'.format(
                ip=utils.get_nagios_internal_ip(client),
                oid='.1.3.6.1.4.1.52312.0.0.1',
            ),
            client,
        )

        # We should see a log entry stating that there was no reaction
        # within 5 checks, made one second apart
        saw_no_reaction = False
        for _ in range(5):
            result = utils.execute_arbitrary_command(
                'nagios',
                'sudo tail -n10 /var/log/nagios/notify_cloudify.log '
                '| grep "No reaction"',
                client,
            )
            # Status 0 is presumably the pipeline's exit status, i.e. grep
            # found a match - TODO confirm against the utils helper
            if result['status'] == 0:
                saw_no_reaction = True
                break
            time.sleep(1)
        assert saw_no_reaction, (
            'No "No reaction" entry seen in notify_cloudify.log'
        )
    finally:
        # Always clean up, even when the check above failed
        utils.remove_deployment('basetrap', client)
        utils.delete_blueprint('basetrap', client)
        utils.remove_nagios(client)
        utils.delete_plugins(installed_plugins, client)
        utils.remove_config_secrets(client)
        main_client.tenants.delete(tenant)
def _wait_for_group_aggregate_output(client, tenant, group_instance,
                                     group_type, label):
    """Run check_group_aggregate on nagios until dependent checks have run.

    :param client: REST client bound to the test tenant.
    :param tenant: Tenant name passed to the check as ``--tenant``.
    :param group_instance: Value for the check's ``--group-instance``.
    :param group_type: Value for the check's ``--group-type``.
    :param label: Short name of the check, used in the timeout message.
    :return: The first command output that does not contain ``no checks``.
    :raises AssertionError: If every attempt still reports ``no checks``.
    """
    command = (
        'sudo /usr/lib64/nagios/plugins/check_group_aggregate '
        '--approach="arithmetic_mean" --tenant="{tenant}" '
        '--group-instance="{group_instance}" '
        '--unknown="ignore" '
        '--group-type="{group_type}"'
    ).format(
        tenant=tenant,
        group_instance=group_instance,
        group_type=group_type,
    )

    # Until the dependent checks have run there will be a response indicating
    # that there are "no checks associated".
    # Wait up to 30 seconds (this should be about twice as long as is needed)
    for _ in range(10):
        time.sleep(3)
        output = utils.execute_arbitrary_command(
            'nagios',
            command,
            client,
        )['output']
        if 'no checks' not in output:
            return output

    raise AssertionError(
        'Timed out waiting for {label} check'.format(label=label)
    )


def test_groups_do_not_collide():
    """Check that two cross-deployment check groups report separate values.

    Creates a dedicated tenant, deploys nagios from
    ``nagios-groups-nocollide.yaml`` and installs ``basegroup-nocollide``.
    It then runs the group aggregate check for the counter group and for the
    value group, expecting the counter result to exceed 100000 and the value
    result to be exactly 0.

    The checks run inside ``try``/``finally`` so that a failed assertion
    still removes the deployments, plugins, secrets and tenant instead of
    leaking them on the manager.
    """
    tenant = 'test_groups_do_not_collide'
    config = utils.load_config()

    main_client = utils.get_rest_client_using_config(
        config,
        tenant='default_tenant',
    )
    main_client.tenants.create(tenant)

    client = utils.get_rest_client_using_config(
        config,
        tenant=tenant,
    )

    utils.upload_config_secrets(config, client)
    installed_plugins = utils.upload_required_plugins(client)

    utils.deploy_nagios(
        utils.get_examples_blueprint_path('nagios-groups-nocollide.yaml'),
        utils.get_nagios_inputs(config),
        client,
    )

    utils.install_blueprint(
        utils.get_examples_blueprint_path('basegroup-nocollide.yaml'),
        utils.get_monitored_vms_inputs(config),
        'basegroup-nocollide',
        client,
    )

    try:
        counter_result = _wait_for_group_aggregate_output(
            client,
            tenant,
            group_instance='crossdeploymentcountergroup',
            group_type='Test check group counter',
            label='counter',
        )
        # This should be a reasonably large number
        assert int(get_check_value(counter_result)) > 100000

        value_result = _wait_for_group_aggregate_output(
            client,
            tenant,
            group_instance='crossdeploymentvaluegroup',
            group_type='Test check group value',
            label='value',
        )
        assert int(get_check_value(value_result)) == 0
    finally:
        # Always clean up, even when one of the checks above failed
        utils.remove_deployment('basegroup-nocollide', client)
        utils.delete_blueprint('basegroup-nocollide', client)
        utils.remove_nagios(client)
        utils.delete_plugins(installed_plugins, client)
        utils.remove_config_secrets(client)
        main_client.tenants.delete(tenant)
def test_adding_and_updating_target_types():
    """Check that target types can be added and updated by deployment update.

    Creates a dedicated tenant and deploys nagios from
    ``nagios-update-1.yaml``, asserting that no node of type
    ``cloudify.nagios.nodes.TargetType`` exists yet. It then updates the
    ``nagios`` deployment to ``nagios-update-2.yaml``, installs
    ``baseupdate`` and writes ``10`` to its test integer file. After 60
    seconds it updates ``nagios`` again to ``nagios-update-3.yaml``, runs
    the reconcile operation and waits for a ``heal`` execution on
    ``baseupdate`` before removing everything it created.

    Both deployment updates are waited for before the test continues. The
    first one matters because the monitored node is installed on the
    assumption that its target type already exists.
    """
    tenant = 'test_adding_and_updating_target_types'
    config = utils.load_config()

    main_client = utils.get_rest_client_using_config(
        config,
        tenant='default_tenant',
    )
    main_client.tenants.create(tenant)

    client = utils.get_rest_client_using_config(
        config,
        tenant=tenant,
    )

    utils.upload_config_secrets(config, client)
    installed_plugins = utils.upload_required_plugins(client)

    utils.deploy_nagios(
        utils.get_examples_blueprint_path('nagios-update-1.yaml'),
        utils.get_nagios_inputs(config),
        client,
    )

    # Confirm we currently have no target types
    nodes = [item['type'] for item in client.nodes.list(_include=['type'])]
    assert 'cloudify.nagios.nodes.TargetType' not in nodes

    # We don't yet have the target type we need, so let's add it
    client.blueprints.upload(
        path=utils.get_examples_blueprint_path('nagios-update-2.yaml'),
        entity_id='nagiosupdate2',
    )
    update = client.deployment_updates.update_with_existing_blueprint(
        deployment_id='nagios',
        blueprint_id='nagiosupdate2',
    )
    # The update runs as an execution; wait for it so that the target type
    # really exists before anything depends on it
    utils.wait_for_execution(update['execution_id'], client)

    # Now that we have the target type we need, we can install our test node
    utils.install_blueprint(
        utils.get_examples_blueprint_path('baseupdate.yaml'),
        utils.get_monitored_vms_inputs(config),
        'baseupdate',
        client,
    )

    # Now we will set the test integer to return a higher value
    utils.execute_arbitrary_command(
        'baseupdate',
        'echo 10 > /tmp/cloudifytestinteger',
        client,
    )

    # Allow time for the heal to start running if there is a problem
    time.sleep(60)

    # ...and then update the check threshold so that the check can actually
    # run
    client.blueprints.upload(
        path=utils.get_examples_blueprint_path('nagios-update-3.yaml'),
        entity_id='nagiosupdate3',
    )
    update = client.deployment_updates.update_with_existing_blueprint(
        deployment_id='nagios',
        blueprint_id='nagiosupdate3',
        reinstall_list=[utils.get_first_node_instance(
            'base_update_instance',
            'nagios',
            client,
        )],
    )
    utils.wait_for_execution(update['execution_id'], client)

    # Reconcile because updated target types have to be re-created
    utils.execute(
        'nagios',
        'execute_operation',
        client,
        parameters={
            'node_ids': ['nagios'],
            'operation': 'cloudify.interfaces.reconcile.monitoring',
            'allow_kwargs_override': True,
        },
    )

    # Confirm the expected workflow runs
    # If the workflow already ran then it will already have healed so we
    # won't see it run again now.
    utils.wait_for_execution_on_deployment('heal', 'baseupdate', client)

    utils.remove_deployment('baseupdate', client)
    utils.delete_blueprint('baseupdate', client)
    utils.remove_nagios(client)
    utils.delete_blueprint('nagiosupdate2', client)
    utils.delete_blueprint('nagiosupdate3', client)
    utils.delete_plugins(installed_plugins, client)
    utils.remove_config_secrets(client)
    main_client.tenants.delete(tenant)