def test_php_default():
    with run_php_fpm() as host, Agent.run(
        f"""
        monitors:
        - type: collectd/php-fpm
          url: "http://{host}/status?json"
          name: {INSTANCE}
        """
    ) as agent:
        verify(agent, METADATA.default_metrics)
        assert has_datapoint_with_dim(
            agent.fake_services, "plugin", "curl_json"
        ), "Didn't get php-fpm datapoints"
        assert has_datapoint_with_dim(
            agent.fake_services, "plugin_instance", INSTANCE
        ), "Didn't get right instance dimension on datapoints"

def run(config, metrics):
    with run_service("spark", command="bin/spark-class org.apache.spark.deploy.master.Master") as spark_master:
        master_ip = container_ip(spark_master)
        assert wait_for(p(tcp_socket_open, master_ip, 7077), 60), "master service didn't start"
        assert wait_for(p(tcp_socket_open, master_ip, 8080), 60), "master webui service didn't start"
        assert spark_master.exec_run("./sbin/start-history-server.sh").exit_code == 0, "history service didn't start"
        with run_service(
            "spark", command=f"bin/spark-class org.apache.spark.deploy.worker.Worker spark://{master_ip}:7077"
        ) as spark_worker:
            worker_ip = container_ip(spark_worker)
            assert wait_for(p(tcp_socket_open, worker_ip, 8081), 60), "worker webui service didn't start"
            spark_master.exec_run("nc -lk 9999", detach=True)
            spark_master.exec_run(
                f"bin/spark-submit --master spark://{master_ip}:7077 --conf spark.driver.host={master_ip} {SPARK_APP}",
                detach=True,
            )
            assert wait_for(p(tcp_socket_open, master_ip, 4040), 60), "application service didn't start"
            config = config.format(master_ip=master_ip, worker_ip=worker_ip)
            with Agent.run(config) as agent:
                verify(agent, metrics, timeout=60)
                assert has_datapoint_with_dim(
                    agent.fake_services, "plugin", "apache_spark"
                ), "Didn't get spark datapoints"

def test_docker_observer_labels_partial():
    """
    Test that docker observer picks up a partially configured endpoint from container labels
    """
    with run_agent(
        dedent("""
        observers:
          - type: docker
        monitors:
          - type: collectd/nginx
            discoveryRule: container_name =~ "nginx-disco-partial" && port == 80
        """)
    ) as [backend, _, _]:
        with run_service(
            "nginx",
            name="nginx-disco-partial",
            labels={"agent.signalfx.com.config.80.extraDimensions": "{mydim: myvalue}"},
        ):
            assert wait_for(
                p(has_datapoint_with_dim, backend, "plugin", "nginx")
            ), "Didn't get nginx datapoints"
            assert wait_for(
                p(has_datapoint_with_dim, backend, "mydim", "myvalue")
            ), "Didn't get extra dimension"

        # Let nginx be removed by docker observer and collectd restart
        time.sleep(5)
        backend.reset_datapoints()
        assert ensure_always(
            lambda: not has_datapoint_with_dim(backend, "container_name", "nginx-disco-partial"), 10
        )

def test_docker_observer_labels():
    """
    Test that docker observer picks up a fully configured endpoint from container labels
    """
    with run_agent(
        dedent("""
        observers:
          - type: docker
        """)
    ) as [backend, _, _]:
        with run_service(
            "nginx",
            name="nginx-disco-full",
            labels={
                "agent.signalfx.com.monitorType.80": "collectd/nginx",
                "agent.signalfx.com.config.80.intervalSeconds": "1",
            },
        ):
            assert wait_for(
                p(has_datapoint_with_dim, backend, "plugin", "nginx")
            ), "Didn't get nginx datapoints"

        # Let nginx be removed by docker observer and collectd restart
        time.sleep(5)
        backend.reset_datapoints()
        assert ensure_always(
            lambda: not has_datapoint_with_dim(backend, "container_name", "nginx-disco-full"), 10
        )

def test_ecs_observer_multi_containers():
    with run_service("ecsmeta") as ecsmeta:
        with run_container("redis:4-alpine") as redis, run_container("mongo:4") as mongo:
            with Agent.run(
                CONFIG.substitute(
                    host=container_ip(ecsmeta),
                    redis_ip=container_ip(redis),
                    mongo_ip=container_ip(mongo),
                    case="metadata_multi_containers",
                )
            ) as agent:
                assert wait_for(
                    p(has_datapoint_with_dim, agent.fake_services, "container_image", "redis:latest")
                ), "Didn't get redis datapoints"
                assert wait_for(
                    p(has_datapoint_with_dim, agent.fake_services, "container_image", "mongo:latest")
                ), "Didn't get mongo datapoints"

                # Let redis be removed by docker observer and collectd restart
                time.sleep(5)
                agent.fake_services.datapoints.clear()
                assert ensure_always(
                    lambda: not has_datapoint_with_dim(agent.fake_services, "ClusterName", "seon-fargate-test"), 10
                )

def test_hadoop_default(version):
    """
    Any new versions of hadoop should be manually built, tagged, and pushed to quay.io, i.e.
    docker build \
        -t quay.io/signalfx/hadoop-test:<version> \
        --build-arg HADOOP_VER=<version> \
        <repo_root>/test-services/hadoop
    docker push quay.io/signalfx/hadoop-test:<version>
    """
    with run_container(
        "quay.io/signalfx/hadoop-test:%s" % version, hostname="hadoop-master"
    ) as hadoop_master, run_container(
        "quay.io/signalfx/hadoop-test:%s" % version, hostname="hadoop-worker1"
    ) as hadoop_worker1:
        host = start_hadoop(hadoop_master, hadoop_worker1)

        # start the agent with hadoop config
        config = f"""
        monitors:
        - type: collectd/hadoop
          host: {host}
          port: 8088
          verbose: true
        """
        with Agent.run(config) as agent:
            verify(agent, METADATA.default_metrics - EXCLUDED)
            # Need to run the agent manually because we want to wait for this metric to become 1 but it may
            # be 0 at first.
            assert wait_for(
                p(has_datapoint, agent.fake_services, "gauge.hadoop.cluster.metrics.active_nodes", {}, 1)
            ), "expected 1 hadoop worker node"
            assert has_datapoint_with_dim(
                agent.fake_services, "plugin", "apache_hadoop"
            ), "Didn't get hadoop datapoints"

def run(config, metrics):
    with run_service("apache") as apache_container:
        host = container_ip(apache_container)
        config = config.format(host=host)
        assert wait_for(p(tcp_socket_open, host, 80), 60), "service didn't start"
        with Agent.run(config) as agent:
            verify(agent, metrics)
            assert has_datapoint_with_dim(agent.fake_services, "plugin", "apache"), "Didn't get apache datapoints"

def run_all(version, metrics, extra_metrics=""):
    with run_kafka(version) as kafka:
        kafka_ip = container_ip(kafka)
        kafka_host = container_hostname(kafka)
        image = kafka.image.id
        # We add the Kafka broker host:ip as an extra_host because by default the Kafka broker advertises itself with
        # its hostname and without this the producer and consumer wouldn't be able to resolve the broker hostname.
        with run_producer(image, kafka_host, extra_hosts={kafka_host: kafka_ip}) as kafkaproducerhost, run_consumer(
            image, kafka_host, extra_hosts={kafka_host: kafka_ip}
        ) as kafkaconsumerhost, Agent.run(
            f"""
            monitors:
            - type: collectd/kafka
              host: {kafka_ip}
              port: 7099
              clusterName: testCluster
              extraMetrics: {extra_metrics}
            - type: collectd/kafka_producer
              host: {kafkaproducerhost}
              port: 8099
              extraMetrics: {extra_metrics}
            - type: collectd/kafka_consumer
              host: {kafkaconsumerhost}
              port: 9099
              extraMetrics: {extra_metrics}
            """
        ) as agent:
            verify(agent, metrics)
            assert has_datapoint_with_dim(
                agent.fake_services, "cluster", "testCluster"
            ), "Didn't get cluster dimension from kafka datapoints"
            assert has_datapoint_with_dim(
                agent.fake_services, "client-id", "console-producer"
            ), "Didn't get client-id dimension from kafka_producer datapoints"
            assert has_datapoint_with_dim(
                agent.fake_services, "client-id", "consumer-1"
            ), "Didn't get client-id dimension from kafka_consumer datapoints"

def run(version, node_type, metrics, extra_metrics=""):
    with run_node(node_type, version) as (host, port):
        # start the agent with hadoopjmx config
        config = HADOOPJMX_CONFIG.format(host=host, port=port, nodeType=node_type, extraMetrics=extra_metrics)
        with Agent.run(config) as agent:
            verify(agent, metrics)
            # Check for expected dimension.
            assert has_datapoint_with_dim(
                agent.fake_services, "nodeType", node_type
            ), f"Didn't get hadoopjmx datapoints for nodeType {node_type}"

def test_basic_service_discovery():
    with Agent.run(CONFIG) as agent:
        with run_service("nginx", name="nginx-basic-discovery"):
            assert wait_for(
                p(has_datapoint_with_dim, agent.fake_services, "plugin", "nginx")
            ), "Didn't get nginx datapoints"

        # Let nginx be removed by docker observer and collectd restart
        time.sleep(5)
        agent.fake_services.reset_datapoints()
        assert ensure_always(lambda: not has_datapoint_with_dim(agent.fake_services, "plugin", "nginx"), 10)

def test_nginx_included():
    with run_nginx() as host, Agent.run(
        f"""
        monitors:
        - type: collectd/nginx
          host: {host}
          port: 80
        """
    ) as agent:
        verify(agent, METADATA.included_metrics)
        assert has_datapoint_with_dim(agent.fake_services, "plugin", "nginx"), "Didn't get nginx datapoints"

def test_basic_service_discovery():
    with run_agent(CONFIG) as [backend, _, _]:
        with run_service("nginx", name="nginx-basic-discovery"):
            assert wait_for(
                p(has_datapoint_with_dim, backend, "plugin", "nginx")
            ), "Didn't get nginx datapoints"

        # Let nginx be removed by docker observer and collectd restart
        time.sleep(5)
        backend.datapoints.clear()
        assert ensure_always(lambda: not has_datapoint_with_dim(backend, "plugin", "nginx"), 10)

def test_docker_image_filtering():
    with run_service("nginx") as nginx_container:
        with Agent.run(
            """
            monitors:
            - type: docker-container-stats
              excludedImages:
               - "%s"
            """
            % nginx_container.attrs["Image"]
        ) as agent:
            assert ensure_always(
                lambda: not has_datapoint_with_dim(agent.fake_services, "container_id", nginx_container.id)
            )

def test_kong_default(kong_version):
    with run_kong(kong_version) as kong_ip:
        config = f"""
        monitors:
        - type: collectd/kong
          host: {kong_ip}
          port: 8001
        """
        with Agent.run(config) as agent:
            verify(agent, METADATA.default_metrics)
            assert has_datapoint_with_dim(agent.fake_services, "plugin", "kong"), "Didn't get Kong dimension"

def test_supervisor_default():
    with run_supervisor_fpm() as host, Agent.run(
        f"""
        monitors:
        - type: supervisor
          host: {host}
          port: {PORT}
        """
    ) as agent:
        verify(agent, METADATA.default_metrics)
        assert has_datapoint_with_dim(
            agent.fake_services, "name", PROCESS
        ), "Didn't get process name dimension {}".format(PROCESS)

def test_elasticsearch_included():
    with run_elasticsearch(environment={"cluster.name": "testCluster"}) as es_container:
        host = container_ip(es_container)
        config = f"""
        monitors:
        - type: collectd/elasticsearch
          host: {host}
          port: 9200
          username: elastic
          password: testing123
        """
        with Agent.run(config) as agent:
            verify(agent, METADATA.default_metrics - EXCLUDED)
            assert has_datapoint_with_dim(
                agent.fake_services, "plugin", "elasticsearch"
            ), "Didn't get elasticsearch datapoints"
            assert has_datapoint_with_dim(
                agent.fake_services, "plugin_instance", "testCluster"
            ), "Cluster name not picked from read callback"
            assert not has_log_message(agent.output.lower(), "error"), "error found in agent output!"

def test_shutdown():
    with Agent.run(
        dedent("""
        monitors:
          - type: collectd/df
          - type: collectd/custom
            template: |
              LoadPlugin "filecount"
              <Plugin filecount>
                <Directory "/bin">
                  Instance "bin"
                </Directory>
              </Plugin>
        """)
    ) as agent:
        assert wait_for(
            p(has_datapoint_with_dim, agent.fake_services, "plugin", "filecount")
        ), "Didn't get filecount datapoints"
        assert wait_for(
            p(has_datapoint_with_dim, agent.fake_services, "plugin", "df")
        ), "Didn't get df datapoints"

        agent.update_config(
            dedent("""
            monitors:
              - type: collectd/df
            """)
        )
        time.sleep(3)
        agent.fake_services.reset_datapoints()
        assert ensure_always(
            lambda: not has_datapoint_with_dim(agent.fake_services, "plugin", "filecount")
        ), "Got filecount datapoint when we shouldn't have"

        agent.update_config(
            dedent("""
            monitors:
              - type: collectd/df
              - type: collectd/custom
                template: |
                  LoadPlugin "filecount"
                  <Plugin filecount>
                    <Directory "/bin">
                      Instance "bin"
                    </Directory>
                  </Plugin>
            """)
        )
        assert wait_for(
            p(has_datapoint_with_dim, agent.fake_services, "plugin", "filecount")
        ), "Didn't get filecount datapoints"

def test_custom_collectd_shutdown():
    with run_agent(
        dedent("""
        monitors:
          - type: collectd/df
          - type: collectd/custom
            template: |
              LoadPlugin "ping"
              <Plugin ping>
                Host "google.com"
              </Plugin>
        """)
    ) as [backend, _, configure]:
        assert wait_for(p(has_datapoint_with_dim, backend, "plugin", "ping")), "Didn't get ping datapoints"
        assert wait_for(p(has_datapoint_with_dim, backend, "plugin", "df")), "Didn't get df datapoints"

        configure(
            dedent("""
            monitors:
              - type: collectd/df
            """)
        )
        time.sleep(3)
        backend.datapoints.clear()
        assert ensure_always(
            lambda: not has_datapoint_with_dim(backend, "plugin", "ping")
        ), "Got ping datapoint when we shouldn't have"

        configure(
            dedent("""
            monitors:
              - type: collectd/df
              - type: collectd/custom
                template: |
                  LoadPlugin "ping"
                  <Plugin ping>
                    Host "google.com"
                  </Plugin>
            """)
        )
        assert wait_for(p(has_datapoint_with_dim, backend, "plugin", "ping")), "Didn't get ping datapoints"

def test_docker_stops_watching_destroyed_containers():
    with run_service("nginx") as nginx_container:
        with run_agent(
            """
            monitors:
            - type: docker-container-stats
            """
        ) as [backend, _, _]:
            assert wait_for(
                p(has_datapoint_with_dim, backend, "container_id", nginx_container.id)
            ), "Didn't get nginx datapoints"

            nginx_container.remove(force=True)
            time.sleep(5)
            backend.reset_datapoints()
            assert ensure_always(
                lambda: not has_datapoint_with_dim(backend, "container_id", nginx_container.id)
            )

def test_kong_metric_config():
    """Test that turning on the metric config flag lets the metric through the filter"""
    with run_kong(LATEST) as kong_ip:
        config = f"""
        monitors:
        - type: collectd/kong
          host: {kong_ip}
          port: 8001
          metrics:
          - metric: connections_accepted
            report: true
        """
        with Agent.run(config) as agent:
            verify(agent, METADATA.default_metrics | {"counter.kong.connections.accepted"})
            assert has_datapoint_with_dim(agent.fake_services, "plugin", "kong"), "Didn't get Kong dimension"

def test_docker_stops_watching_paused_containers():
    with run_service("nginx") as nginx_container:
        with Agent.run(
            """
            monitors:
            - type: docker-container-stats
            """
        ) as agent:
            assert wait_for(
                p(has_datapoint_with_dim, agent.fake_services, "container_id", nginx_container.id)
            ), "Didn't get nginx datapoints"

            nginx_container.pause()
            time.sleep(5)
            agent.fake_services.reset_datapoints()
            assert ensure_always(
                lambda: not has_datapoint_with_dim(agent.fake_services, "container_id", nginx_container.id)
            )

def test_does_not_set_hostname_if_not_host_specific():
    with Agent.run(
        """
        hostname: acmeinc.com
        disableHostDimensions: true
        monitors:
          - type: cpu
          - type: processlist
        """
    ) as agent:
        assert ensure_always(
            lambda: not has_datapoint_with_dim(agent.fake_services, "host", "acmeinc.com")
        ), "Got overridden hostname in datapoint"
        assert ensure_always(
            lambda: not has_event_with_dim(agent.fake_services, "host", "acmeinc.com")
        ), "Got overridden hostname in event"

def test_docker_observer():
    with run_agent(CONFIG) as [backend, _, _]:
        with run_service("nginx", name="nginx-discovery", labels={"mylabel": "abc"}):
            assert wait_for(
                p(has_datapoint_with_dim, backend, "plugin", "nginx")
            ), "Didn't get nginx datapoints"
            assert wait_for(p(has_datapoint_with_dim, backend, "mydim", "abc")), "Didn't get custom label dimension"

        # Let nginx be removed by docker observer and collectd restart
        time.sleep(5)
        backend.reset_datapoints()
        assert ensure_always(lambda: not has_datapoint_with_dim(backend, "container_name", "nginx-discovery"), 10)

def test_does_not_set_hostname_if_not_host_specific():
    with run_agent(
        """
        hostname: acmeinc.com
        disableHostDimensions: true
        monitors:
          - type: collectd/signalfx-metadata
            persistencePath: /dev/null
          - type: collectd/cpu
          - type: collectd/uptime
        """
    ) as [backend, _, _]:
        assert ensure_always(
            lambda: not has_datapoint_with_dim(backend, "host", "acmeinc.com")
        ), "Got overridden hostname in datapoint"
        assert ensure_always(
            lambda: not has_event_with_dim(backend, "host", "acmeinc.com")
        ), "Got overridden hostname in event"

def test_couchbase_included(tag):
    with run_couchbase(tag) as host, Agent.run(
        f"""
        monitors:
        - type: collectd/couchbase
          host: {host}
          port: 8091
          collectTarget: NODE
          username: administrator
          password: password
        """
    ) as agent:
        verify(agent, (METADATA.metrics_by_group["nodes"] & METADATA.included_metrics) - EXCLUDED)
        assert has_datapoint_with_dim(
            agent.fake_services, "plugin", "couchbase"
        ), "Didn't get couchbase datapoints"

def test_ecs_container_label_dimension():
    with run_service("ecsmeta") as ecsmeta, run_container("redis:4-alpine") as redis:
        ecsmeta_ip = container_ip(ecsmeta)
        redis_ip = container_ip(redis)
        with run_agent(
            """
            monitors:
            - type: ecs-metadata
              metadataEndpoint: http://%s/metadata_single?redis_ip=%s
              statsEndpoint: http://%s/stats
              labelsToDimensions:
                container_name: container_title
            """
            % (ecsmeta_ip, redis_ip, ecsmeta_ip)
        ) as [backend, _, _]:
            assert ensure_always(
                lambda: not has_datapoint_with_dim(
                    backend, "container_title", "ecs-seon-fargate-test-3-redis-baf2cfda88f8d8ee4900"
                )
            )

def test_kong_extra_metric():
    """Test that adding an extra metric enables the underlying metric config"""
    # counter.kong.connections.handled chosen because it's not reported by default by the monitor
    # and is not a default metric.
    with run_kong(LATEST) as kong_ip:
        config = f"""
        monitors:
        - type: collectd/kong
          host: {kong_ip}
          port: 8001
          extraMetrics:
          - counter.kong.connections.handled
        """
        with Agent.run(config) as agent:
            verify(agent, METADATA.default_metrics | {"counter.kong.connections.handled"})
            assert has_datapoint_with_dim(agent.fake_services, "plugin", "kong"), "Didn't get Kong dimension"

def test_does_not_set_hostname_on_monitor_if_not_host_specific():
    with run_agent(
        """
        hostname: acmeinc.com
        monitors:
          - type: collectd/signalfx-metadata
            persistencePath: /dev/null
          - type: collectd/cpu
          - type: collectd/uptime
            disableHostDimensions: true
        """
    ) as [backend, _, _]:
        assert wait_for(
            p(has_datapoint_with_all_dims, backend, dict(host="acmeinc.com", plugin="signalfx-metadata"))
        ), "Didn't get overridden hostname in datapoint"
        # The uptime monitor has disableHostDimensions: true, so its datapoints should never carry the
        # overridden host dimension.
        assert ensure_always(
            lambda: not has_datapoint_with_all_dims(backend, dict(host="acmeinc.com", plugin="uptime"))
        ), "Got overridden hostname in datapoint"

def test_ecs_container_image_filtering():
    with run_service("ecsmeta") as ecsmeta, run_container("redis:4-alpine") as redis:
        ecsmeta_ip = container_ip(ecsmeta)
        redis_ip = container_ip(redis)
        with run_agent(
            """
            monitors:
            - type: ecs-metadata
              metadataEndpoint: http://%s/metadata_single?redis_ip=%s
              statsEndpoint: http://%s/stats
              excludedImages:
               - redis:latest
            """
            % (ecsmeta_ip, redis_ip, ecsmeta_ip)
        ) as [backend, _, _]:
            assert ensure_always(
                lambda: not has_datapoint_with_dim(
                    backend, "container_id", "c42fa5a73634bcb6e301dfb7b13ac7ead2af473210be6a15da75a290c283b66c"
                )
            )

def test_docker_observer_labels_multiple_monitors_per_port():
    """
    Test that we can configure multiple monitors per port using labels
    """
    with Agent.run(
        dedent("""
        observers:
          - type: docker
        """)
    ) as agent:
        with run_service(
            "nginx",
            name="nginx-multi-monitors",
            labels={
                "agent.signalfx.com.monitorType.80": "collectd/nginx",
                "agent.signalfx.com.config.80.intervalSeconds": "1",
                "agent.signalfx.com.config.80.extraDimensions": "{app: nginx}",
                "agent.signalfx.com.monitorType.80-nginx2": "collectd/nginx",
                "agent.signalfx.com.config.80-nginx2.intervalSeconds": "1",
                "agent.signalfx.com.config.80-nginx2.extraDimensions": "{app: other}",
            },
        ):
            assert wait_for(
                p(has_datapoint_with_dim, agent.fake_services, "plugin", "nginx")
            ), "Didn't get nginx datapoints"
            assert wait_for(p(has_datapoint_with_dim, agent.fake_services, "app", "nginx")), "Didn't get extra dims"
            assert wait_for(p(has_datapoint_with_dim, agent.fake_services, "app", "other")), "Didn't get extra dims"

        # Let nginx be removed by docker observer and collectd restart
        time.sleep(5)
        agent.fake_services.reset_datapoints()
        assert ensure_always(
            lambda: not has_datapoint_with_dim(agent.fake_services, "container_name", "nginx-multi-monitors"), 10
        )