def test_should_generate_deployment_graph(self):
    """Deployment panel is a TimeSeries with a single DeploymentCount target."""
    service = "service-1"
    data_source = "prod"
    cluster = "cluster-1"
    position = GridPos(1, 2, 3, 4)

    # The only target the panel is expected to carry.
    expected_target = CloudwatchMetricsTarget(
        alias="Deployment",
        namespace="ECS/ContainerInsights",
        statistics=["Maximum"],
        metricName="DeploymentCount",
        dimensions={"ServiceName": service, "ClusterName": cluster},
    )

    panel = generate_deployment_graph(
        name=service,
        cloudwatch_data_source=data_source,
        cluster_name=cluster,
        grid_pos=position,
    )

    panel.should.be.a(TimeSeries)
    panel.title.should.eql("Deployments")
    panel.dataSource.should.eql(data_source)
    panel.targets.should.have.length_of(1)
    panel.targets[0].should.eql(expected_target)
    panel.gridPos.should.eql(position)
def test_should_generate_res_count_graph_with_alert(self):
    """Response graph built with notifications carries a single 5XX alert."""
    service = "service-1"

    # Alert fires when the max of query "A" over the last 15m exceeds 0.
    expected_conditions = [
        AlertCondition(
            Target(refId="A"),
            timeRange=TimeRange("15m", "now"),
            evaluator=GreaterThan(0),
            reducerType=RTYPE_MAX,
            operator=OP_AND,
        ),
    ]

    panel = generate_res_count_graph(
        name=service,
        cloudwatch_data_source="prod",
        grid_pos=GridPos(1, 2, 3, 4),
        loadbalancer="loadbalancer-1",
        target_group="target-group-1",
        notifications=["foo", "bar", "baz"],
    )

    alert = panel.alert
    alert.should.be.a(Alert)
    alert.message.should.eql("{} has 5XX errors".format(service))
    alert.alertConditions.should.have.length_of(1)
    alert.alertConditions.should.eql(expected_conditions)
def test_should_generate_pending_count_with_alerts_graph(self):
    """Pending-count graph built with notifications carries one alert."""
    # Expected condition: max of query "A" over the last 5m is above 0.
    expected_condition = AlertCondition(
        Target(refId="A"),
        timeRange=TimeRange("5m", "now"),
        evaluator=GreaterThan(0),
        reducerType=RTYPE_MAX,
        operator=OP_AND,
    )

    panel = generate_pending_count_graph(
        name="service-1",
        cloudwatch_data_source="prod",
        cluster_name="cluster-1",
        grid_pos=GridPos(1, 2, 3, 4),
        notifications=["foo", "bar"],
    )

    alert = panel.alert
    alert.should.be.a(Alert)
    alert.gracePeriod.should.eql("15m")
    alert.alertConditions.should.have.length_of(1)
    alert.alertConditions[0].should.eql(expected_condition)
def test_should_generate_mem_utilization_percentage_with_alerts_graph(self):
    """Memory-percentage graph alerts above 85 and keeps the notifications."""
    recipients = ["foo", "bar", "baz"]

    panel = generate_mem_utilization_percentage_graph(
        name="service-1",
        cloudwatch_data_source="prod",
        cluster_name="cluster-1",
        grid_pos=GridPos(1, 2, 3, 4),
        notifications=recipients,
    )

    # Expected condition: max of query "A" over the last 15m is above 85.
    expected_condition = AlertCondition(
        Target(refId="A"),
        timeRange=TimeRange("15m", "now"),
        evaluator=GreaterThan(85),
        reducerType=RTYPE_MAX,
        operator=OP_AND,
    )

    alert = panel.alert
    alert.should.be.a(Alert)
    alert.alertConditions.should.have.length_of(1)
    alert.alertConditions[0].should.eql(expected_condition)
    alert.notifications.should.eql(recipients)
def test_should_generate_pending_count_graph(self):
    """Pending-count panel is a Graph with a single PendingTaskCount target."""
    service = "service-1"
    data_source = "prod"
    cluster = "cluster-1"
    position = GridPos(1, 2, 3, 4)

    # The single target is expected to be tagged with refId "A".
    expected_target = CloudwatchMetricsTarget(
        alias="Containers",
        namespace="ECS/ContainerInsights",
        statistics=["Maximum"],
        metricName="PendingTaskCount",
        dimensions={"ServiceName": service, "ClusterName": cluster},
        refId="A",
    )

    panel = generate_pending_count_graph(
        name=service,
        cloudwatch_data_source=data_source,
        cluster_name=cluster,
        grid_pos=position,
        notifications=[],
    )

    panel.should.be.a(Graph)
    panel.title.should.eql("Pending Tasks")
    panel.dataSource.should.eql(data_source)
    panel.targets.should.have.length_of(1)
    panel.targets[0].should.eql(expected_target)
    panel.gridPos.should.eql(position)
def test_should_generate_req_count_graph(self):
    """Request-count panel is a Graph titled "Requests" with two targets."""
    data_source = "prod"

    panel = generate_req_count_graph(
        cloudwatch_data_source=data_source,
        grid_pos=GridPos(1, 2, 3, 4),
        loadbalancer="loadbalancer-1",
        target_group="target-group-1",
    )

    panel.should.be.a(Graph)
    panel.title.should.eql("Requests")
    panel.dataSource.should.eql(data_source)
    panel.targets.should.have.length_of(2)
def test_should_generate_error_logs_panel(self):
    """Error-logs panel is a Logs panel that queries with the given Lucene query.

    Checks the panel type, title, grid position and data source, and that
    its single target carries the Lucene query unchanged.
    """
    grid_pos = GridPos(1, 2, 3, 4)
    elasticsearch_data_source = "es"
    es_query = 'tag: "booking-api" AND log.level: [50 TO *]'

    panel = generate_error_logs_panel(
        lucene_query=es_query,
        grid_pos=grid_pos,
        elasticsearch_data_source=elasticsearch_data_source,
    )

    panel.should.be.a(Logs)
    panel.title.should.eql("Error Logs")
    panel.gridPos.should.eql(grid_pos)
    panel.dataSource.should.eql(elasticsearch_data_source)
    panel.targets.should.have.length_of(1)
    panel.targets[0].query.should.eql(es_query)
def test_should_generate_mem_utilization_graph(self):
    """Memory-utilization panel is a Graph with four targets."""
    data_source = "prod"
    position = GridPos(1, 2, 3, 4)

    panel = generate_mem_utilization_graph(
        name="service-1",
        cloudwatch_data_source=data_source,
        cluster_name="cluster-1",
        grid_pos=position,
    )

    panel.should.be.a(Graph)
    panel.title.should.eql("Memory Utilization")
    panel.dataSource.should.eql(data_source)
    panel.targets.should.have.length_of(4)
    panel.gridPos.should.eql(position)
def test_should_generate_running_count_stats_panel(self):
    """Task-count Stat panel exposes Desired, Pending and Running targets."""
    service = "service-1"
    data_source = "prod"
    cluster = "cluster-1"
    position = GridPos(1, 2, 3, 4)

    def task_count_target(alias, metric_name, **extra):
        # All expected targets share namespace, statistic and dimensions.
        return CloudwatchMetricsTarget(
            alias=alias,
            namespace="ECS/ContainerInsights",
            statistics=["Maximum"],
            metricName=metric_name,
            dimensions={"ServiceName": service, "ClusterName": cluster},
            **extra
        )

    # Only the "Running" target is expected to carry an explicit refId.
    expected_targets = [
        task_count_target("Desired", "DesiredTaskCount"),
        task_count_target("Pending", "PendingTaskCount"),
        task_count_target("Running", "RunningTaskCount", refId="A"),
    ]

    panel = generate_running_count_stats_panel(
        name=service,
        cloudwatch_data_source=data_source,
        cluster_name=cluster,
        grid_pos=position,
    )

    panel.should.be.a(Stat)
    panel.title.should.eql("Task Count")
    panel.dataSource.should.eql(data_source)
    panel.gridPos.should.eql(position)
    panel.colorMode.should.eql("background")
    panel.alignment.should.eql("center")
    panel.targets.should.have.length_of(3)
    panel.targets.should.eql(expected_targets)
def test_should_generate_desired_count_without_alerts_graph_when_desired_is_1(self):
    """Desired-count graph has no alert when ``max`` is 1, despite notifications."""
    max_count = 1

    panel = generate_desired_count_graph(
        name="service-1",
        cloudwatch_data_source="prod",
        cluster_name="cluster-1",
        grid_pos=GridPos(1, 2, 3, 4),
        max=max_count,
        notifications=["foo", "bar"],
    )

    panel.alert.should.eql(None)
def test_should_generate_cpu_utilization_graph(self):
    """CPU panel is a Graph with Min, Avg and Max CPUUtilization targets."""
    service = "service-1"
    data_source = "prod"
    cluster = "cluster-1"
    position = GridPos(1, 2, 3, 4)

    panel = generate_cpu_utilization_graph(
        name=service,
        cloudwatch_data_source=data_source,
        cluster_name=cluster,
        grid_pos=position,
    )

    # One expected target per statistic, in the order Min, Avg, Max.
    alias_and_statistic = (
        ("Min", "Minimum"),
        ("Avg", "Average"),
        ("Max", "Maximum"),
    )
    expected_targets = [
        CloudwatchMetricsTarget(
            alias=alias,
            namespace="AWS/ECS",
            statistics=[statistic],
            metricName="CPUUtilization",
            dimensions={"ServiceName": service, "ClusterName": cluster},
        )
        for alias, statistic in alias_and_statistic
    ]

    panel.should.be.a(Graph)
    panel.title.should.eql("CPU Utilization Percentage")
    panel.dataSource.should.eql(data_source)
    panel.targets.should.have.length_of(3)
    panel.targets.should.eql(expected_targets)
    panel.gridPos.should.eql(position)
def generate_ecs_alb_service_dashboard(
    name: str,
    cluster_name: str,
    cloudwatch_data_source: str,
    notifications: List[str],
    environment: str,
    loadbalancer: str,
    target_group: str,
    elasticsearch_data_source: str,
    lucene_query: str,
    max: int,
    *args,
    **kwargs,
):
    """Generate ECS Service dashboard

    The dashboard is laid out as rows: Summary, Capacity, Utilization,
    Requests and Responses, plus a Logs row that is appended only when both
    ``elasticsearch_data_source`` and ``lucene_query`` are truthy.

    Args:
        name: Service name; used in the dashboard title and passed to the
            per-service panels.
        cluster_name: Passed to the cluster-scoped panels.
        cloudwatch_data_source: Data source handed to every CloudWatch panel.
        notifications: Forwarded to the panels that accept it (running,
            desired and pending counts, memory percentage, responses).
        environment: Appended to the dashboard tags.
        loadbalancer: Passed to the request and response panels.
        target_group: Passed to the request and response panels.
        elasticsearch_data_source: Data source for the error-logs panel.
        lucene_query: Query passed to the logs-row panels.
        max: Forwarded to the running-count and desired-count graphs.
        *args: Accepted but not used.
        **kwargs: Accepted but not used.

    Returns:
        The result of ``Dashboard(...).auto_panel_ids()``.
    """
    tags = ["ecs", "ecs-service", "containers", "service", environment]

    summary_row = [
        RowPanel(
            title="Summary",
            gridPos=GridPos(1, 24, 0, 0),
        ),
        generate_running_count_stats_panel(
            name=name,
            cluster_name=cluster_name,
            cloudwatch_data_source=cloudwatch_data_source,
            grid_pos=GridPos(8, 12, 0, 1),
        ),
        generate_deployment_graph(
            name=name,
            cluster_name=cluster_name,
            cloudwatch_data_source=cloudwatch_data_source,
            grid_pos=GridPos(8, 12, 12, 1),
        ),
    ]

    capacity_row = [
        RowPanel(title="Capacity", gridPos=GridPos(1, 24, 0, 9)),
        generate_running_count_graph(
            name=name,
            cluster_name=cluster_name,
            max=max,
            cloudwatch_data_source=cloudwatch_data_source,
            grid_pos=GridPos(8, 8, 0, 10),
            notifications=notifications,
        ),
        generate_desired_count_graph(
            name=name,
            cluster_name=cluster_name,
            max=max,
            cloudwatch_data_source=cloudwatch_data_source,
            grid_pos=GridPos(8, 8, 8, 10),
            notifications=notifications,
        ),
        generate_pending_count_graph(
            name=name,
            cluster_name=cluster_name,
            cloudwatch_data_source=cloudwatch_data_source,
            grid_pos=GridPos(8, 8, 16, 10),
            notifications=notifications,
        ),
    ]

    utilization_row = [
        RowPanel(title="Utilization", gridPos=GridPos(1, 24, 0, 18)),
        generate_cpu_utilization_graph(
            name=name,
            cluster_name=cluster_name,
            cloudwatch_data_source=cloudwatch_data_source,
            grid_pos=GridPos(8, 8, 0, 19),
        ),
        generate_mem_utilization_graph(
            name=name,
            cluster_name=cluster_name,
            cloudwatch_data_source=cloudwatch_data_source,
            grid_pos=GridPos(8, 8, 8, 19),
        ),
        generate_mem_utilization_percentage_graph(
            name=name,
            cluster_name=cluster_name,
            cloudwatch_data_source=cloudwatch_data_source,
            grid_pos=GridPos(8, 8, 16, 19),
            notifications=notifications,
        ),
    ]

    traffic_row = [
        RowPanel(title="Requests and Responses", gridPos=GridPos(1, 24, 0, 27)),
        generate_req_count_graph(
            loadbalancer=loadbalancer,
            target_group=target_group,
            cloudwatch_data_source=cloudwatch_data_source,
            grid_pos=GridPos(8, 12, 0, 28),
        ),
        generate_res_count_graph(
            name=name,
            loadbalancer=loadbalancer,
            target_group=target_group,
            cloudwatch_data_source=cloudwatch_data_source,
            grid_pos=GridPos(8, 12, 12, 28),
            notifications=notifications,
        ),
    ]

    panels = summary_row + capacity_row + utilization_row + traffic_row

    # The logs row needs both a data source and a query to be meaningful.
    if elasticsearch_data_source and lucene_query:
        panels.extend(
            [
                RowPanel(title="Logs", gridPos=GridPos(1, 24, 0, 36)),
                generate_helpful_resources_panel(
                    lucene_query=lucene_query, grid_pos=GridPos(8, 24, 0, 37)
                ),
                generate_error_logs_panel(
                    grid_pos=GridPos(24, 24, 0, 45),
                    elasticsearch_data_source=elasticsearch_data_source,
                    lucene_query=lucene_query,
                ),
            ]
        )

    title = "{} {}".format("ECS Service:", name)
    dashboard = Dashboard(
        title=title,
        editable=EDITABLE,
        tags=tags,
        timezone=TIMEZONE,
        sharedCrosshair=SHARED_CROSSHAIR,
        panels=panels,
        refresh=DEFAULT_REFRESH,
    )
    return dashboard.auto_panel_ids()
template_list = [ Template(default="", name="instance", label="Machine", query="label_values(instance)"), chip_template, ] dashboard = Dashboard( title="Temperature", templating=Templating(template_list), panels=[ RowPanel( title="New Row", gridPos=GridPos(h=1, w=24, x=0, y=8), ), Graph( title="$chip", dataSource="Prometheus", targets=[ Target(expr=("sensors_temp_input{" + 'instance="$instance",chip="$chip"}'), legendFormat="{{feature}}", refId="A"), ], repeat=Repeat("v", "chip"), yAxes=YAxes( YAxis(format=CELSIUS_FORMAT), YAxis(format=SHORT_FORMAT), ),
def test_should_generate_res_count_graph(self):
    """Response panel is a Graph with 2xx, 3xx, 4xx and 5xx ALB targets."""
    data_source = "prod"
    balancer = "loadbalancer-1"
    group = "target-group-1"
    position = GridPos(1, 2, 3, 4)

    def response_target(alias, metric_name, **extra):
        # All expected targets share namespace, statistic and dimensions.
        return CloudwatchMetricsTarget(
            alias=alias,
            namespace="AWS/ApplicationELB",
            statistics=["Sum"],
            metricName=metric_name,
            dimensions={
                "LoadBalancer": balancer,
                "TargetGroup": group,
            },
            **extra
        )

    # Only the 5xx target is expected to carry an explicit refId.
    expected_targets = [
        response_target("2xx", "HTTPCode_Target_2XX_Count"),
        response_target("3xx", "HTTPCode_Target_3XX_Count"),
        response_target("4xx", "HTTPCode_Target_4XX_Count"),
        response_target("5xx", "HTTPCode_Target_5XX_Count", refId="A"),
    ]

    panel = generate_res_count_graph(
        name="service-1",
        cloudwatch_data_source=data_source,
        grid_pos=position,
        loadbalancer=balancer,
        target_group=group,
        notifications=[],
    )

    panel.should.be.a(Graph)
    panel.title.should.eql("Responses")
    panel.gridPos.should.eql(position)
    panel.dataSource.should.eql(data_source)
    panel.targets.should.have.length_of(4)
    panel.targets.should.eql(expected_targets)
description="Example dashboard using the Random Walk and default Prometheus datasource", tags=[ 'example' ], timezone="browser", panels=[ TimeSeries( title="Random Walk", dataSource='default', targets=[ Target( datasource='grafana', expr='example', ), ], gridPos=GridPos(h=8, w=16, x=0, y=0), ), GaugePanel( title="Random Walk", dataSource='default', targets=[ Target( datasource='grafana', expr='example', ), ], gridPos=GridPos(h=4, w=4, x=17, y=0), ), TimeSeries( title="Prometheus http requests", dataSource='prometheus',
from grafanalib.core import ( Dashboard, Graph, GridPos, OPS_FORMAT, RowPanel, SHORT_FORMAT, SqlTarget, YAxes, YAxis, ) dashboard = Dashboard( title="Random stats from SQL DB", panels=[ RowPanel(title="New row", gridPos=GridPos(h=1, w=24, x=0, y=8)), Graph( title="Some SQL Queries", dataSource="Your SQL Source", targets=[ SqlTarget( rawSql= 'SELECT date as "time", metric FROM example WHERE $__timeFilter("time")', refId="A", ), ], yAxes=YAxes( YAxis(format=OPS_FORMAT), YAxis(format=SHORT_FORMAT), ), gridPos=GridPos(h=8, w=24, x=0, y=9),