def errors(collapse: bool) -> Row:
    return Row(
        title="Error (System vs user)",
        collapse=collapse,
        panels=[
            Graph(
                title="User errors",
                dataSource=DATASOURCE,
                targets=[
                    Target(
                        expr='sum(rate(flyte:propeller:all:node:user_error_duration_ms_count{project=~"$project",domain=~"$domain",wf=~"$project:$domain:$workflow"}[5m]))',
                        refId='A',
                    ),
                ],
                yAxes=single_y_axis(format=SHORT_FORMAT),
            ),
            Graph(
                title="System errors",
                dataSource=DATASOURCE,
                targets=[
                    Target(
                        expr='sum(rate(flyte:propeller:all:node:system_error_duration_ms_count{project=~"$project",domain=~"$domain",wf=~"$project:$domain:$workflow"}[5m]))',
                        refId='A',
                    ),
                ],
                yAxes=single_y_axis(format=SHORT_FORMAT),
            ),
        ])
def dynamic_wf_build() -> typing.List[Graph]:
    return [
        Graph(
            title="Dynamic workflow build latency",
            dataSource=DATASOURCE,
            targets=[
                Target(
                    expr='sum(flyte:propeller:all:node:build_dynamic_workflow_us) by (quantile, wf) / 1000',
                    refId='A',
                ),
            ],
            yAxes=single_y_axis(format=MILLISECONDS_FORMAT),
        ),
        Graph(
            title="Dynamic workflow build count",
            dataSource=DATASOURCE,
            targets=[
                Target(
                    expr='sum(rate(flyte:propeller:all:node:build_dynamic_workflow_us_count[5m])) by (wf)',
                    refId='A',
                ),
            ],
            yAxes=single_y_axis(format=NO_FORMAT),
        ),
    ]
def wf_event_recording() -> typing.List[Graph]:
    return [
        Graph(
            title="wf event recording latency success",
            dataSource=DATASOURCE,
            targets=[
                Target(
                    expr='sum(flyte:propeller:all:workflow:event_recording:success_duration_ms) by (quantile, wf)',
                    refId='A',
                ),
            ],
            yAxes=single_y_axis(format=MILLISECONDS_FORMAT),
        ),
        Graph(
            title="wf event recording count",
            dataSource=DATASOURCE,
            targets=[
                Target(
                    expr='sum(rate(flyte:propeller:all:workflow:event_recording:success_duration_ms_count[5m])) by (wf)',
                    legendFormat="success",
                    refId='A',
                ),
                Target(
                    expr='sum(rate(flyte:propeller:all:workflow:event_recording:failure_duration_ms_count[5m])) by (wf)',
                    legendFormat="failure",
                    refId='B',  # was a duplicate 'A'; each target in a panel needs a unique refId
                ),
            ],
            yAxes=single_y_axis(format=NO_FORMAT),
        ),
    ]
def wf_store_latency(collapse: bool) -> Row:
    return Row(
        title="etcd write metrics",
        collapse=collapse,
        panels=[
            Graph(
                title="wf update etcd latency",
                dataSource=DATASOURCE,
                targets=[
                    Target(
                        expr='sum(flyte:propeller:all:wf_update_latency_ms) by (quantile)',
                        refId='A',
                    ),
                ],
                yAxes=single_y_axis(format=MILLISECONDS_FORMAT),
            ),
            Graph(
                title="etcd writes",
                dataSource=DATASOURCE,
                targets=[
                    Target(
                        expr='sum(rate(flyte:propeller:all:wf_update_latency_ms_count[5m]))',
                        refId='A',
                    ),
                ],
                yAxes=single_y_axis(format=NO_FORMAT),
            ),
            Graph(
                title="etcd write conflicts",
                dataSource=DATASOURCE,
                targets=[
                    Target(
                        expr='sum(rate(flyte:propeller:all:wf_update_conflict[5m]))',
                        refId='A',
                    ),
                ],
                yAxes=single_y_axis(format=NO_FORMAT),
            ),
            Graph(
                title="etcd write fail",
                dataSource=DATASOURCE,
                targets=[
                    Target(
                        expr='sum(rate(flyte:propeller:all:wf_update_failed[5m]))',
                        refId='A',
                    ),
                ],
                yAxes=single_y_axis(format=NO_FORMAT),
            ),
        ])
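# Usage sketch (not part of the original source): assuming grafanalib's core Dashboard/Row/
# Templating classes and the same DATASOURCE/format constants used above, the row helpers in
# this module could be composed into a single workflow dashboard roughly like this. The
# function name `workflow_dashboard` and the empty template list are illustrative placeholders.
from grafanalib.core import Dashboard, Row, Templating


def workflow_dashboard() -> Dashboard:
    return Dashboard(
        title="Flyte workflow observability (sketch)",
        rows=[
            errors(collapse=False),
            wf_store_latency(collapse=True),
            Row(title="Dynamic workflows", collapse=True, panels=dynamic_wf_build()),
        ],
        # the $project/$domain/$workflow template variables referenced in the PromQL above
        # would be declared here
        templating=Templating(list=[]),
    ).auto_panel_ids()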
def generate_rds_free_storage_space_graph(name: str, cloudwatch_data_source: str):
    """
    Generate RDS free storage space graph
    """
    y_axes = single_y_axis(format=BYTES)

    targets = [
        CloudwatchMetricsTarget(
            alias="Free storage",
            metricName="FreeStorageSpace",
            statistics=["Minimum"],
            namespace=NAMESPACE,
            dimensions={"DBInstanceIdentifier": name},
            period="1m",
            refId=ALERT_REF_ID,
        ),
    ]

    return Graph(
        title="Free storage",
        dataSource=cloudwatch_data_source,
        targets=targets,
        yAxes=y_axes,
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=False,
        lines=True,
    ).auto_ref_ids()
def node_errors() -> Graph:
    return Graph(
        title="node error count",
        dataSource=DATASOURCE,
        targets=[
            Target(
                expr='sum(rate(flyte:propeller:all:node:perma_system_error_duration_unlabeled_ms_count[5m]))',
                legendFormat="system error",
                refId='A',
            ),
            Target(
                expr='sum(rate(flyte:propeller:all:node:perma_user_error_duration_unlabeled_ms[5m]))',
                legendFormat="user error",
                refId='B',
            ),
            Target(
                expr='sum(rate(flyte:propeller:all:node:perma_unknown_error_duration_unlabeled_ms[5m]))',
                legendFormat="unknown error",  # was duplicated as "user error"
                refId='C',
            ),
        ],
        yAxes=single_y_axis(format=NO_FORMAT),
    )
def create_lambda_sqs_graph(name: str, cloudwatch_data_source: str, fifo: bool):
    """Create SQS graph"""

    if fifo:
        name += ".fifo"

    targets = [
        CloudwatchMetricsTarget(
            alias="Number of messages sent to the queue",
            namespace="AWS/SQS",
            statistics=["Sum"],
            metricName="NumberOfMessagesSent",
            dimensions={"QueueName": name},
        )
    ]

    yAxes = single_y_axis(format=SHORT_FORMAT)

    return Graph(
        title="SQS: {}".format(name),
        dataSource=cloudwatch_data_source,
        targets=targets,
        yAxes=yAxes,
        transparent=TRANSPARENT,
        editable=EDITABLE,
    ).auto_ref_ids()
def generate_firehose_graph(influxdb_data_source: str) -> Graph:
    """
    Generate Firehose graph
    """
    y_axes = single_y_axis(format=SHORT_FORMAT)

    targets = [
        InfluxDBTarget(
            alias=FIREHOSE_INCOMING_RECORDS_ALIAS,
            query='SELECT sum("incoming_records_sum") FROM "{}"."{}" WHERE ("delivery_stream_name" =~ /^$firehose$/) AND $timeFilter GROUP BY time(5m), "delivery_stream_name" fill(0)'.format(  # noqa: E501
                RETENTION_POLICY, FIREHOSE_MEASUREMENT
            ),
            rawQuery=RAW_QUERY,
        ),
        InfluxDBTarget(
            alias=FIREHOSE_DELIVERY_TO_S3_SUCCESS_ALIAS,
            query='SELECT sum("delivery_to_s3._success_sum") FROM "{}"."{}" WHERE ("delivery_stream_name" =~ /^$firehose$/) AND $timeFilter GROUP BY time(5m), "delivery_stream_name" fill(0)'.format(  # noqa: E501
                RETENTION_POLICY, FIREHOSE_MEASUREMENT
            ),
            rawQuery=RAW_QUERY,
        ),
        InfluxDBTarget(
            alias=FIREHOSE_DELIVERY_TO_S3_ALIAS,
            query='SELECT sum("delivery_to_s3._records_sum") FROM "{}"."{}" WHERE ("delivery_stream_name" =~ /^$firehose$/) AND $timeFilter GROUP BY time(5m), "delivery_stream_name" fill(0)'.format(  # noqa: E501
                RETENTION_POLICY, FIREHOSE_MEASUREMENT
            ),
            rawQuery=RAW_QUERY,
        ),
    ]

    series_overrides = [
        {
            "alias": FIREHOSE_INCOMING_RECORDS_ALIAS,
            "color": colors.ORANGE,
        },
        {
            "alias": FIREHOSE_DELIVERY_TO_S3_ALIAS,
            "color": colors.YELLOW,
        },
        {
            "alias": FIREHOSE_DELIVERY_TO_S3_SUCCESS_ALIAS,
            "color": colors.GREEN,
            "zindex": 1,
        },
    ]

    return Graph(
        title="Firehose: $firehose",
        dataSource=influxdb_data_source,
        targets=targets,
        yAxes=y_axes,
        seriesOverrides=series_overrides,
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=True,
        lines=False,
    ).auto_ref_ids()
def generate_elasticache_redis_swap_and_memory_usage_graph(
        cache_cluster_id: str, cloudwatch_data_source: str) -> Graph:
    """
    Generate ElastiCache Redis memory and swap usage graph
    """
    y_axes = single_y_axis(format=BYTES)
    aliases = {
        "bytes": "Bytes used for cache",
        "swap": "Swap Usage",
    }

    targets = [
        CloudwatchMetricsTarget(
            alias=aliases["bytes"],
            namespace=NAMESPACE,
            period="1m",
            statistics=["Maximum"],
            dimensions={"CacheClusterId": cache_cluster_id},
            metricName="BytesUsedForCache",
        ),
        CloudwatchMetricsTarget(
            alias=aliases["swap"],
            namespace=NAMESPACE,
            period="1m",
            statistics=["Maximum"],
            dimensions={"CacheClusterId": cache_cluster_id},
            metricName="SwapUsage",
        ),
    ]

    series_overrides = [
        {
            "alias": aliases["swap"],
            "color": colors.BLUE,
            "lines": True,
            "bars": False,
        },
        {
            "alias": aliases["bytes"],
            "color": colors.GREEN,
            "lines": True,
            "bars": False,
        },
    ]

    return Graph(
        title="Memory and Swap usage",
        dataSource=cloudwatch_data_source,
        targets=targets,
        yAxes=y_axes,
        seriesOverrides=series_overrides,
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=True,
        lines=False,
    ).auto_ref_ids()
def PercentageAxes(label=None, max=1):
    """Y axes that show a percentage based on a unit value."""
    return G.single_y_axis(
        format=G.PERCENT_UNIT_FORMAT,
        label=label,
        logBase=1,
        max=max,
        min=0,
    )
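# Usage sketch (not part of the original source): PercentageAxes pairs naturally with
# ratio-style expressions whose values fall between 0 and 1. The metric names below are
# hypothetical placeholders.
cache_hit_ratio_panel = G.Graph(
    title="Cache hit ratio",
    dataSource="default",
    targets=[
        G.Target(
            expr='sum(rate(cache_hits_total[5m])) / sum(rate(cache_requests_total[5m]))',
            refId='A',
        ),
    ],
    yAxes=PercentageAxes(label="hit ratio"),
)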
def generate_elasticsearch_automated_snapshot_failure_alert_graph(
        name: str, client_id: str, cloudwatch_data_source: str,
        notifications: List[str]):
    """
    Generate Elasticsearch automated snapshot failure graph (with optional alert)
    """
    y_axes = single_y_axis(format=SHORT_FORMAT)

    targets = [
        CloudwatchMetricsTarget(
            alias="Automated snapshot failure",
            namespace=NAMESPACE,
            period="1m",
            statistics=["Maximum"],
            dimensions={
                "DomainName": name,
                "ClientId": client_id
            },
            metricName="AutomatedSnapshotFailure",
            refId=ALERT_REF_ID,
        ),
    ]

    alert = None

    if notifications:
        alert = Alert(
            name="Elasticsearch automated snapshot failure alert",
            message="Elasticsearch automated snapshot failure alert",
            executionErrorState="alerting",
            alertConditions=[
                AlertCondition(
                    Target(refId=ALERT_REF_ID),
                    timeRange=TimeRange("5m", "now"),
                    evaluator=GreaterThan(0),
                    reducerType=RTYPE_MAX,
                    operator=OP_OR,
                ),
            ],
            frequency="2m",
            gracePeriod="2m",
            notifications=notifications,
        )

    return Graph(
        title="Elasticsearch automated snapshot failure alerts",
        dataSource=cloudwatch_data_source,
        targets=targets,
        yAxes=y_axes,
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=False,
        lines=True,
        alert=alert,
    ).auto_ref_ids()
def resource_stats(collapse: bool) -> Row:
    return Row(
        title="Task stats",
        collapse=collapse,
        panels=[
            Graph(
                title="Pending tasks",
                dataSource=DATASOURCE,
                targets=[
                    Target(
                        expr='sum(kube_pod_container_status_waiting * on(pod) group_left(label_execution_id, label_task_name, label_node_id, label_workflow_name) kube_pod_labels{label_execution_id !="",namespace=~"$project-$domain",label_workflow_name=~"$workflow"}) by (namespace, label_execution_id, label_task_name, label_node_id, label_workflow_name) > 0',
                        refId='A',
                    ),
                ],
                yAxes=single_y_axis(format=SHORT_FORMAT),
            ),
            Graph(
                title="Memory Usage Percentage",
                dataSource=DATASOURCE,
                targets=[
                    Target(
                        expr='(max(container_memory_rss{image!=""} * on(pod) group_left(label_execution_id, label_task_name, label_node_id, label_workflow_name) kube_pod_labels{label_execution_id !="",namespace=~"$project-$domain",label_workflow_name=~"$workflow"} * on(pod) group_left(phase) kube_pod_status_phase{phase="Running"}) by (namespace, pod, label_execution_id, label_task_name, label_node_id, label_workflow_name) / max(kube_pod_container_resource_limits_memory_bytes{container!=""} * on(pod) group_left(label_execution_id, label_task_name, label_node_id, label_workflow_name) kube_pod_labels{label_execution_id !=""} * on(pod) group_left(phase) kube_pod_status_phase{phase="Running"}) by (namespace, pod, label_execution_id, label_task_name, label_node_id, label_workflow_name)) > 0',
                        refId='A',
                    ),
                ],
                yAxes=single_y_axis(format=SHORT_FORMAT),
            ),
            Graph(
                title="CPU Usage Percentage",
                dataSource=DATASOURCE,
                targets=[
                    Target(
                        expr='(sum(rate(container_cpu_usage_seconds_total{image!=""}[2m]) * on(pod) group_left(label_execution_id, label_task_name, label_node_id, label_workflow_name) kube_pod_labels{label_execution_id !="",namespace=~"$project-$domain",label_workflow_name=~"$workflow"} * on(pod) group_left(phase) kube_pod_status_phase{phase="Running"}) by (namespace, pod, label_execution_id, label_task_name, label_node_id, label_workflow_name) / sum(kube_pod_container_resource_limits_cpu_cores{container!=""} * on(pod) group_left(label_execution_id, label_task_name, label_node_id, label_workflow_name) kube_pod_labels{label_execution_id !=""} * on(pod) group_left(phase) kube_pod_status_phase{phase="Running"}) by (namespace, pod, label_execution_id, label_task_name, label_node_id, label_workflow_name)) > 0',
                        refId='A',
                    ),
                ],
                yAxes=single_y_axis(format=SHORT_FORMAT),
            ),
        ])
def round_latency(interval: int = 1) -> Graph:
    return Graph(
        title="round Latency by quantile",
        dataSource=DATASOURCE,
        targets=[
            Target(
                expr=f'sum(rate(flyte:propeller:all:round:raw_unlabeled_ms[{interval}m])) by (quantile)',
                refId='A',
            ),
        ],
        yAxes=single_y_axis(format=MILLISECONDS_FORMAT),
    )
def api_call_latency(title, metric, verb, scope, threshold):
    return d.Graph(
        title=title,
        targets=[
            d.Target(expr=str(threshold), legendFormat="threshold"),
            d.Target(
                expr='quantile_over_time(0.99, %(metric)s{quantile="0.99", verb=~"%(verb)s", scope=~"%(scope)s"}[12h])'
                % {"metric": metric, "verb": verb, "scope": scope}
            ),
        ],
        yAxes=g.single_y_axis(format=g.SECONDS_FORMAT),
    )
def api_latency(api: str, interval: int = 1) -> Graph:
    return Graph(
        title=f"{api} Latency",
        dataSource=DATASOURCE,
        targets=[
            Target(
                expr=f'sum(rate(flyte:admin:{api}:duration_ms[{interval}m])) by (quantile)',
                refId='A',
            ),
        ],
        # duration_ms is reported in milliseconds, so label the axis accordingly
        yAxes=single_y_axis(format=MILLISECONDS_FORMAT),
    )
def node_input_latency() -> Graph:
    return Graph(
        title="Node Input latency quantile and workflow",
        dataSource=DATASOURCE,
        targets=[
            Target(
                expr='sum(flyte:propeller:all:node:node_input_latency_ms) by (quantile, wf)',
                refId='A',
            ),
        ],
        yAxes=single_y_axis(format=MILLISECONDS_FORMAT),
    )
def round_latency_per_wf(interval: int = 1) -> Graph:
    return Graph(
        title="round Latency per workflow",
        dataSource=DATASOURCE,
        targets=[
            Target(
                expr=f'sum(rate(flyte:propeller:all:round:raw_ms[{interval}m])) by (wf)',
                refId='A',
            ),
        ],
        yAxes=single_y_axis(format=MILLISECONDS_FORMAT),
    )
def generate_rds_network_throughput_graph(name: str, cloudwatch_data_source: str):
    """
    Generate RDS network throughput graph
    """
    y_axes = single_y_axis(format=BYTES_SEC, min=None)

    targets = [
        CloudwatchMetricsTarget(
            alias="RX",
            metricName="NetworkReceiveThroughput",
            statistics=["Maximum"],
            namespace=NAMESPACE,
            dimensions={"DBInstanceIdentifier": name},
            period="1m",
        ),
        CloudwatchMetricsTarget(
            alias="TX",
            metricName="NetworkTransmitThroughput",
            statistics=["Maximum"],
            namespace=NAMESPACE,
            dimensions={"DBInstanceIdentifier": name},
            period="1m",
        ),
    ]

    series_overrides = [
        {
            "alias": "TX",
            "color": colors.GREEN,
            "transform": "negative-Y",
            "fillGradient": 10,
        },
        {
            "alias": "RX",
            "color": colors.YELLOW,
            "fillGradient": 10,
        },
    ]

    return Graph(
        title="Network throughput",
        dataSource=cloudwatch_data_source,
        targets=targets,
        yAxes=y_axes,
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=False,
        lines=True,
        seriesOverrides=series_overrides,
    ).auto_ref_ids()
def generate_rds_transaction_id_graph(name: str, cloudwatch_data_source: str,
                                      notifications: List[str]):
    """
    Generate RDS transaction IDs used graph (with optional alert)
    """
    y_axes = single_y_axis(format=SHORT_FORMAT)

    targets = [
        CloudwatchMetricsTarget(
            alias="Transaction ids used",
            metricName="MaximumUsedTransactionIDs",
            statistics=["Maximum"],
            namespace=NAMESPACE,
            dimensions={"DBInstanceIdentifier": name},
            period="1m",
            refId=ALERT_REF_ID,
        ),
    ]

    alert = None

    if notifications:
        alert = Alert(
            name="{} transaction ids used Errors".format(name),
            message="{} is having transaction ids used errors".format(name),
            executionErrorState="alerting",
            alertConditions=[
                AlertCondition(
                    Target(refId=ALERT_REF_ID),
                    timeRange=TimeRange("5m", "now"),
                    evaluator=GreaterThan(1000000000),
                    reducerType=RTYPE_MAX,
                    operator=OP_AND,
                )
            ],
            gracePeriod="2m",
            frequency="2m",
            notifications=notifications,
        )

    return Graph(
        title="Transaction ids used",
        dataSource=cloudwatch_data_source,
        targets=targets,
        yAxes=y_axes,
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=False,
        lines=True,
        alert=alert,
    ).auto_ref_ids()
def generate_rds_database_connections_graph(name: str, cloudwatch_data_source: str):
    """
    Generate RDS database connections graph
    """
    y_axes = single_y_axis(format=SHORT_FORMAT)
    min_alias = "min"
    max_alias = "max"
    mean_alias = "mean"

    targets = [
        CloudwatchMetricsTarget(
            alias=max_alias,
            namespace=NAMESPACE,
            dimensions={"DBInstanceIdentifier": name},
            metricName="DatabaseConnections",
            statistics=["Maximum"],
            period="1m",
            refId=ALERT_REF_ID,
        ),
        CloudwatchMetricsTarget(
            alias=mean_alias,
            metricName="DatabaseConnections",
            statistics=["Average"],
            namespace=NAMESPACE,
            dimensions={"DBInstanceIdentifier": name},
            period="1m",
        ),
        CloudwatchMetricsTarget(
            alias=min_alias,
            metricName="DatabaseConnections",
            statistics=["Minimum"],
            namespace=NAMESPACE,
            dimensions={"DBInstanceIdentifier": name},
            period="1m",
        ),
    ]

    series_overrides = get_series_overrides(min_alias, mean_alias, max_alias)

    return Graph(
        title="Database connections",
        dataSource=cloudwatch_data_source,
        targets=targets,
        yAxes=y_axes,
        seriesOverrides=series_overrides,
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=False,
        lines=True,
    ).auto_ref_ids()
def db_latency(entity: str, op: str, interval: int = 1) -> Graph:
    return Graph(
        title=f"{op} Latency",
        dataSource=DATASOURCE,
        targets=[
            Target(
                expr=f'sum(flyte:admin:database:postgres:repositories:{entity}:{op}_ms) by (quantile)',
                refId='A',
            ),
        ],
        yAxes=single_y_axis(format=MILLISECONDS_FORMAT),
    )
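# Usage sketch (not part of the original source): the flyteadmin helpers above could be
# grouped into one Row per API, pairing the endpoint latency with the latency of the database
# operation that backs it. `Row` is grafanalib's core class; the api/entity/op names passed in
# are illustrative placeholders.
def admin_api_row(api: str, entity: str, op: str, collapse: bool = False) -> Row:
    return Row(
        title=f"{api} API",
        collapse=collapse,
        panels=[
            api_latency(api),
            db_latency(entity, op),
        ],
    )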
def api_call_latency(title, verb, scope, threshold):
    return d.Graph(
        title=title,
        targets=[
            g.Target(expr=str(threshold), legendFormat="threshold"),
            g.Target(
                expr='apiserver:apiserver_request_latency_1m:histogram_quantile{quantile="0.99", verb=~"%(verb)s", scope=~"%(scope)s"}'
                % {"verb": verb, "scope": scope},
                legendFormat="{{verb}} {{scope}}/{{resource}}",
            ),
        ],
        yAxes=g.single_y_axis(format=g.SECONDS_FORMAT),
    )
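# Usage sketch (not part of the original source): the helper above is typically parameterised
# per verb/scope combination with an SLO threshold in seconds. The titles, regexes, and
# threshold values below are illustrative placeholders.
api_latency_panels = [
    api_call_latency(
        title="Read latency: cluster-scoped resources",
        verb="GET",
        scope="cluster",
        threshold=30,
    ),
    api_call_latency(
        title="Mutating latency",
        verb="POST|PUT|PATCH|DELETE",
        scope="resource",
        threshold=1,
    ),
]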
def create_lambda_sqs_dlq_graph(name: str, cloudwatch_data_source: str, fifo: bool,
                                notifications: List[str]):
    """Create SQS Deadletter graph"""

    if fifo:
        name += ".fifo"

    targets = [
        CloudwatchMetricsTarget(
            alias="Approximate number of messages available",
            namespace="AWS/SQS",
            statistics=["Maximum"],
            metricName="ApproximateNumberOfMessagesVisible",
            dimensions={"QueueName": name},
            refId=ALERT_REF_ID if notifications else None,
        )
    ]

    yAxes = single_y_axis(format=SHORT_FORMAT)
    alert = None

    # https://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSDeveloperGuide/sqs-monitoring-using-cloudwatch.html
    # https://aws.amazon.com/about-aws/whats-new/2019/12/amazon-sqs-now-supports-1-minute-cloudwatch-metrics/
    if notifications:
        alert = Alert(
            name="{} messages".format(name),
            message="{} is having messages".format(name),
            executionErrorState="alerting",
            alertConditions=[
                AlertCondition(
                    Target(refId=ALERT_REF_ID),
                    timeRange=TimeRange("5m", "now"),
                    evaluator=GreaterThan(0),
                    reducerType=RTYPE_MAX,
                    operator=OP_AND,
                ),
            ],
            gracePeriod="5m",
            notifications=notifications,
        )

    return Graph(
        title="SQS Dead Letter Queue: {}".format(name),
        dataSource=cloudwatch_data_source,
        targets=targets,
        yAxes=yAxes,
        transparent=TRANSPARENT,
        editable=EDITABLE,
        alert=alert,
        alertThreshold=ALERT_THRESHOLD,
    ).auto_ref_ids()
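# Usage sketch (not part of the original source): the SQS and dead-letter-queue helpers above
# could sit side by side in one row per Lambda trigger queue. `Row` comes from grafanalib.core;
# the wrapper name is an illustrative placeholder.
from grafanalib.core import Row


def create_lambda_sqs_row(name: str, cloudwatch_data_source: str, fifo: bool,
                          notifications: List[str]) -> Row:
    return Row(
        title="SQS: {}".format(name),
        panels=[
            create_lambda_sqs_graph(name, cloudwatch_data_source, fifo),
            create_lambda_sqs_dlq_graph(name, cloudwatch_data_source, fifo, notifications),
        ],
    )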
def run(self):
    templateList = [
        G.Template(default="",
                   dataSource="default",
                   name="serverid",
                   label="ServerID",
                   query="label_values(serverid)")
    ]

    dashboard = G.Dashboard(title=self.options.title,
                            templating=G.Templating(list=templateList))

    # Simple table processing - could be enhanced to use GridPos etc.
    for metric in metrics:
        if 'section' in metric:
            dashboard.rows.append(
                G.Row(title=metric['section'], showTitle=True))
            continue
        if 'row' in metric:
            dashboard.rows.append(G.Row(title='', showTitle=False))
            continue

        graph = G.Graph(title=metric['title'],
                        dataSource='default',
                        maxDataPoints=1000,
                        legend=G.Legend(show=True,
                                        alignAsTable=True,
                                        min=True,
                                        max=True,
                                        avg=True,
                                        current=True,
                                        total=True,
                                        sort='max',
                                        sortDesc=True),
                        yAxes=G.single_y_axis())

        ref_id = 'A'
        for texp in metric['expr']:
            graph.targets.append(G.Target(expr=texp, refId=ref_id))
            ref_id = chr(ord(ref_id) + 1)

        dashboard.rows[-1].panels.append(graph)

    # Auto-number panels - returns new dashboard
    dashboard = dashboard.auto_panel_ids()

    s = io.StringIO()
    write_dashboard(dashboard, s)
    print("""{
    "dashboard": %s
}
""" % s.getvalue())
def node_event_recording_latency() -> Graph:
    return Graph(
        title="Node event recording latency quantile and workflow",
        dataSource=DATASOURCE,
        targets=[
            Target(
                expr='sum(flyte:propeller:all:node:event_recording:success_duration_ms) by (quantile, wf)',
                refId='A',
            ),
            Target(
                expr='sum(flyte:propeller:all:node:event_recording:failure_duration_ms) by (quantile, wf)',
                refId='B',
            ),
        ],
        yAxes=single_y_axis(format=MILLISECONDS_FORMAT),
    )
def api_call_latency(title, verb, scope, threshold):
    return d.Graph(
        title=title,
        targets=[
            g.Target(expr=str(threshold), legendFormat="threshold"),
            g.Target(
                expr=d.one_line(expression % {
                    "verb": verb,
                    "scope": scope
                }),
                # TODO(github.com/grafana/grafana/issues/19410): uncomment once fixed
                # legendFormat="{{verb}} {{scope}}/{{resource}}",
            ),
        ],
        yAxes=g.single_y_axis(format=g.SECONDS_FORMAT),
    )
def metastore_cache_hit_percentage(interval: int) -> Graph:
    """
    TODO replace with metric math maybe?
    """
    return Graph(
        title="cache hit percentage",
        dataSource=DATASOURCE,
        targets=[
            Target(
                expr=f'(sum(rate(flyte:propeller:all:metastore:cache_hit[{interval}m])) * 100) / (sum(rate(flyte:propeller:all:metastore:cache_miss[{interval}m])) + sum(rate(flyte:propeller:all:metastore:cache_hit[{interval}m])))',
                refId='A',
            ),
        ],
        yAxes=single_y_axis(format=PERCENT_FORMAT),
    )
def generate_rds_disk_ops_graph(name: str, cloudwatch_data_source: str):
    """
    Generate RDS disk IOPS graph
    """
    y_axes = single_y_axis(format=SHORT_FORMAT, min=None)

    targets = [
        CloudwatchMetricsTarget(
            alias="write iops",
            metricName="WriteIOPS",
            statistics=["Maximum"],
            namespace=NAMESPACE,
            dimensions={"DBInstanceIdentifier": name},
            period="1m",
        ),
        CloudwatchMetricsTarget(
            alias="read iops",
            metricName="ReadIOPS",
            statistics=["Maximum"],
            namespace=NAMESPACE,
            dimensions={"DBInstanceIdentifier": name},
            period="1m",
        ),
        CloudwatchMetricsTarget(
            alias="disk queue depth",
            metricName="DiskQueueDepth",
            statistics=["Maximum"],
            namespace=NAMESPACE,
            dimensions={"DBInstanceIdentifier": name},
            period="1m",
        ),
    ]

    return Graph(
        title="Disk iops",
        dataSource=cloudwatch_data_source,
        targets=targets,
        yAxes=y_axes,
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=False,
        lines=True,
    ).auto_ref_ids()
def generate_elasticache_redis_latency_graph(
        cache_cluster_id: str, cloudwatch_data_source: str) -> Graph:
    """
    Generate ElastiCache Redis latency graph
    """
    y_axes = single_y_axis(format=MILLISECONDS_FORMAT)
    aliases = {
        "latency": "String based CMDs latency",
    }

    targets = [
        CloudwatchMetricsTarget(
            alias=aliases["latency"],
            namespace=NAMESPACE,
            period="1m",
            statistics=["Maximum"],
            dimensions={"CacheClusterId": cache_cluster_id},
            metricName="StringBasedCmdsLatency",
        ),
    ]

    series_overrides = [
        {
            "alias": aliases["latency"],
            "color": colors.GREEN,
            "lines": True,
            "bars": False,
        },
    ]

    return Graph(
        title="Latency",
        dataSource=cloudwatch_data_source,
        targets=targets,
        yAxes=y_axes,
        seriesOverrides=series_overrides,
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=True,
        lines=False,
    ).auto_ref_ids()
def generate_elasticache_redis_connections_graph(
        cache_cluster_id: str, cloudwatch_data_source: str) -> Graph:
    """
    Generate ElastiCache Redis current connections graph
    """
    y_axes = single_y_axis(format=SHORT_FORMAT)
    aliases = {
        "current": "Current connections",
    }

    targets = [
        CloudwatchMetricsTarget(
            alias=aliases["current"],
            namespace=NAMESPACE,
            period="1m",
            statistics=["Maximum"],
            dimensions={"CacheClusterId": cache_cluster_id},
            metricName="CurrConnections",
            refId=ALERT_REF_ID,
        ),
    ]

    series_overrides = [
        {
            "alias": aliases["current"],
            "color": colors.GREEN,
            "lines": True,
            "bars": False,
        },
    ]

    return Graph(
        title="Current connections",
        dataSource=cloudwatch_data_source,
        targets=targets,
        yAxes=y_axes,
        seriesOverrides=series_overrides,
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=True,
        lines=False,
    ).auto_ref_ids()
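# Usage sketch (not part of the original source): the three ElastiCache Redis helpers above
# could be grouped into one row per cache cluster. `Row` comes from grafanalib.core; the
# wrapper name is an illustrative placeholder.
from grafanalib.core import Row


def generate_elasticache_redis_row(cache_cluster_id: str,
                                   cloudwatch_data_source: str) -> Row:
    return Row(
        title="ElastiCache Redis: {}".format(cache_cluster_id),
        panels=[
            generate_elasticache_redis_connections_graph(cache_cluster_id, cloudwatch_data_source),
            generate_elasticache_redis_latency_graph(cache_cluster_id, cloudwatch_data_source),
            generate_elasticache_redis_swap_and_memory_usage_graph(cache_cluster_id, cloudwatch_data_source),
        ],
    )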