Example No. 1
 def node_errors() -> Graph:
     return Graph(
         title="node event recording count",
         dataSource=DATASOURCE,
         targets=[
             Target(
                 expr=
                 f'sum(rate(flyte:propeller:all:node:perma_system_error_duration_unlabeled_ms_count[5m]))',
                 legendFormat="system error",
                 refId='A',
             ),
             Target(
                 expr=
                 f'sum(rate(flyte:propeller:all:node:perma_user_error_duration_unlabeled_ms[5m]))',
                 legendFormat="user error",
                 refId='A',
             ),
             Target(
                 expr=
                 f'sum(rate(flyte:propeller:all:node:perma_unknown_error_duration_unlabeled_ms[5m]))',
                 legendFormat="user error",
                 refId='A',
             ),
         ],
         yAxes=single_y_axis(format=NO_FORMAT),
     )
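
For orientation, these panel builders assume grafanalib names imported at module scope (Graph, Target, single_y_axis, the *_FORMAT constants) plus project-level constants such as DATASOURCE. The following is a minimal, hypothetical sketch, not taken from the original sources, of those imports and of how a builder like node_errors() could be wired into a dashboard; the data source name, dashboard title, and output path are assumptions, and write_dashboard's import path mirrors its use in Example No. 9 below.

from grafanalib.core import (
    Dashboard, Graph, Row, Target, YAxes, YAxis, single_y_axis,
    MILLISECONDS_FORMAT, NO_FORMAT, OPS_FORMAT, SHORT_FORMAT,
)
from grafanalib._gen import write_dashboard  # assumed import path

DATASOURCE = "prometheus"  # assumed Grafana data source name

dashboard = Dashboard(
    title="flytepropeller (sketch)",
    rows=[Row(panels=[node_errors()])],
).auto_panel_ids()

with open("flytepropeller.json", "w") as f:  # assumed output path
    write_dashboard(dashboard, f)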
Example No. 2
 def wf_event_recording() -> typing.List[Graph]:
     return [
         Graph(
             title="wf event recording latency success",
             dataSource=DATASOURCE,
             targets=[
                 Target(
                     expr=
                     f'sum(flyte:propeller:all:workflow:event_recording:success_duration_ms) by (quantile, wf)',
                     refId='A',
                 ),
             ],
             yAxes=single_y_axis(format=MILLISECONDS_FORMAT),
         ),
         Graph(
             title="wf event recording count",
             dataSource=DATASOURCE,
             targets=[
                 Target(
                     expr=
                     f'sum(rate(flyte:propeller:all:workflow:event_recording:success_duration_ms_count[5m])) by (wf)',
                     legendFormat="success",
                     refId='A',
                 ),
                 Target(
                     expr=
                     f'sum(rate(flyte:propeller:all:workflow:event_recording:failure_duration_ms_count[5m])) by (wf)',
                     legendFormat="failure",
                     refId='A',
                 ),
             ],
             yAxes=single_y_axis(format=NO_FORMAT),
         ),
     ]
Example No. 3
def number_of_active_processes_graph(grr_component):
    return Graph(
        title="Number of Active Processes",
        targets=[
            Target(
                expr='sum(up{{job="grr_{}"}})'.format(grr_component),
                legendFormat="Active Processes",
            ),
        ],
        alert=Alert(
            name="Number of Active Processes alert",
            message="The number of active {} processes is below {}".format(
                grr_component.capitalize(),
                config.ACTIVE_PROCESSES_ALERTING_CONDITION),
            alertConditions=[
                AlertCondition(Target(
                    expr='sum(up{{job="grr_{}"}})'.format(grr_component),
                    legendFormat="Active Processes",
                ),
                               timeRange=TimeRange("10s", "now"),
                               evaluator=LowerThan(
                                   config.ACTIVE_PROCESSES_ALERTING_CONDITION),
                               operator=OP_AND,
                               reducerType=RTYPE_SUM)
            ],
        ))
Example No. 4
 def errors(collapse: bool) -> Row:
     return Row(
         title="Error (System vs user)",
         collapse=collapse,
         panels=[
             Graph(
                 title="User errors",
                 dataSource=DATASOURCE,
                 targets=[
                     Target(
                         expr=
                         'sum(rate(flyte:propeller:all:node:user_error_duration_ms_count{project=~"$project",domain=~"$domain",wf=~"$project:$domain:$workflow"}[5m]))',
                         refId='A',
                     ),
                 ],
                 yAxes=single_y_axis(format=SHORT_FORMAT),
             ),
             Graph(
                 title="System errors",
                 dataSource=DATASOURCE,
                 targets=[
                     Target(
                         expr=
                         'sum(rate(flyte:propeller:all:node:system_error_duration_ms_count{project=~"$project",domain=~"$domain",wf=~"$project:$domain:$workflow"}[5m]))',
                         refId='A',
                     ),
                 ],
                 yAxes=single_y_axis(format=SHORT_FORMAT),
             ),
         ])
Example No. 5
 def dynamic_wf_build() -> typing.List[Graph]:
     return [
         Graph(
             title="Dynamic workflow build latency",
             dataSource=DATASOURCE,
             targets=[
                 Target(
                     expr=
                     f'sum(flyte:propeller:all:node:build_dynamic_workflow_us) by (quantile, wf) / 1000',
                     refId='A',
                 ),
             ],
             yAxes=single_y_axis(format=MILLISECONDS_FORMAT),
         ),
         Graph(
             title="Dynamic workflow build count",
             dataSource=DATASOURCE,
             targets=[
                 Target(
                     expr=
                     f'sum(rate(flyte:propeller:all:node:build_dynamic_workflow_us_count[5m])) by (wf)',
                     refId='A',
                 ),
             ],
             yAxes=single_y_axis(format=NO_FORMAT),
         ),
     ]
Example No. 6
 def error_codes(api: str, interval: int = 1) -> Graph:
     return Graph(
         title=f"{api} return codes",
         dataSource=DATASOURCE,
         targets=[
             Target(
                 expr=f'sum(irate(flyte:admin:{api}:codes:OK[{interval}m]))',
                 legendFormat="ok",
                 refId='A',
             ),
             Target(
                 expr=f'sum(irate(flyte:admin:{api}:codes:InvalidArgument[{interval}m]))',
                 legendFormat="invalid-args",
                 refId='B',
             ),
             Target(
                 expr=f'sum(irate(flyte:admin:{api}:codes:AlreadyExists[{interval}m]))',
                 legendFormat="already-exists",
                 refId='C',
             ),
             Target(
                 expr=f'sum(irate(flyte:admin:{api}:codes:FailedPrecondition[{interval}m]))',
                 legendFormat="failed-precondition",
                 refId='D',
             ),
         ],
         yAxes=YAxes(
             YAxis(format=OPS_FORMAT),
             YAxis(format=SHORT_FORMAT),
         ),
     )
Example No. 7
 def wf_store_latency(collapse: bool) -> Row:
     return Row(
         title="etcD write metrics",
         collapse=collapse,
         panels=[
             Graph(
                 title="wf update etcD latency",
                 dataSource=DATASOURCE,
                 targets=[
                     Target(
                         expr=
                         f'sum(flyte:propeller:all:wf_update_latency_ms) by (quantile)',
                         refId='A',
                     ),
                 ],
                 yAxes=single_y_axis(format=MILLISECONDS_FORMAT),
             ),
             Graph(
                 title="etcD writes",
                 dataSource=DATASOURCE,
                 targets=[
                     Target(
                         expr=
                         f'sum(rate(flyte:propeller:all:wf_update_latency_ms_count[5m]))',
                         refId='A',
                     ),
                 ],
                 yAxes=single_y_axis(format=NO_FORMAT),
             ),
             Graph(
                 title="etcD write conflicts",
                 dataSource=DATASOURCE,
                 targets=[
                     Target(
                         expr=
                         f'sum(rate(flyte:propeller:all:wf_update_conflict[5m]))',
                         refId='A',
                     ),
                 ],
                 yAxes=single_y_axis(format=NO_FORMAT),
             ),
             Graph(
                 title="etcD write fail",
                 dataSource=DATASOURCE,
                 targets=[
                     Target(
                         expr=
                         f'sum(rate(flyte:propeller:all:wf_update_failed[5m]))',
                         refId='A',
                     ),
                 ],
                 yAxes=single_y_axis(format=NO_FORMAT),
             ),
         ])
Example No. 8
def threadpool_outstanding_tasks_vs_threads_num(grr_component):
  return Graph(
    title="Outstanding Tasks vs. Number of Threads",
    targets=[
      Target(
        expr='sum(threadpool_outstanding_tasks{{job="grr_{}"}})'.format(grr_component),
        legendFormat="Outstanding Tasks",
      ),
      Target(
        expr='sum(threadpool_threads{{job="grr_{}"}})'.format(grr_component),
        legendFormat="Threads",
      ),
    ])
Example No. 9
    def export(self, output_file=''):
        from baskerville.models.metrics.registry import metrics_registry
        panels = []
        for i, (metric_name,
                value) in enumerate(metrics_registry.registry.items()):
            if i % 4 == 0 or i == len(metrics_registry.registry):
                self.rows.append(Row(panels=panels))
                panels = []

            if 'timer' in metric_name:
                g = Gauge()
                g.minValue = 0
                g.maxValue = 100
                g.show = True
                g.thresholdMarkers = True
                panels.append(
                    SingleStat(
                        title=metric_name,
                        dataSource=self.ds,
                        gauge=g,
                        targets=[
                            Target(
                                expr=
                                f'({metric_name}_sum / {metric_name}_count)',
                                target=metric_name,
                                refId='A',
                                metric=metric_name,
                                datasource=self.ds,
                            )
                        ]))
            else:
                panels.append(
                    Graph(title=metric_name,
                          dataSource=self.ds,
                          targets=[
                              Target(expr=f'{metric_name}_total' if 'total'
                                     in metric_name else metric_name,
                                     target=metric_name,
                                     refId='A',
                                     metric=metric_name,
                                     datasource=self.ds)
                          ]))

        for panel in panels:
            self.rows.append(Row(panels=[panel]))

        self.dashboard = Dashboard(title=self.dash_title,
                                   rows=self.rows).auto_panel_ids()

        with open(output_file, 'w') as f:
            write_dashboard(self.dashboard, f)
Example No. 10
 def quota_stats(collapse: bool) -> Row:
     return Row(
         title="Kubernetes Quota Usage stats",
         collapse=collapse,
         panels=[
             Graph(
                 title="CPU Limits vs usage",
                 dataSource=DATASOURCE,
                 targets=[
                     Target(
                         expr=
                         'kube_resourcequota{resource="limits.cpu", namespace="$project-$domain", type="hard"}',
                         refId='A',
                         legendFormat="max cpu",
                     ),
                     Target(
                         expr=
                         'kube_resourcequota{resource="limits.cpu", namespace="$project-$domain", type="used"}',
                         refId='B',
                         legendFormat="used cpu",
                     ),
                 ],
                 yAxes=YAxes(
                     YAxis(format=OPS_FORMAT),
                     YAxis(format=SHORT_FORMAT),
                 ),
             ),
             Graph(
                 title="Mem Limits vs usage",
                 dataSource=DATASOURCE,
                 targets=[
                     Target(
                         expr=
                         'kube_resourcequota{resource="limits.memory", namespace="$project-$domain", type="hard"}',
                         refId='A',
                         legendFormat="max mem",
                     ),
                     Target(
                         expr=
                         'kube_resourcequota{resource="limits.memory", namespace="$project-$domain", type="used"}',
                         refId='B',
                         legendFormat="used mem",
                     ),
                 ],
                 yAxes=YAxes(
                     YAxis(format=OPS_FORMAT),
                     YAxis(format=SHORT_FORMAT),
                 ),
             ),
         ])
Example No. 11
    def test_should_generate_lambda_memory_utilization_percentage_graph_with_alert_notifications(
        self, ):
        lambda_name = "lambda-1"
        cloudwatch_data_source = "cloudwatch"
        lambda_insights_namespace = "insights"
        notifications = ["lorem", "ipsum"]

        expected_alert_query = CloudwatchMetricsTarget(
            alias="Avg",
            namespace=lambda_insights_namespace,
            statistics=["Average"],
            metricName="memory_utilization",
            dimensions={"function_name": lambda_name},
            refId="A",
        )

        generated_lambda_graph = lambda_generate_memory_utilization_percentage_graph(
            name=lambda_name,
            cloudwatch_data_source=cloudwatch_data_source,
            lambda_insights_namespace=lambda_insights_namespace,
            notifications=notifications,
        )
        generated_lambda_graph.should.have.property("alert").be.a(Alert)
        generated_lambda_graph.alert.executionErrorState.should.eql("alerting")
        generated_lambda_graph.alert.noDataState.should.eql("no_data")
        generated_lambda_graph.alert.alertConditions.should.have.length_of(1)
        generated_lambda_graph.alert.alertConditions[0].should.be.a(
            AlertCondition)
        generated_lambda_graph.alert.alertConditions[0].target.should.eql(
            Target(refId="A"))
        generated_lambda_graph.targets.should.contain(expected_alert_query)
Example No. 12
    def test_should_generate_mem_utilization_percentage_with_alerts_graph(self):
        name = "service-1"
        cloudwatch_data_source = "prod"
        cluster_name = "cluster-1"
        grid_pos = GridPos(1, 2, 3, 4)
        notifications = ["foo", "bar", "baz"]

        expected_alert_condition = AlertCondition(
            Target(refId="A"),
            timeRange=TimeRange("15m", "now"),
            evaluator=GreaterThan(85),
            reducerType=RTYPE_MAX,
            operator=OP_AND,
        )

        panel = generate_mem_utilization_percentage_graph(
            name=name,
            cloudwatch_data_source=cloudwatch_data_source,
            cluster_name=cluster_name,
            grid_pos=grid_pos,
            notifications=notifications,
        )

        panel.alert.should.be.a(Alert)
        panel.alert.alertConditions.should.have.length_of(1)
        panel.alert.alertConditions[0].should.eql(expected_alert_condition)
        panel.alert.notifications.should.eql(notifications)
Example No. 13
    def test_should_generate_pending_count_with_alerts_graph(self):
        name = "service-1"
        cloudwatch_data_source = "prod"
        cluster_name = "cluster-1"
        grid_pos = GridPos(1, 2, 3, 4)
        notifications = ["foo", "bar"]

        panel = generate_pending_count_graph(
            name=name,
            cloudwatch_data_source=cloudwatch_data_source,
            cluster_name=cluster_name,
            grid_pos=grid_pos,
            notifications=notifications,
        )

        panel.alert.should.be.a(Alert)
        panel.alert.gracePeriod.should.eql("15m")
        panel.alert.alertConditions.should.have.length_of(1)
        panel.alert.alertConditions[0].should.eql(
            AlertCondition(
                Target(refId="A"),
                timeRange=TimeRange("5m", "now"),
                evaluator=GreaterThan(0),
                reducerType=RTYPE_MAX,
                operator=OP_AND,
            )
        )
Example No. 14
    def test_should_generate_res_count_graph_with_alert(self):
        name = "service-1"
        cloudwatch_data_source = "prod"
        loadbalancer = "loadbalancer-1"
        target_group = "target-group-1"
        grid_pos = GridPos(1, 2, 3, 4)
        notifications = ["foo", "bar", "baz"]

        panel = generate_res_count_graph(
            name=name,
            cloudwatch_data_source=cloudwatch_data_source,
            grid_pos=grid_pos,
            loadbalancer=loadbalancer,
            target_group=target_group,
            notifications=notifications,
        )
        panel.alert.should.be.a(Alert)
        panel.alert.message.should.eql("{} has 5XX errors".format(name))
        panel.alert.alertConditions.should.have.length_of(1)
        panel.alert.alertConditions.should.eql(
            [
                AlertCondition(
                    Target(refId="A"),
                    timeRange=TimeRange("15m", "now"),
                    evaluator=GreaterThan(0),
                    reducerType=RTYPE_MAX,
                    operator=OP_AND,
                ),
            ]
        )
Example No. 15
def generate_elasticsearch_status_red_alert_graph(
        name: str, client_id: str, cloudwatch_data_source: str,
        notifications: List[str]) -> Graph:
    """
    Generate Elasticsearch graph
    """

    y_axes = YAxes(
        YAxis(format=SHORT_FORMAT),
        YAxis(format=SHORT_FORMAT),
    )

    targets = [
        CloudwatchMetricsTarget(
            alias="Red status",
            namespace=NAMESPACE,
            period="1m",
            statistics=["Maximum"],
            dimensions={
                "DomainName": name,
                "ClientId": client_id
            },
            metricName="ClusterStatus.red",
        ),
    ]

    alert = None

    if notifications:
        alert = Alert(
            name="Elasticsearch is in status red",
            message="Elasticsearch is in status red",
            executionErrorState="alerting",
            alertConditions=[
                AlertCondition(
                    Target(refId=ALERT_REF_ID),
                    timeRange=TimeRange("5m", "now"),
                    evaluator=GreaterThan(0),
                    reducerType=RTYPE_MAX,
                    operator=OP_OR,
                ),
            ],
            frequency="2m",
            gracePeriod="2m",
            notifications=notifications,
        )

    return Graph(
        title="Status RED alerts",
        dataSource=cloudwatch_data_source,
        targets=targets,
        yAxes=y_axes,
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=True,
        lines=False,
        alert=alert,
    ).auto_ref_ids()
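
Because none of the CloudwatchMetricsTarget entries above set a refId, the trailing auto_ref_ids() call assigns refIds ("A", "B", ...) to the panel's targets, which is presumably how the alert's Target(refId=ALERT_REF_ID) lines up with the single query. A hedged sketch of rendering the panel to JSON for inspection follows, assuming the module-level constants used above (NAMESPACE, ALERT_REF_ID, TRANSPARENT, EDITABLE) are defined next to the function and that DashboardEncoder lives in grafanalib._gen.

import json

from grafanalib._gen import DashboardEncoder  # assumed location of the encoder

graph = generate_elasticsearch_status_red_alert_graph(
    name="search-cluster",            # hypothetical Elasticsearch domain name
    client_id="123456789012",         # hypothetical AWS account id
    cloudwatch_data_source="cloudwatch",
    notifications=["ops-alerts"],     # non-empty, so the Alert is attached
)
print(json.dumps(graph.to_json_data(), cls=DashboardEncoder, indent=2, sort_keys=True))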
Example No. 16
def db_operations_errors(grr_component):
  return Graph(
    title="Database Operations Errors Rate by Call",
    targets=[
      Target(
        expr='sum by (call) (rate(db_request_errors_total{{job="grr_{0}"}}[10m]))'.format(grr_component),
        legendFormat="{{call}}",
      ),
    ])
Example No. 17
def db_operations_latency(grr_component):
  return Graph(
    title="Database Operations Latency by Call",
    targets=[
      Target(
        expr='sum by (call) (rate(db_request_latency_sum{{job="grr_{0}"}}[10m]) / rate(db_request_latency_count{{job="grr_{0}"}}[10m]))'.format(grr_component),
        legendFormat="{{call}}",
      ),
    ])
Example No. 18
def sum_process_memory_bytes(grr_component):
  return Graph(
    title="Sum of Process Memory Bytes (across all instances)",
    targets=[
      Target(
        expr='sum(process_resident_memory_bytes{{job="grr_{}"}})'.format(grr_component),
        legendFormat="Resident Memory",
      ),
    ])
Example No. 19
 def metastore_failures():
     # Copy counts sum(rate(flyte:propeller:all:metastore:copy:overall_unlabeled_ms_count[5m]))
     return Graph(
         title=f"Failures from metastore",
         dataSource=DATASOURCE,
         targets=[
             Target(
                 expr=
                 f'sum(rate(flyte:propeller:all:metastore:head_failure_unlabeled[5m]))',
                 legendFormat="head-failure",
                 refId='A',
             ),
             Target(
                 expr=
                 f'sum(rate(flyte:propeller:all:metastore:bad_container_unlabeled[5m]))',
                 legendFormat="bad-container",
                 refId='A',
             ),
             Target(
                 expr=
                 f'sum(rate(flyte:propeller:all:metastore:bad_key_unlabeled[5m]))',
                 legendFormat="bad-key",
                 refId='A',
             ),
             Target(
                 expr=
                 f'sum(rate(flyte:propeller:all:metastore:read_failure_unlabeled[5m]))',
                 legendFormat="read-failure",
                 refId='A',
             ),
             Target(
                 expr=
                 f'sum(rate(flyte:propeller:all:metastore:write_failure_unlabeled[5m]))',
                 legendFormat="write-failure",
                 refId='A',
             ),
         ],
         yAxes=YAxes(
             YAxis(format=OPS_FORMAT),
             YAxis(format=SHORT_FORMAT),
         ),
     )
Example No. 20
 def resource_stats(collapse: bool) -> Row:
     return Row(
         title="Task stats",
         collapse=collapse,
         panels=[
             Graph(
                 title="Pending tasks",
                 dataSource=DATASOURCE,
                 targets=[
                     Target(
                         expr=
                         'sum(kube_pod_container_status_waiting * on(pod) group_left(label_execution_id, label_task_name, label_node_id, label_workflow_name) kube_pod_labels{label_execution_id !="",namespace=~"$project-$domain",label_workflow_name=~"$workflow"}) by (namespace, label_execution_id, label_task_name, label_node_id, label_workflow_name) > 0',
                         refId='A',
                     ),
                 ],
                 yAxes=single_y_axis(format=SHORT_FORMAT),
             ),
             Graph(
                 title="Memory Usage Percentage",
                 dataSource=DATASOURCE,
                 targets=[
                     Target(
                         expr=
                         '(max(container_memory_rss{image!=""} * on(pod) group_left(label_execution_id, label_task_name, label_node_id, label_workflow_name) kube_pod_labels{label_execution_id !="",namespace=~"$project-$domain",label_workflow_name=~"$workflow"} * on(pod) group_left(phase) kube_pod_status_phase{phase="Running"}) by (namespace, pod, label_execution_id, label_task_name, label_node_id, label_workflow_name) / max(kube_pod_container_resource_limits_memory_bytes{container!=""} * on(pod) group_left(label_execution_id, label_task_name, label_node_id, label_workflow_name) kube_pod_labels{label_execution_id !=""} * on(pod) group_left(phase) kube_pod_status_phase{phase="Running"}) by (namespace, pod, label_execution_id, label_task_name, label_node_id, label_workflow_name)) > 0',
                         refId='A',
                     ),
                 ],
                 yAxes=single_y_axis(format=SHORT_FORMAT),
             ),
             Graph(
                 title="CPU Usage Percentage",
                 dataSource=DATASOURCE,
                 targets=[
                     Target(
                         expr=
                         '(sum(rate(container_cpu_usage_seconds_total{image!=""}[2m]) * on(pod) group_left(label_execution_id, label_task_name, label_node_id, label_workflow_name) kube_pod_labels{label_execution_id !="",namespace=~"$project-$domain",label_workflow_name=~"$workflow"} * on(pod) group_left(phase) kube_pod_status_phase{phase="Running"}) by (namespace, pod, label_execution_id, label_task_name, label_node_id, label_workflow_name) / sum(kube_pod_container_resource_limits_cpu_cores{container!=""} * on(pod) group_left(label_execution_id, label_task_name, label_node_id, label_workflow_name) kube_pod_labels{label_execution_id !=""} * on(pod) group_left(phase) kube_pod_status_phase{phase="Running"}) by (namespace, pod, label_execution_id, label_task_name, label_node_id, label_workflow_name)) > 0',
                         refId='A',
                     ),
                 ],
                 yAxes=single_y_axis(format=SHORT_FORMAT),
             ),
         ])
Example No. 21
 def admin_launcher_cache() -> Graph:
     return Graph(
         title="Admin Launcher cache",
         dataSource=DATASOURCE,
         targets=[
             Target(
                 expr=
                 f'sum(rate(flyte:propeller:all:admin_launcher:cache_hit[5m]))',
                 legendFormat="hit",
                 refId='A',
             ),
             Target(
                 expr=
                 f'sum(rate(flyte:propeller:all:admin_launcher:cache_miss[5m]))',
                 legendFormat="miss",
                 refId='B',
             ),
         ],
         yAxes=single_y_axis(format=MILLISECONDS_FORMAT),
     )
Example No. 22
def generate_desired_count_graph(
    name: str,
    cluster_name: str,
    max: int,
    cloudwatch_data_source: str,
    notifications: List[str],
    grid_pos: GridPos,
):
    targets = [
        CloudwatchMetricsTarget(
            alias="Containers",
            namespace=CONTAINER_INSIGHTS_NAMESPACE,
            statistics=["Maximum"],
            metricName="DesiredTaskCount",
            dimensions={
                "ServiceName": name,
                "ClusterName": cluster_name
            },
            refId=ALERT_REF_ID,
        ),
    ]

    alert = None
    if notifications and max > 1:
        alert = Alert(
            name="{} Desired count of containers nearing the max".format(name),
            message="{} is having Desired count of containers nearing the max".
            format(name),
            executionErrorState="alerting",
            alertConditions=[
                AlertCondition(
                    Target(refId=ALERT_REF_ID),
                    timeRange=TimeRange("15m", "now"),
                    evaluator=GreaterThan(0.9 * max),  # 90% of max
                    reducerType=RTYPE_MAX,
                    operator=OP_AND,
                )
            ],
            gracePeriod="1m",
            notifications=notifications,
        )

    return Graph(
        title="Desired Tasks",
        dataSource=cloudwatch_data_source,
        targets=targets,
        transparent=TRANSPARENT,
        editable=EDITABLE,
        alert=alert,
        gridPos=grid_pos,
        alertThreshold=ALERT_THRESHOLD,
    ).auto_ref_ids()
Example No. 23
def generate_rds_transaction_id_graph(name: str, cloudwatch_data_source: str,
                                      notifications: List[str]):
    """
    Generate rds graph
    """

    y_axes = single_y_axis(format=SHORT_FORMAT)

    targets = [
        CloudwatchMetricsTarget(
            alias="Transaction ids used",
            metricName="MaximumUsedTransactionIDs",
            statistics=["Maximum"],
            namespace=NAMESPACE,
            dimensions={"DBInstanceIdentifier": name},
            period="1m",
            refId=ALERT_REF_ID,
        ),
    ]

    alert = None

    if notifications:
        alert = Alert(
            name="{} transaction ids used Errors".format(name),
            message="{} is having transaction ids used errors".format(name),
            executionErrorState="alerting",
            alertConditions=[
                AlertCondition(
                    Target(refId=ALERT_REF_ID),
                    timeRange=TimeRange("5m", "now"),
                    evaluator=GreaterThan(1000000000),
                    reducerType=RTYPE_MAX,
                    operator=OP_AND,
                )
            ],
            gracePeriod="2m",
            frequency="2m",
            notifications=notifications,
        )

    return Graph(
        title="Transaction ids used",
        dataSource=cloudwatch_data_source,
        targets=targets,
        yAxes=y_axes,
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=False,
        lines=True,
        alert=alert,
    ).auto_ref_ids()