示例#1
0
def PromGraph(*args, **kwargs):
    """Build a graph backed by our Prometheus data source.

    All positional and keyword arguments are forwarded to
    ``prometheus.PromGraph`` with ``data_source`` fixed to ``PROMETHEUS``.
    Unless the caller supplies them, ``tooltip`` is ``G.Tooltip`` with
    ``sort=G.SORT_DESC`` and ``nullPointMode`` is ``G.NULL_AS_NULL``.
    """
    options = {
        "tooltip": G.Tooltip(sort=G.SORT_DESC),
        "nullPointMode": G.NULL_AS_NULL,
    }
    # Anything the caller passed explicitly wins over the defaults above.
    options.update(kwargs)

    return prometheus.PromGraph(*args, data_source=PROMETHEUS, **options)
示例#2
0
def stacked(graph):
    """Return a stacked copy of ``graph``.

    ``graph`` is an attrs instance and is not modified.  The copy has zero
    line width, full fill, nulls treated as zero, stacking enabled, and a
    tooltip whose value type is ``G.INDIVIDUAL``.
    """
    # attr.evolve replaces the deprecated attr.assoc: it builds the copy via
    # the class constructor instead of setting attributes on a raw copy.
    return attr.evolve(
        graph,
        lineWidth=0,
        nullPointMode=G.NULL_AS_ZERO,
        stack=True,
        fill=10,
        tooltip=G.Tooltip(valueType=G.INDIVIDUAL),
    )
示例#3
0
def stacked(graph):
    """Return a copy of ``graph`` restyled as a stacked graph.

    The copy is produced with ``attr.evolve``; ``graph`` itself is left
    unchanged.
    """
    # Tooltip sorted with G.SORT_DESC, reporting G.INDIVIDUAL values.
    stacked_tooltip = G.Tooltip(sort=G.SORT_DESC, valueType=G.INDIVIDUAL)
    overrides = {
        "lineWidth": 0,
        "nullPointMode": G.NULL_AS_ZERO,
        "stack": True,
        "fill": 10,
        "tooltip": stacked_tooltip,
    }
    return attr.evolve(graph, **overrides)
示例#4
0
def stacked(graph):
    """Return a shallow copy of ``graph`` with stacked-graph settings applied.

    The copy is a plain ``dict``; ``graph`` itself is not modified.
    """
    stacked_graph = dict(graph)  # Shallow copy; nested values are shared.

    # Gotcha: `graph` is the Python dictionary form of the Grafana JSON
    # object (i.e. an AST).  The Python DSL uses consistent camelCase names,
    # but Grafana itself does not, so what was passed to Graph as
    # `lineWidth` has to be overridden here under the key `linewidth`.
    stacked_graph.update(
        linewidth=0,
        nullPointMode=G.NULL_AS_ZERO,
        stack=True,
        fill=10,
        tooltip=G.Tooltip(valueType=G.INDIVIDUAL),
    )
    return stacked_graph
示例#5
0
def make(prefix, title):
    """Build the isaacranks dashboard for one deployment.

    ``prefix`` is substituted for the ``{}`` placeholder in every PromQL
    expression below, so the selected ``service`` label becomes
    ``<prefix>-isaacranks-web`` or ``<prefix>-isaacranks-rebuild``.
    ``title`` is used as the dashboard title.  Returns the result of calling
    ``auto_panel_ids()`` on the assembled ``G.Dashboard``.
    """
    source = 'prometheus'

    def query(expr, **options):
        # Only the expression goes through str.format; options such as
        # legendFormat='{{version}}' are handed to G.Target untouched.
        return G.Target(expr=expr.format(prefix), **options)

    def labelled(series):
        # `series` is a list of (expression, legend) pairs; refIds are
        # handed out as 'A', 'B', ... in list order.
        return [
            query(expr, legendFormat=legend, refId=ref)
            for (expr, legend), ref in zip(series, 'ABCDE')
        ]

    def axes(unit):
        # First axis uses `unit`; the second (short format) axis is hidden.
        return [
            G.YAxis(format=unit),
            G.YAxis(format=G.SHORT_FORMAT, show=False),
        ]

    def pods_up(stat_title, expr):
        return G.SingleStat(
            title=stat_title,
            dataSource=source,
            valueName='current',
            sparkline=G.SparkLine(show=True),
            targets=[query(expr)],
        )

    def rate_graph(graph_title, series, **extra):
        # Stacked, filled ops/sec graph.  The settings dict is rebuilt on
        # every call so each graph gets its own Tooltip instance.
        settings = dict(
            nullPointMode=G.NULL_AS_ZERO,
            stack=True,
            lineWidth=0,
            fill=10,
            tooltip=G.Tooltip(valueType=G.INDIVIDUAL),
        )
        settings.update(extra)
        return G.Graph(
            title=graph_title,
            dataSource=source,
            targets=labelled(series),
            yAxes=axes(G.OPS_FORMAT),
            **settings)

    def latency_graph(graph_title, p50, p90, p99, **extra):
        # Three quantile series plotted on a millisecond axis.
        return G.Graph(
            title=graph_title,
            dataSource=source,
            targets=labelled([(p50, '0.5q'), (p90, '0.9q'), (p99, '0.99q')]),
            yAxes=axes(G.MILLISECONDS_FORMAT),
            **extra)

    def rebuild_graph(graph_title, expr, legend):
        # Single series on a seconds axis; the legend shows the current value.
        return G.Graph(
            title=graph_title,
            dataSource=source,
            targets=[query(expr, legendFormat=legend)],
            legend=G.Legend(current=True),
            yAxes=axes(G.SECONDS_FORMAT))

    pods_row = G.Row(panels=[
        pods_up(
            'Pods up (web)',
            'count by(service) (up{{service="{}-isaacranks-web"}} == 1)'),
        pods_up(
            'Pods up (rebuild)',
            'count by(service) (up{{service="{}-isaacranks-rebuild"}} == 1)'),
    ])

    http_row = G.Row(panels=[
        rate_graph(
            'HTTP RPS',
            [
                ('service_status:http_request_duration_seconds_count:irate{{service="{}-isaacranks-web",status_code=~"1.."}}',
                 '1xx'),
                ('service_status:http_request_duration_seconds_count:irate{{service="{}-isaacranks-web",status_code=~"2.."}}',
                 '2xx'),
                ('service_status:http_request_duration_seconds_count:irate{{service="{}-isaacranks-web",status_code=~"3.."}}',
                 '3xx'),
                ('service_status:http_request_duration_seconds_count:irate{{service="{}-isaacranks-web",status_code=~"4.."}}',
                 '4xx'),
                ('service_status:http_request_duration_seconds_count:irate{{service="{}-isaacranks-web",status_code=~"5.."}}',
                 '5xx'),
            ],
            aliasColors=ALIAS_COLORS),
        latency_graph(
            'HTTP latency',
            'service:http_request_duration_seconds:50p{{service="{}-isaacranks-web"}} * 1000',
            'service:http_request_duration_seconds:90p{{service="{}-isaacranks-web"}} * 1000',
            'service:http_request_duration_seconds:99p{{service="{}-isaacranks-web"}} * 1000',
            aliasColors=ALIAS_COLORS),
    ])

    ballot_row = G.Row(panels=[
        rate_graph(
            'Ballots',
            [
                ('service_version:isaacranks_ballot_generation_seconds_count:irate{{service="{}-isaacranks-web"}}',
                 '{{version}}'),
            ]),
        latency_graph(
            'Ballot latency',
            'service:isaacranks_ballot_generation_seconds:50p{{service="{}-isaacranks-web"}} * 1000',
            'service:isaacranks_ballot_generation_seconds:90p{{service="{}-isaacranks-web"}} * 1000',
            'service:isaacranks_ballot_generation_seconds:99p{{service="{}-isaacranks-web"}} * 1000'),
    ])

    vote_row = G.Row(panels=[
        rate_graph(
            'Votes',
            [
                ('service_version:isaacranks_vote_casting_seconds_count:irate{{service="{}-isaacranks-web"}}',
                 '{{version}}'),
            ]),
        latency_graph(
            'Vote latency',
            'service:isaacranks_vote_casting_seconds:50p{{service="{}-isaacranks-web"}} * 1000',
            'service:isaacranks_vote_casting_seconds:90p{{service="{}-isaacranks-web"}} * 1000',
            'service:isaacranks_vote_casting_seconds:99p{{service="{}-isaacranks-web"}} * 1000'),
    ])

    rebuild_row = G.Row(panels=[
        rebuild_graph(
            'Time since last rebuild',
            'time() - (isaacranks_last_rebuild_timestamp{{service="{}-isaacranks-rebuild"}} != 0)',
            'Age'),
        rebuild_graph(
            'Rebuild duration',
            'isaacranks_last_rebuild_duration_seconds{{service="{}-isaacranks-rebuild"}} != 0',
            'Duration'),
    ])

    board = G.Dashboard(
        title=title,
        rows=[pods_row, http_row, ballot_row, vote_row, rebuild_row],
    )
    return board.auto_panel_ids()
示例#6
0
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re
import attr
from grafanalib import core as g

# Tooltip configured with g.SORT_DESC as its sort order.
DECREASING_ORDER_TOOLTIP = g.Tooltip(sort=g.SORT_DESC)
# A 300-pixel height value (presumably the shared panel height; its users are
# not visible in this part of the file).
PANEL_HEIGHT = g.Pixels(300)
# Quantile levels, listed from highest to lowest.
QUANTILES = [0.99, 0.9, 0.5]

# Template variable named "source" of type "datasource" with query
# "prometheus"; dashboards that include it can refer to it as $source.
SOURCE_TEMPLATE = g.Template(name="source",
                             type="datasource",
                             query="prometheus")


@attr.s
class Dashboard(g.Dashboard):
    """A ``g.Dashboard`` with project defaults for ``time`` and ``templating``.

    Both attributes are redeclared here only to change their default values.
    """

    # Default time window: from "now-30d" to "now".
    time = attr.ib(default=g.Time("now-30d", "now"))
    # Make it possible to use $source as a source: the default templating
    # list contains SOURCE_TEMPLATE.
    templating = attr.ib(default=g.Templating(list=[SOURCE_TEMPLATE]))

def dashboard():
    """Assemble the "S4" dashboard and return it with panel ids assigned.

    The dashboard has four rows: signups / usage / last convergence, two
    cluster rows, and a row with transfer rate, customer deployments and
    unhandled errors.
    """
    datasource = "prometheus"

    def count_axes():
        # Two identical unit-less "Count" axes, created fresh for each graph.
        return [G.YAxis(format="none", label="Count") for _ in range(2)]

    # Filter down to just the signup pod since that's the only one where
    # these metric values are meaningful.  Some other pods report a 0 value
    # for them because they happen to import the Python code that defines
    # the objects representing them.
    #
    # Also, sum over the selected series to account for pod replacement.
    signup_series = (
        ('sum(wormhole_signup_started{pod=~"s4-signup.*"})',
         "Wormhole Signups Started", "A"),
        ('sum(wormhole_signup_success{pod=~"s4-signup.*"})',
         "Wormhole Signups Completed", "B"),
        ('sum(wormhole_signup_failure{pod=~"s4-signup.*"})',
         "Wormhole Signups Failed", "C"),
    )
    signups = G.Graph(
        title="Signups",
        dataSource=datasource,
        xAxis=X_TIME,
        yAxes=count_axes(),
        targets=[
            G.Target(expr=expr, legendFormat=legend, refId=ref)
            for expr, legend, ref in signup_series
        ],
    )

    usage = G.Graph(
        title="Usage",
        dataSource=datasource,
        # Stack the connection graphs on each other, revealing both a total
        # and a distribution across different grid router instances.
        stack=True,
        tooltip=G.Tooltip(valueType=G.INDIVIDUAL),
        xAxis=X_TIME,
        yAxes=count_axes(),
        targets=[
            G.Target(
                expr="grid_router_connections",
                legendFormat="Tahoe-LAFS Connections",
                refId="D",
            ),
        ],
    )

    overview_row = G.Row(
        panels=[signups, usage, last_convergence(datasource)],
    )
    cluster_row = G.Row(
        title="Cluster",
        panels=[
            cpu_usage(datasource, ["1m", "5m", "10m"]),
            memory_usage(datasource),
            network_usage(datasource),
            filesystem_usage(datasource),
        ],
    )
    cluster2_row = G.Row(
        title="Cluster2",
        panels=[process_open_fds(datasource)],
    )
    service_row = G.Row(panels=[
        tahoe_lafs_transfer_rate(datasource),
        s4_customer_deployments(datasource),
        unhandled_errors(datasource),
    ])

    board = G.Dashboard(
        title="S4",
        rows=[overview_row, cluster_row, cluster2_row, service_row],
    )
    return board.auto_panel_ids()
def dashboard():
    """Assemble the "S4" dashboard and return it with panel ids assigned.

    The dashboard has three rows: signup and usage graphs, cluster resource
    panels, and two single-stat panels (customer deployments and unhandled
    errors).
    """
    datasource = "prometheus"

    def count_axes():
        # Two identical unit-less "Count" axes, created fresh for each graph.
        return [G.YAxis(format="none", label="Count") for _ in range(2)]

    def current_stat(stat_title, expr, ref):
        # Single-stat panel showing the current value with a sparkline.
        return G.SingleStat(
            title=stat_title,
            dataSource='prometheus',
            valueName='current',
            sparkline=G.SparkLine(show=True),
            targets=[G.Target(expr=expr, refId=ref)],
        )

    signup_series = (
        ('wormhole_signup_started{pod=~"s4-signup.*"}',
         "Wormhole Signups Started", "A"),
        ('wormhole_signup_success{pod=~"s4-signup.*"}',
         "Wormhole Signups Completed", "B"),
        ('wormhole_signup_failure{pod=~"s4-signup.*"}',
         "Wormhole Signups Failed", "C"),
    )
    signups = G.Graph(
        title="Signups",
        dataSource=datasource,
        xAxis=X_TIME,
        yAxes=count_axes(),
        targets=[
            G.Target(expr=expr, legendFormat=legend, refId=ref)
            for expr, legend, ref in signup_series
        ],
    )

    usage = G.Graph(
        title="Usage",
        dataSource=datasource,
        # Stack the connection graphs on each other, revealing both a total
        # and a distribution across different grid router instances.
        stack=True,
        tooltip=G.Tooltip(valueType=G.INDIVIDUAL),
        xAxis=X_TIME,
        yAxes=count_axes(),
        targets=[
            G.Target(
                expr="grid_router_connections",
                legendFormat="Tahoe-LAFS Connections",
                refId="D",
            ),
        ],
    )

    overview_row = G.Row(panels=[signups, usage])
    cluster_row = G.Row(
        title="Cluster",
        panels=[
            cpu_usage(datasource, ["1m", "5m", "10m"]),
            memory_usage(datasource),
            network_usage(datasource),
            filesystem_usage(datasource),
        ],
    )
    stats_row = G.Row(panels=[
        current_stat(
            'Current Customer Deployments', 's4_deployment_gauge', "E"),
        current_stat(
            'Unhandled Errors', 's4_unhandled_error_counter', "F"),
    ])

    board = G.Dashboard(
        title="S4",
        rows=[overview_row, cluster_row, stats_row],
    )
    return board.auto_panel_ids()
示例#9
0
             G.Target(
                 expr=
                 'service_status:http_request_duration_seconds_count:irate{service="ucdapi",status_code=~"5.."}',
                 legendFormat='5xx',
                 refId='E'),
         ],
         aliasColors=ALIAS_COLORS,
         yAxes=[
             G.YAxis(format=G.OPS_FORMAT),
             G.YAxis(format=G.SHORT_FORMAT, show=False)
         ],
         nullPointMode=G.NULL_AS_ZERO,
         stack=True,
         lineWidth=0,
         fill=10,
         tooltip=G.Tooltip(valueType=G.INDIVIDUAL)),
     G.Graph(
         title='RPS',
         dataSource='prometheus',
         targets=[
             G.Target(
                 expr=
                 'sum(irate(http_request_duration_seconds_count{service="ucdapi"}[1m])) by (status_code, method)',
             ),
         ],
         yAxes=[
             G.YAxis(format=G.OPS_FORMAT),
             G.YAxis(format=G.SHORT_FORMAT),
         ]),
 ]),
 G.Row(panels=[