Пример #1
0
def update_nrpe_config():
    update_nagios_pgpass()
    nrpe = NRPE()

    user = nagios_username()
    port = postgresql.port()
    nrpe.add_check(shortname="pgsql", description="Check pgsql", check_cmd="check_pgsql -P {} -l {}".format(port, user))

    if reactive.is_state("postgresql.replication.is_master"):
        # TODO: These should be calcualted from the backup schedule,
        # which is difficult since that is specified in crontab format.
        warn_age = 172800
        crit_age = 194400
        backups_log = helpers.backups_log_path()
        nrpe.add_check(
            shortname="pgsql_backups",
            description="Check pgsql backups",
            check_cmd=("check_file_age -w {} -c {} -f {}" "".format(warn_age, crit_age, backups_log)),
        )
    else:
        # Standbys don't do backups. We still generate a check though,
        # to ensure alerts get through to monitoring after a failover.
        nrpe.add_check(
            shortname="pgsql_backups",
            description="Check pgsql backups",
            check_cmd=r"check_dummy 0 standby_does_not_backup",
        )
    nrpe.write()
    reactive.remove_state("postgresql.nagios.needs_update")
Пример #2
0
    def test_port(self, pgconf_path):
        # Pull the configured port from postgresql.conf.
        with tempfile.NamedTemporaryFile("w") as pgconf:
            pgconf.write("# Some rubbish\n")
            pgconf.write(" Port = 1234 # Picked by pg_createcluster(1)\n")
            pgconf.flush()
            pgconf_path.return_value = pgconf.name
            self.assertEqual(postgresql.port(), 1234)

        with tempfile.NamedTemporaryFile("w") as pgconf:
            pgconf.write("port='1235'\n")
            pgconf.write("# Some rubbish\n")
            pgconf.flush()
            pgconf_path.return_value = pgconf.name
            self.assertEqual(postgresql.port(), 1235)

        with tempfile.NamedTemporaryFile("w") as pgconf:
            pgconf_path.return_value = pgconf.name
            self.assertEqual(postgresql.port(), 5432)  # Fallback to default.
Пример #3
0
    def test_port(self, pgconf_path):
        # Pull the configured port from postgresql.conf.
        with tempfile.NamedTemporaryFile('w') as pgconf:
            pgconf.write('# Some rubbish\n')
            pgconf.write(' Port = 1234 # Picked by pg_createcluster(1)\n')
            pgconf.flush()
            pgconf_path.return_value = pgconf.name
            self.assertEqual(postgresql.port(), 1234)

        with tempfile.NamedTemporaryFile('w') as pgconf:
            pgconf.write("port='1235'\n")
            pgconf.write('# Some rubbish\n')
            pgconf.flush()
            pgconf_path.return_value = pgconf.name
            self.assertEqual(postgresql.port(), 1235)

        with tempfile.NamedTemporaryFile('w') as pgconf:
            pgconf_path.return_value = pgconf.name
            self.assertEqual(postgresql.port(), 5432)  # Fallback to default.
Пример #4
0
def db_relation_common(rel):
    """Publish unit specific relation details."""
    local = rel.local
    if "database" not in local:
        return  # Not yet ready.

    # Version number, allowing clients to adjust or block if their
    # expectations are not met.
    local["version"] = postgresql.version()

    # Calculate the state of this unit. 'standalone' will disappear
    # in a future version of this interface, as this state was
    # only needed to deal with race conditions now solved by
    # Juju leadership. We check for is_primary() rather than
    # the postgresql.replication.is_master reactive state to
    # publish the correct state when we are using manual replication
    # (there might be multiple independent masters, possibly useful for
    # sharding, or perhaps this is a multi master BDR setup).
    if postgresql.is_primary():
        if reactive.helpers.is_state("postgresql.replication.has_peers"):
            local["state"] = "master"
        else:
            local["state"] = "standalone"
    else:
        local["state"] = "hot standby"

    # Host is the private ip address, but this might change and
    # become the address of an attached proxy or alternative peer
    # if this unit is in maintenance.
    local["host"] = hookenv.unit_private_ip()

    # Port will be 5432, unless the user has overridden it or
    # something very weird happened when the packages where installed.
    local["port"] = str(postgresql.port())

    # The list of remote units on this relation granted access.
    # This is to avoid the race condition where a new client unit
    # joins an existing client relation and sees valid credentials,
    # before we have had a chance to grant it access.
    local["allowed-units"] = " ".join(unit for unit, relinfo in rel.items() if "private-address" in relinfo)
Пример #5
0
def db_relation_common(rel):
    """Publish unit specific relation details."""
    local = rel.local
    if "database" not in local:
        return  # Not yet ready.

    # Version number, allowing clients to adjust or block if their
    # expectations are not met.
    local["version"] = postgresql.version()

    # Calculate the state of this unit. 'standalone' will disappear
    # in a future version of this interface, as this state was
    # only needed to deal with race conditions now solved by
    # Juju leadership. We check for is_primary() rather than
    # the postgresql.replication.is_master reactive state to
    # publish the correct state when we are using manual replication
    # (there might be multiple independent masters, possibly useful for
    # sharding, or perhaps this is a multi master BDR setup).
    if postgresql.is_primary():
        if reactive.helpers.is_state("postgresql.replication.has_peers"):
            local["state"] = "master"
        else:
            local["state"] = "standalone"
    else:
        local["state"] = "hot standby"

    # Host is the private ip address, but this might change and
    # become the address of an attached proxy or alternative peer
    # if this unit is in maintenance.
    local["host"] = ingress_address(local.relname, local.relid)

    # Port will be 5432, unless the user has overridden it or
    # something very weird happened when the packages where installed.
    local["port"] = str(postgresql.port())

    # The list of remote units on this relation granted access.
    # This is to avoid the race condition where a new client unit
    # joins an existing client relation and sees valid credentials,
    # before we have had a chance to grant it access.
    local["allowed-units"] = " ".join(unit for unit, relinfo in rel.items() if len(incoming_addresses(relinfo)) > 0)

    # The list of IP address ranges on this relation granted access.
    # This will replace allowed-units, which does not work with cross
    # model ralations due to the anonymization of the external client.
    local["allowed-subnets"] = ",".join(
        sorted({r: True for r in chain(*[incoming_addresses(relinfo) for relinfo in rel.values()])}.keys())
    )

    # v2 protocol. Publish connection strings for this unit and its peers.
    # Clients should use these connection strings in favour of the old
    # host, port, database settings. A single proxy unit can thus
    # publish several end points to clients.
    master = replication.get_master()
    if replication.is_master():
        master_relinfo = local
    else:
        master_relinfo = rel.peers.get(master)
    local["master"] = relinfo_to_cs(master_relinfo)
    if rel.peers:
        all_relinfo = rel.peers.values()
    all_relinfo = list(rel.peers.values()) if rel.peers else []
    all_relinfo.append(rel.local)
    standbys = filter(
        None,
        [relinfo_to_cs(relinfo) for relinfo in all_relinfo if relinfo.unit != master],
    )
    local["standbys"] = "\n".join(sorted(standbys)) or None
Пример #6
0
def update_nrpe_config():
    update_nagios_pgpass()
    nrpe = NRPE()

    user = nagios_username()
    port = postgresql.port()
    nrpe.add_check(
        shortname="pgsql",
        description="Check pgsql",
        check_cmd="check_pgsql -P {} -l {}".format(port, user),
    )

    # copy the check script which will run cronned as postgres user
    with open("scripts/find_latest_ready_wal.py") as fh:
        check_script = fh.read()

    check_script_path = "{}/{}".format(helpers.scripts_dir(), "find_latest_ready_wal.py")
    helpers.write(check_script_path, check_script, mode=0o755)

    # create an (empty) file with appropriate permissions for the above
    check_output_path = "/var/lib/nagios/postgres-wal-max-age.txt"
    if not os.path.exists(check_output_path):
        helpers.write(check_output_path, b"0\n", mode=0o644, user="******", group="postgres")

    # retrieve the threshold values from the charm config
    config = hookenv.config()
    check_warn_threshold = config["wal_archive_warn_threshold"] or 0
    check_crit_threshold = config["wal_archive_crit_threshold"] or 0

    check_cron_path = "/etc/cron.d/postgres-wal-archive-check"
    if check_warn_threshold and check_crit_threshold:
        # create the cron job to run the above
        check_cron = "*/2 * * * * postgres {}".format(check_script_path)
        helpers.write(check_cron_path, check_cron, mode=0o644)

    # copy the nagios plugin which will check the cronned output
    with open("scripts/check_latest_ready_wal.py") as fh:
        check_script = fh.read()
    check_script_path = "{}/{}".format("/usr/local/lib/nagios/plugins", "check_latest_ready_wal.py")
    helpers.write(check_script_path, check_script, mode=0o755)

    # write the nagios check definition
    nrpe.add_check(
        shortname="pgsql_stale_wal",
        description="Check for stale WAL backups",
        check_cmd="{} {} {}".format(check_script_path, check_warn_threshold, check_crit_threshold),
    )

    if reactive.is_state("postgresql.replication.is_master"):
        # TODO: These should be calculated from the backup schedule,
        # which is difficult since that is specified in crontab format.
        warn_age = 172800
        crit_age = 194400
        backups_log = helpers.backups_log_path()
        nrpe.add_check(
            shortname="pgsql_backups",
            description="Check pgsql backups",
            check_cmd=("check_file_age -w {} -c {} -f {}" "".format(warn_age, crit_age, backups_log)),
        )
    else:
        # Standbys don't do backups. We still generate a check though,
        # to ensure alerts get through to monitoring after a failover.
        nrpe.add_check(
            shortname="pgsql_backups",
            description="Check pgsql backups",
            check_cmd=r"check_dummy 0 standby_does_not_backup",
        )
    nrpe.write()
    reactive.remove_state("postgresql.nagios.needs_update")
Пример #7
0
def publish_replication_details():
    peer = helpers.get_peer_relation()
    if peer is not None:
        peer.local["host"] = hookenv.unit_private_ip()
        peer.local["port"] = str(postgresql.port())
        peer.local["allowed-units"] = " ".join(sorted(peer.keys()))