Example #1
def main():
    # Modify the behavior of the PostgreSQL package installation
    # before any packages are installed. We do this here, rather than
    # in handlers, so that extra_packages declared by the operator
    # don't drag in the PostgreSQL packages as dependencies before
    # the environment tweaks have been made.
    if (not reactive.is_state('apt.installed.postgresql-common') and
            not reactive.is_state('postgresql.cluster.inhibited')):
        generate_locale()
        inhibit_default_cluster_creation()
        install_postgresql_packages()
        install_extra_packages()  # Deprecated extra-packages option

    # Don't trust this state from the last hook. Daemons may have
    # crashed and servers rebooted since then.
    if reactive.is_state('postgresql.cluster.created'):
        try:
            reactive.toggle_state('postgresql.cluster.is_running',
                                  postgresql.is_running())
        except subprocess.CalledProcessError as x:
            if not reactive.is_state('workloadstatus.blocked'):
                status_set('blocked',
                           'Local PostgreSQL cluster is corrupt: {}'
                           ''.format(x.stderr))

    # Reconfigure PostgreSQL. While we don't, strictly speaking, need
    # to do this every hook, we do need to do it almost every hook,
    # since even things like the number of peers or number of clients
    # can affect minimum viable configuration settings.
    reactive.remove_state('postgresql.cluster.configured')

    log_states()  # Debug noise.
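
The states toggled above drive the rest of the charm's reactive handlers. A minimal sketch of how such a handler could be wired up with charms.reactive follows; the handler and helper names are illustrative only, not the charm's actual handlers.

# Illustrative sketch only: a charms.reactive handler consuming the
# states that main() sets and clears above. write_postgresql_config()
# is a hypothetical helper, not part of the charm.
from charms import reactive
from charms.reactive import when, when_not


@when('postgresql.cluster.created')
@when_not('postgresql.cluster.configured')
def configure_cluster():
    write_postgresql_config()  # hypothetical helper
    reactive.set_state('postgresql.cluster.configured')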
Example #2
    def test_is_running(self, check_call, pg_ctl_path, data_dir, version):
        version.return_value = '9.2'
        pg_ctl_path.return_value = '/path/to/pg_ctl'
        data_dir.return_value = '/path/to/DATADIR'
        self.assertTrue(postgresql.is_running())
        check_call.assert_called_once_with(
            ['sudo', '-u', 'postgres', '/path/to/pg_ctl', 'status', '-D',
             '/path/to/DATADIR'],
            universal_newlines=True,
            stdout=subprocess.DEVNULL)

        # Exit code 3 is pg_ctl(1) speak for 'not running'
        check_call.side_effect = subprocess.CalledProcessError(3, 'whoops')
        self.assertFalse(postgresql.is_running())

        # Exit code 4 is pg_ctl(1) speak for 'wtf is the $DATADIR', PG9.4+
        version.return_value = '9.4'
        check_call.side_effect = subprocess.CalledProcessError(4, 'whoops')
        self.assertFalse(postgresql.is_running())
        version.return_value = '9.3'
        check_call.side_effect = subprocess.CalledProcessError(4, 'whoops')
        with self.assertRaises(subprocess.CalledProcessError) as x:
            postgresql.is_running()
        self.assertEqual(x.exception.returncode, 4)

        # Other failures bubble up, not that they should occur.
        check_call.side_effect = subprocess.CalledProcessError(42, 'whoops')
        with self.assertRaises(subprocess.CalledProcessError) as x:
            postgresql.is_running()
        self.assertEqual(x.exception.returncode, 42)
Example #3
    def test_is_running_91(self, check_call, pg_ctl_path, data_dir, version, pid_path):
        version.return_value = "9.1"
        pg_ctl_path.return_value = "/path/to/pg_ctl"
        data_dir.return_value = "/path/to/DATADIR"
        self.assertTrue(postgresql.is_running())
        check_call.assert_called_once_with(
            ["sudo", "-u", "postgres", "/path/to/pg_ctl", "status", "-D", "/path/to/DATADIR"],
            universal_newlines=True,
            stdout=subprocess.DEVNULL,
        )

        # Exit code 1 is all we get from pg_ctl(1) with PostgreSQL 9.1
        check_call.side_effect = subprocess.CalledProcessError(1, "whoops")

        # If the pid file exists, and pg_ctl failed, the failure is raised.
        with tempfile.NamedTemporaryFile() as f:
            pid_path.return_value = f.name
            with self.assertRaises(subprocess.CalledProcessError) as x:
                postgresql.is_running()
            self.assertEqual(x.exception.returncode, 1)

        # If the pid file does not exist, and pg_ctl failed, we assume
        # PostgreSQL is not running.
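        # (pid_path still points at f.name, but the NamedTemporaryFile
        # was already deleted when the 'with' block above exited, so
        # the pid file no longer exists.)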
        self.assertFalse(postgresql.is_running())

        # Other failures bubble up, not that they should occur.
        check_call.side_effect = subprocess.CalledProcessError(42, "whoops")
        with self.assertRaises(subprocess.CalledProcessError) as x:
            postgresql.is_running()
        self.assertEqual(x.exception.returncode, 42)
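
Taken together with Example #2, these tests pin down how postgresql.is_running() is expected to interpret pg_ctl's exit codes on each PostgreSQL release. The sketch below is one implementation consistent with those tests; it is an illustration only, reusing the module-level helpers the tests mock (pg_ctl_path, data_dir, version, pid_path), and is not necessarily the charm's actual code.

# Sketch only -- inferred from the tests above, not the charm's real
# implementation. Assumes the helpers that the tests patch:
# pg_ctl_path(), data_dir(), version() and pid_path().
import os
import subprocess


def is_running():
    try:
        subprocess.check_call(
            ['sudo', '-u', 'postgres', pg_ctl_path(), 'status',
             '-D', data_dir()],
            universal_newlines=True, stdout=subprocess.DEVNULL)
        return True
    except subprocess.CalledProcessError as x:
        ver = tuple(int(part) for part in version().split('.'))
        if ver >= (9, 4) and x.returncode in (3, 4):
            # 3 = not running, 4 = missing data directory (9.4+).
            return False
        if ver >= (9, 2) and x.returncode == 3:
            # 3 = not running (9.2 and 9.3).
            return False
        if ver < (9, 2) and x.returncode == 1 and not os.path.exists(pid_path()):
            # 9.1's pg_ctl only reports a generic failure; with no pid
            # file we assume the cluster is simply not running.
            return False
        raise  # Anything else bubbles up to the caller.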
Example #4
def set_active():
    if postgresql.is_running():
        if replication.is_master():
            msg = "Live master"
        elif postgresql.is_primary():
            msg = "Live primary"
        else:
            msg = "Live secondary"
        status_set("active", "{} ({})".format(msg, postgresql.point_version()))
    else:
        # PostgreSQL crashed! Maybe a bad configuration we failed to
        # pick up, or maybe a full disk. The admin will need to diagnose.
        status_set("blocked", "PostgreSQL unexpectedly shut down")
Example #6
def elect_master():
    """Elect a new master after the old one has departed.

    The new master is the secondary that has replayed the most
    WAL data. There must be no hot standbys still replicating
    data from the previous master, or we may end up with diverged
    timelines.

    Note we check replayed WAL rather than received WAL, because the
    servers have just been restarted with no master, and information
    about received WAL has been lost.
    """
    rel = helpers.get_peer_relation()
    local_unit = hookenv.local_unit()

    # The unit with the most advanced WAL offset should be the new master.
    if postgresql.is_running():
        local_offset = postgresql.wal_replay_offset(postgresql.connect())
        offsets = [(local_offset, local_unit)]
    else:
        offsets = []

    for unit, relinfo in rel.items():
        try:
            con = postgresql.connect(user=replication_username(), unit=unit)
            offsets.append((postgresql.wal_replay_offset(con), unit))
        except (psycopg2.Error, postgresql.InvalidConnection) as x:
            hookenv.log(
                "Unable to query replication state of {}: {}"
                "".format(unit, x),
                WARNING,
            )
            # TODO: Signal re-cloning required. Or autodetect
            # based on timeline switch. Or PG9.3+ could use pg_rewind.

    offsets.sort()
    if not offsets:
        # This should only happen if we fail over before replication has
        # been set up, like a test suite destroying units without waiting
        # for the initial deployment to complete.
        status_set("blocked", "No candidates for master found!")
        raise SystemExit(0)
    elected_master = offsets[0][1]
    return elected_master
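
The election compares numeric offsets, so the replay location each unit reports has to be converted into a comparable number. A minimal sketch of such a conversion is shown below, assuming the standard 'X/Y' textual LSN format and the replay-location functions used elsewhere in this charm; it is an illustration, not necessarily the charm's actual wal_replay_offset().

# Illustration only: turn the textual LSN PostgreSQL reports into an
# integer so offsets from different units can be sorted and compared.
def lsn_to_bytes(lsn):
    # 'X/Y' is the high and low 32 bits of the 64-bit LSN, in hex.
    high, low = lsn.split('/')
    return (int(high, 16) << 32) + int(low, 16)


def wal_replay_offset(con):
    cur = con.cursor()
    # pg_last_wal_replay_lsn() on PostgreSQL 10+,
    # pg_last_xlog_replay_location() on earlier releases.
    cur.execute('SELECT pg_last_xlog_replay_location()')
    return lsn_to_bytes(cur.fetchone()[0])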
Example #7
    def test_is_running(self, check_call, pg_ctl_path, data_dir, version):
        version.return_value = "9.2"
        pg_ctl_path.return_value = "/path/to/pg_ctl"
        data_dir.return_value = "/path/to/DATADIR"
        self.assertTrue(postgresql.is_running())
        check_call.assert_called_once_with(
            ["sudo", "-u", "postgres", "/path/to/pg_ctl", "status", "-D", "/path/to/DATADIR"],
            universal_newlines=True,
            stdout=subprocess.DEVNULL,
        )

        # Exit code 3 is pg_ctl(1) speak for 'not running', PG9.2+
        version.return_value = "9.2"
        check_call.side_effect = subprocess.CalledProcessError(3, "whoops")
        self.assertFalse(postgresql.is_running())
        version.return_value = "9.1"
        check_call.side_effect = subprocess.CalledProcessError(3, "whoops")
        with self.assertRaises(subprocess.CalledProcessError) as x:
            postgresql.is_running()
        self.assertEqual(x.exception.returncode, 3)

        # Exit code 4 is pg_ctl(1) speak for 'wtf is the $DATADIR', PG9.4+
        version.return_value = "9.4"
        check_call.side_effect = subprocess.CalledProcessError(4, "whoops")
        self.assertFalse(postgresql.is_running())
        version.return_value = "9.3"
        check_call.side_effect = subprocess.CalledProcessError(4, "whoops")
        with self.assertRaises(subprocess.CalledProcessError) as x:
            postgresql.is_running()
        self.assertEqual(x.exception.returncode, 4)

        # Other failures bubble up, not that they should occur.
        check_call.side_effect = subprocess.CalledProcessError(42, "whoops")
        with self.assertRaises(subprocess.CalledProcessError) as x:
            postgresql.is_running()
        self.assertEqual(x.exception.returncode, 42)
Example #8
def main():
    if not (reactive.is_state("postgresql.cluster.created") or reactive.is_state("postgresql.cluster.initial-check")):
        # We need to check for a pre-existing database before the main
        # PostgreSQL package has been installed. If there is one, abort
        # rather than risk destroying data. We need to do this here, as
        # the apt layer may pull in the main PostgreSQL package through
        # dependencies, per lp:1749284.
        if os.path.exists(postgresql.postgresql_conf_path()):
            hookenv.status_set(
                "blocked",
                "PostgreSQL config from previous install found at {}".format(postgresql.postgresql_conf_path()),
            )
        elif os.path.exists(postgresql.data_dir()):
            hookenv.status_set(
                "blocked",
                "PostgreSQL database from previous install found at {}".format(postgresql.postgresql.data_dir()),
            )
        else:
            hookenv.log("No pre-existing PostgreSQL database found")
            reactive.set_state("postgresql.cluster.initial-check")

    # Don't trust this state from the last hook. Daemons may have
    # crashed and servers rebooted since then.
    if reactive.is_state("postgresql.cluster.created"):
        try:
            reactive.toggle_state("postgresql.cluster.is_running", postgresql.is_running())
        except subprocess.CalledProcessError as x:
            if not reactive.is_state("workloadstatus.blocked"):
                status_set(
                    "blocked",
                    "Local PostgreSQL cluster is corrupt: {}".format(x.stderr),
                )

    # Reconfigure PostgreSQL. While we don't, strictly speaking, need
    # to do this every hook, we do need to do it almost every hook,
    # since even things like the number of peers or number of clients
    # can affect minimum viable configuration settings.
    reactive.remove_state("postgresql.cluster.configured")

    log_states()  # Debug noise.
Example #9
def wal_e_restore():
    reactive.remove_state("action.wal-e-restore")
    params = hookenv.action_get()
    backup = params["backup-name"].strip().replace("-", "_")
    storage_uri = params["storage-uri"].strip()

    ship_uri = hookenv.config().get("wal_e_storage_uri")
    if storage_uri == ship_uri:
        hookenv.action_fail(
            "The storage-uri parameter is identical to "
            "the wal_e_storage_uri config setting. To avoid "
            "corrupting the backups, your restoration source "
            "cannot be the same as the folder you are "
            "archiving to."
        )
        return

    if not params["confirm"]:
        m = "Recovery from {}.".format(storage_uri)
        if ship_uri:
            m += "\nContents of {} will be destroyed.".format(ship_uri)
        m += "\nExisting local database will be destroyed."
        m += "\nRerun action with 'confirm=true' to proceed."
        hookenv.action_set({"info": m})
        return

    with tempfile.TemporaryDirectory(prefix="wal-e", suffix="envdir") as envdir:
        update_wal_e_env_dir(envdir, storage_uri)

        # Confirm there is a backup to restore
        backups = wal_e_list_backups(envdir)
        if not backups:
            hookenv.action_fail("No backups found at {}".format(storage_uri))
            return
        if backup != "LATEST" and backup not in (b["name"] for b in backups):
            hookenv.action_fail("Backup {} not found".format(backup))
            return

        # Shut down PostgreSQL. Note we want this action to run
        # synchronously, so there is no opportunity to ask permission
        # from the leader. If there are other units cloning this
        # database, those clone operations will fail, which seems
        # preferable to blocking a recovery operation; if we are doing
        # disaster recovery, we generally want to do it right now.
        status_set("maintenance", "Stopping PostgreSQL for backup restoration")
        postgresql.stop()

        # Trash the existing database. It's dangerous to do this first,
        # but we probably need the space.
        data_dir = postgresql.data_dir()  # May be a symlink
        for content in os.listdir(data_dir):
            cpath = os.path.join(data_dir, content)
            if os.path.isdir(cpath) and not os.path.islink(cpath):
                shutil.rmtree(cpath)
            else:
                os.remove(cpath)

        # WAL-E recover
        status_set("maintenance", "Restoring backup {}".format(backup))
        wal_e_run(["backup-fetch", data_dir, backup], envdir=envdir)

        # Create recovery.conf to complete recovery
        is_master = reactive.is_state("postgresql.replication.is_master")
        standby_mode = "off" if is_master else "on"
        if params.get("target-time"):
            target_time = "recovery_target_time='{}'".format(params["target-time"])
        else:
            target_time = ""
        target_action = "promote" if is_master else "shutdown"
        immediate = "" if is_master else "recovery_target='immediate'"
        helpers.write(
            postgresql.recovery_conf_path(),
            dedent(
                """\
                             # Managed by Juju. PITR in progress.
                             standby_mode = {}
                             restore_command='{}'
                             recovery_target_timeline = {}
                             recovery_target_action = {}
                             {}
                             {}
                             """
            ).format(
                standby_mode,
                wal_e_restore_command(envdir=envdir),
                params["target-timeline"],
                target_action,
                target_time,
                immediate,
            ),
            mode=0o600,
            user="postgres",
            group="postgres",
        )

        # Avoid circular import. We could also avoid the import entirely
        # with a sufficiently complex set of handlers in the replication
        # module, but that seems to be a worse solution. Better to break
        # out this action into a separate module.
        from reactive.postgresql import replication

        if is_master:
            if ship_uri:
                # If master, trash the configured wal-e storage. This may
                # contain WAL and backups from the old cluster which will
                # conflict with the new cluster. Hopefully it does not
                # contain anything important, because we have no way to
                # prompt the user for confirmation.
                wal_e_run(["delete", "--confirm", "everything"])

            # Then, wait for recovery and promotion.
            postgresql.start()
            con = postgresql.connect()
            cur = con.cursor()
            while True:
                if postgresql.has_version("10"):
                    cur.execute(
                        """SELECT pg_is_in_recovery(),
                                          pg_last_wal_replay_lsn()"""
                    )
                else:
                    cur.execute(
                        """SELECT pg_is_in_recovery(),
                                          pg_last_xlog_replay_location()"""
                    )
                in_rec, loc = cur.fetchone()
                if not in_rec:
                    break
                status_set("maintenance", "Recovery at {}".format(loc))
                time.sleep(10)
        else:
            # If standby, startup and wait for recovery to complete and
            # shutdown.
            status_set("maintenance", "Recovery")
            # Startup might shutdown immediately and look like a failure.
            postgresql.start(ignore_failure=True)
            # No recovery point status yet for standbys, as we would need
            # to handle connection failures when the DB shuts down. We
            # should do this.
            while postgresql.is_running():
                time.sleep(5)
            replication.update_recovery_conf(follow=replication.get_master())

    # Reactive handlers will deal with the rest of the cleanup,
    # e.g. ensuring required users and roles exist.
    replication.update_replication_states()
    reactive.remove_state("postgresql.cluster.configured")
    reactive.toggle_state("postgresql.cluster.is_running", postgresql.is_running())
    reactive.remove_state("postgresql.nagios.user_ensured")
    reactive.remove_state("postgresql.replication.replication_user_created")
    reactive.remove_state("postgresql.client.published")