Example #1
    def create_basebackup(self,
                          site,
                          connection_info,
                          basebackup_path,
                          callback_queue=None,
                          metadata=None):
        connection_string, _ = replication_connection_string_and_slot_using_pgpass(
            connection_info)
        pg_version_server = self.check_pg_server_version(
            connection_string, site)
        if not self.check_pg_versions_ok(site, pg_version_server,
                                         "pg_basebackup"):
            if callback_queue:
                callback_queue.put({"success": False})
            return

        thread = PGBaseBackup(
            config=self.config,
            site=site,
            connection_info=connection_info,
            basebackup_path=basebackup_path,
            compression_queue=self.compression_queue,
            transfer_queue=self.transfer_queue,
            callback_queue=callback_queue,
            pg_version_server=pg_version_server,
            metrics=self.metrics,
            metadata=metadata,
        )
        thread.start()
        self.basebackups[site] = thread
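
The callback_queue parameter implements a simple completion handshake: the caller hands in a queue, and either the early-exit path above or (presumably) the finished PGBaseBackup thread puts a result dict on it. A self-contained sketch of that pattern, with hypothetical names (do_backup is a stand-in, not pghoard code):

    import threading
    from queue import Queue

    def do_backup(callback_queue=None):
        # Stand-in for create_basebackup(): report the outcome through the
        # optional queue, mirroring the {"success": False} early exit above.
        if callback_queue:
            callback_queue.put({"success": True})

    q = Queue()
    threading.Thread(target=do_backup, kwargs={"callback_queue": q}).start()
    print(q.get(timeout=10))  # -> {"success": True}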
Example #2
    def test_parse_backup_label(self, tmpdir):
        td = str(tmpdir)
        fn = os.path.join(td, "backup.tar")
        with tarfile.open(fn, "w") as tfile:
            with open(os.path.join(td, "backup_label"), "wb") as fp:
                fp.write(
                    b'''\
START WAL LOCATION: 0/4000028 (file 000000010000000000000004)
CHECKPOINT LOCATION: 0/4000060
BACKUP METHOD: streamed
BACKUP FROM: master
START TIME: 2015-02-12 14:07:19 GMT
LABEL: pg_basebackup base backup
'''
                )
            tfile.add(os.path.join(td, "backup_label"), arcname="backup_label")
        pgb = PGBaseBackup(
            config=None,
            site="foosite",
            connection_info=None,
            basebackup_path=None,
            compression_queue=None,
            storage=None,
            transfer_queue=None,
            metrics=metrics.Metrics(statsd={})
        )
        start_wal_segment, start_time = pgb.parse_backup_label_in_tar(fn)
        assert start_wal_segment == "000000010000000000000004"
        assert start_time == "2015-02-12T14:07:19+00:00"
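
The label format in the test is regular enough that the parsing step can be illustrated in a few lines. A minimal sketch, assuming the label always carries a GMT timestamp; this is not pghoard's parse_backup_label_in_tar, which also has to pull backup_label out of the tar first:

    import re
    from datetime import datetime, timezone

    def parse_backup_label(data):
        # Extract the start segment and start time from a backup_label blob
        # like the one written above.
        wal = re.search(r"^START WAL LOCATION: .* \(file ([0-9A-F]+)\)", data, re.M)
        ts = re.search(r"^START TIME: (.*)$", data, re.M)
        start_time = datetime.strptime(ts.group(1), "%Y-%m-%d %H:%M:%S %Z") \
            .replace(tzinfo=timezone.utc).isoformat()
        return wal.group(1), start_time

    # parse_backup_label(label) == ("000000010000000000000004",
    #                               "2015-02-12T14:07:19+00:00")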
Example #3
    def create_basebackup(self, site, connection_string, basebackup_path, callback_queue=None):
        pg_version_server = self.check_pg_server_version(connection_string)
        if not self.check_pg_versions_ok(pg_version_server, "pg_basebackup"):
            if callback_queue:
                callback_queue.put({"success": False})
            return None

        # Note that this xlog file value will only be correct if no other basebackups
        # are run in parallel. PGHoard never does this itself, but if the user starts
        # one on their own and tablespaces are set to False, we'll get an incorrect
        # start-wal-segment, since the pg_basebackup run by pghoard will not generate
        # a new checkpoint. In that case this xlog information would not be the oldest
        # WAL required to restore from this basebackup.
        current_xlog = wal.get_current_wal_from_identify_system(connection_string)

        thread = PGBaseBackup(
            config=self.config,
            site=site,
            connection_string=connection_string,
            basebackup_path=basebackup_path,
            compression_queue=self.compression_queue,
            transfer_queue=self.transfer_queue,
            callback_queue=callback_queue,
            start_wal_segment=current_xlog)
        thread.start()
        self.basebackups[site] = thread
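
For context on the current_xlog value: IDENTIFY_SYSTEM is a replication-protocol command that reports, among other things, the server's current write LSN, which can be mapped to a WAL segment file name. A sketch of that mapping, assuming psycopg2 and the default 16 MiB segment size (function name and details are illustrative, not pghoard's implementation):

    import psycopg2
    import psycopg2.extras

    WAL_SEG_SIZE = 16 * 1024 * 1024  # assuming the default 16 MiB segments

    def current_wal_segment(connection_string):
        # IDENTIFY_SYSTEM is only available on a replication connection
        conn = psycopg2.connect(
            connection_string,
            connection_factory=psycopg2.extras.PhysicalReplicationConnection)
        try:
            cur = conn.cursor()
            cur.execute("IDENTIFY_SYSTEM")
            _systemid, timeline, xlogpos, _dbname = cur.fetchone()
        finally:
            conn.close()
        # Map an LSN like "0/4000028" on timeline 1 to the segment file name
        # 000000010000000000000004 (timeline, log and seg as padded hex)
        log_hex, off_hex = xlogpos.split("/")
        segno = ((int(log_hex, 16) << 32) | int(off_hex, 16)) // WAL_SEG_SIZE
        return "%08X%08X%08X" % (timeline, segno >> 8, segno & 0xFF)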
Example #4
    def create_basebackup(self,
                          site,
                          connection_string,
                          basebackup_path,
                          callback_queue=None):
        pg_version_server = self.check_pg_server_version(connection_string)
        if pg_version_server:
            self.config["backup_sites"][site]["pg_version"] = pg_version_server
        if not self.check_pg_versions_ok(pg_version_server, "pg_basebackup"):
            if callback_queue:
                callback_queue.put({"success": False})
            return None

        # Note that this xlog file value will only be correct if no other basebackups
        # are run in parallel. PGHoard never does this itself, but if the user starts
        # one on their own and tablespaces are set to False, we'll get an incorrect
        # start-wal-segment, since the pg_basebackup run by pghoard will not generate
        # a new checkpoint. In that case this xlog information would not be the oldest
        # WAL required to restore from this basebackup.
        current_xlog = wal.get_current_wal_from_identify_system(
            connection_string)

        thread = PGBaseBackup(config=self.config,
                              site=site,
                              connection_string=connection_string,
                              basebackup_path=basebackup_path,
                              compression_queue=self.compression_queue,
                              transfer_queue=self.transfer_queue,
                              callback_queue=callback_queue,
                              start_wal_segment=current_xlog,
                              pg_version_server=pg_version_server)
        thread.start()
        self.basebackups[site] = thread
Example #5
    def _test_restore_basebackup(self, db, pghoard, tmpdir, active_backup_mode="archive_command"):
        backup_out = tmpdir.join("test-restore").strpath
        # Restoring to an empty directory works
        os.makedirs(backup_out)
        Restore().run([
            "get-basebackup",
            "--config", pghoard.config_path,
            "--site", pghoard.test_site,
            "--target-dir", backup_out,
        ])
        # Restoring on top of another $PGDATA doesn't
        with pytest.raises(RestoreError) as excinfo:
            Restore().run([
                "get-basebackup",
                "--config", pghoard.config_path,
                "--site", pghoard.test_site,
                "--target-dir", backup_out,
            ])
        assert "--overwrite not specified" in str(excinfo.value)
        # Until we use the --overwrite flag
        Restore().run([
            "get-basebackup",
            "--config", pghoard.config_path,
            "--site", pghoard.test_site,
            "--target-dir", backup_out,
            "--overwrite",
        ])
        check_call([os.path.join(db.pgbin, "pg_controldata"), backup_out])
        # TODO: check that the backup is valid

        # there should only be a single backup, so let's compare what was in the metadata
        # with what was in the backup label
        storage_config = common.get_object_storage_config(pghoard.config, pghoard.test_site)
        storage = get_transfer(storage_config)
        backups = storage.list_path(os.path.join(pghoard.config["backup_sites"][pghoard.test_site]["prefix"], "basebackup"))

        # let's grab the backup label details for what we restored
        pgb = PGBaseBackup(config=None, site="foosite", connection_info=None,
                           basebackup_path=None, compression_queue=None, transfer_queue=None,
                           metrics=metrics.Metrics(statsd={}))

        path = os.path.join(backup_out, "backup_label")
        with open(path, "r") as myfile:
            data = myfile.read()
            start_wal_segment, start_time = pgb.parse_backup_label(data)

        assert start_wal_segment == backups[0]['metadata']['start-wal-segment']
        assert start_time == backups[0]['metadata']['start-time']

        # for a standalone hot backup, the start wal file will be in the pg_xlog / pg_wal directory
        wal_dir = "pg_xlog"
        if float(db.pgver) >= float("10.0"):
            wal_dir = "pg_wal"

        path = os.path.join(backup_out, wal_dir, backups[0]['metadata']['start-wal-segment'])
        if active_backup_mode == "standalone_hot_backup":
            assert os.path.isfile(path) is True
        else:
            assert os.path.isfile(path) is False
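
The float(db.pgver) comparison above works for version strings like "9.6" and "10", but comparing the integer major version is slightly more robust and reads better; a minimal alternative:

    def wal_dir_name(pgver):
        # PostgreSQL renamed pg_xlog to pg_wal in version 10
        return "pg_wal" if int(str(pgver).split(".")[0]) >= 10 else "pg_xlog"

    assert wal_dir_name("9.6") == "pg_xlog"
    assert wal_dir_name("10") == "pg_wal"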
Example #6
    def test_find_and_split_files_to_backup(self, tmpdir):
        pgdata = str(tmpdir.mkdir("pgdata"))
        top = os.path.join(pgdata, "split_top")
        sub = os.path.join(top, "split_sub")
        os.makedirs(sub, exist_ok=True)
        with open(os.path.join(top, "f1"), "w") as f:
            f.write("a" * 50000)
        with open(os.path.join(top, "f2"), "w") as f:
            f.write("a" * 50000)
        with open(os.path.join(top, "f3"), "w") as f:
            f.write("a" * 50000)
        with open(os.path.join(sub, "f1"), "w") as f:
            f.write("a" * 50000)
        with open(os.path.join(sub, "f2"), "w") as f:
            f.write("a" * 50000)
        with open(os.path.join(sub, "f3"), "w") as f:
            f.write("a" * 50000)

        pgb = PGBaseBackup(
            config=None,
            site="foosite",
            connection_info=None,
            basebackup_path=None,
            compression_queue=None,
            storage=None,
            transfer_queue=None,
            metrics=metrics.Metrics(statsd={})
        )
        total_file_count, chunks = pgb.find_and_split_files_to_backup(
            pgdata=pgdata, tablespaces={}, target_chunk_size=110000
        )
        # 6 files and 2 directories
        assert total_file_count == 8
        assert len(chunks) == 3
        print(chunks)

        # split_top, split_top/f1, split_top/f2
        chunk1 = [c[0] for c in chunks[0]]
        assert len(chunk1) == 3
        assert chunk1[0] == "pgdata/split_top"
        assert chunk1[1] == "pgdata/split_top/f1"
        assert chunk1[2] == "pgdata/split_top/f2"

        # split_top, split_top/f3, split_top/split_sub, split_top/split_sub/f1
        chunk2 = [c[0] for c in chunks[1]]
        assert len(chunk2) == 4
        assert chunk2[0] == "pgdata/split_top"
        assert chunk2[1] == "pgdata/split_top/f3"
        assert chunk2[2] == "pgdata/split_top/split_sub"
        assert chunk2[3] == "pgdata/split_top/split_sub/f1"

        # split_top, split_top/split_sub, split_top/split_sub/f2, split_top/split_sub/f3
        chunk3 = [c[0] for c in chunks[2]]
        assert len(chunk3) == 4
        assert chunk3[0] == "pgdata/split_top"
        assert chunk3[1] == "pgdata/split_top/split_sub"
        assert chunk3[2] == "pgdata/split_top/split_sub/f2"
        assert chunk3[3] == "pgdata/split_top/split_sub/f3"
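
The chunk layout asserted above, roughly target_chunk_size bytes of file data per chunk with parent directories re-emitted in every chunk that contains their children, can be reproduced with a small greedy splitter. An illustrative sketch, not pghoard's implementation:

    import os

    def split_into_chunks(entries, target_chunk_size, root="pgdata"):
        # entries: (archive_name, size) pairs for plain files, in walk order;
        # directory entries are re-derived per chunk from the file paths
        chunks, current, current_size, seen_dirs = [], [], 0, set()

        def add_parents(name):
            # emit every ancestor directory below the archive root that this
            # chunk has not seen yet, outermost first
            parent = os.path.dirname(name)
            if parent != root and parent not in seen_dirs:
                add_parents(parent)
                seen_dirs.add(parent)
                current.append(parent)

        for name, size in entries:
            if current and current_size + size > target_chunk_size:
                chunks.append(current)
                current, current_size, seen_dirs = [], 0, set()
            add_parents(name)
            current.append(name)
            current_size += size
        if current:
            chunks.append(current)
        return chunks

    files = [("pgdata/split_top/f%d" % i, 50000) for i in (1, 2, 3)]
    files += [("pgdata/split_top/split_sub/f%d" % i, 50000) for i in (1, 2, 3)]
    # reproduces the three chunks asserted in the test above
    assert [len(c) for c in split_into_chunks(files, 110000)] == [3, 4, 4]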
Example #7
    def test_find_and_split_files_to_backup(self, tmpdir):
        pgdata = str(tmpdir.mkdir("pgdata"))
        top = os.path.join(pgdata, "split_top")
        sub = os.path.join(top, "split_sub")
        os.makedirs(sub, exist_ok=True)
        with open(os.path.join(top, "f1"), "w") as f:
            f.write("a" * 50000)
        with open(os.path.join(top, "f2"), "w") as f:
            f.write("a" * 50000)
        with open(os.path.join(top, "f3"), "w") as f:
            f.write("a" * 50000)
        with open(os.path.join(sub, "f1"), "w") as f:
            f.write("a" * 50000)
        with open(os.path.join(sub, "f2"), "w") as f:
            f.write("a" * 50000)
        with open(os.path.join(sub, "f3"), "w") as f:
            f.write("a" * 50000)

        pgb = PGBaseBackup(config=None, site="foosite", connection_info=None,
                           basebackup_path=None, compression_queue=None, transfer_queue=None,
                           metrics=metrics.Metrics(statsd={}))
        total_file_count, chunks = pgb.find_and_split_files_to_backup(
            pgdata=pgdata, tablespaces={}, target_chunk_size=110000
        )
        # 6 files and 2 directories
        assert total_file_count == 8
        assert len(chunks) == 3
        print(chunks)

        # split_top, split_top/f1, split_top/f2
        chunk1 = [c[0] for c in chunks[0]]
        assert len(chunk1) == 3
        assert chunk1[0] == "pgdata/split_top"
        assert chunk1[1] == "pgdata/split_top/f1"
        assert chunk1[2] == "pgdata/split_top/f2"

        # split_top, split_top/f3, split_top/split_sub, split_top/split_sub/f1
        chunk2 = [c[0] for c in chunks[1]]
        assert len(chunk2) == 4
        assert chunk2[0] == "pgdata/split_top"
        assert chunk2[1] == "pgdata/split_top/f3"
        assert chunk2[2] == "pgdata/split_top/split_sub"
        assert chunk2[3] == "pgdata/split_top/split_sub/f1"

        # split_top, split_top/split_sub, split_top/split_sub/f2, split_top/split_sub/f3
        chunk3 = [c[0] for c in chunks[2]]
        assert len(chunk3) == 4
        assert chunk3[0] == "pgdata/split_top"
        assert chunk3[1] == "pgdata/split_top/split_sub"
        assert chunk3[2] == "pgdata/split_top/split_sub/f2"
        assert chunk3[3] == "pgdata/split_top/split_sub/f3"
Example #8
    def test_parse_backup_label(self, tmpdir):
        td = str(tmpdir)
        fn = os.path.join(td, "backup.tar")
        with tarfile.open(fn, "w") as tfile:
            with open(os.path.join(td, "backup_label"), "wb") as fp:
                fp.write(b'''\
START WAL LOCATION: 0/4000028 (file 000000010000000000000004)
CHECKPOINT LOCATION: 0/4000060
BACKUP METHOD: streamed
BACKUP FROM: master
START TIME: 2015-02-12 14:07:19 GMT
LABEL: pg_basebackup base backup
''')
            tfile.add(os.path.join(td, "backup_label"), arcname="backup_label")
        pgb = PGBaseBackup(config=None, site="foosite", connection_string=None,
                           basebackup_path=None, compression_queue=None, transfer_queue=None)
        start_wal_segment, start_time = pgb.parse_backup_label(fn)
        assert start_wal_segment == "000000010000000000000004"
        assert start_time == "2015-02-12T14:07:19+00:00"
Example #9
    def create_basebackup(self, site, connection_info, basebackup_path, callback_queue=None):
        connection_string, _ = replication_connection_string_and_slot_using_pgpass(connection_info)
        pg_version_server = self.check_pg_server_version(connection_string, site)
        if not self.check_pg_versions_ok(site, pg_version_server, "pg_basebackup"):
            if callback_queue:
                callback_queue.put({"success": False})
            return

        thread = PGBaseBackup(
            config=self.config,
            site=site,
            connection_info=connection_info,
            basebackup_path=basebackup_path,
            compression_queue=self.compression_queue,
            transfer_queue=self.transfer_queue,
            callback_queue=callback_queue,
            pg_version_server=pg_version_server,
            metrics=self.metrics)
        thread.start()
        self.basebackups[site] = thread
Example #10
    def test_find_files(self, db):
        top1 = os.path.join(db.pgdata, "top1.test")
        top2 = os.path.join(db.pgdata, "top2.test")
        sub1 = os.path.join(db.pgdata, "global", "sub1.test")
        sub2 = os.path.join(db.pgdata, "global", "sub2.test")
        sub3 = os.path.join(db.pgdata, "global", "sub3.test")

        def create_test_files():
            # Create two temporary files at the top level and three in global/ that we'll unlink while iterating
            with open(top1, "w") as t1, open(top2, "w") as t2, \
                    open(sub1, "w") as s1, open(sub2, "w") as s2, open(sub3, "w") as s3:
                t1.write("t1\n")
                t2.write("t2\n")
                s1.write("s1\n")
                s2.write("s2\n")
                s3.write("s3\n")

        pgb = PGBaseBackup(config=None,
                           site="foosite",
                           connection_info=None,
                           basebackup_path=None,
                           compression_queue=None,
                           transfer_queue=None,
                           metrics=metrics.Metrics(statsd={}))
        create_test_files()
        files = pgb.find_files_to_backup(pgdata=db.pgdata, tablespaces={})
        first_file = next(files)
        os.unlink(top1)
        os.unlink(top2)
        os.unlink(sub1)
        os.unlink(sub2)

        # Missing files are not accepted at top level
        with pytest.raises(FileNotFoundError):
            list(files)

        # Recreate test files and unlink just the one from a subdirectory
        create_test_files()
        files = pgb.find_files_to_backup(pgdata=db.pgdata, tablespaces={})
        first_file = next(files)
        os.unlink(sub1)
        # Missing files in sub directories are ok
        ftbu = [first_file[:-1]] + list(f[:-1]
                                        for f in files if f[-1] != "leave")

        # Check that missing_ok flag is not set for top-level items
        for bu_path, local_path, missing_ok in ftbu:
            if os.path.dirname(bu_path) == "pgdata":
                assert missing_ok is False, (bu_path, local_path, missing_ok)
            else:
                assert missing_ok is True, (bu_path, local_path, missing_ok)

        # files to back up should include both top-level items and two of the sub-level items
        bunameset = set(item[0] for item in ftbu)
        assert len(bunameset) == len(ftbu)
        assert "pgdata/top1.test" in bunameset
        assert "pgdata/top2.test" in bunameset
        assert "pgdata/global/sub1.test" not in bunameset
        assert "pgdata/global/sub2.test" in bunameset
        assert "pgdata/global/sub3.test" in bunameset

        # Now delete a file on the top level before we have a chance of tarring anything
        os.unlink(top2)

        class FakeTar:
            def __init__(self):
                self.items = []

            def add(self, local_path, *, arcname, recursive):
                assert recursive is False
                self.items.append((local_path, arcname, os.stat(local_path)))

        # This will fail because top-level items may not be missing
        faketar = FakeTar()
        with pytest.raises(FileNotFoundError):
            pgb.write_files_to_tar(files=ftbu, tar=faketar)

        # Recreate test files and unlink just a subdirectory item
        create_test_files()
        os.unlink(sub2)

        # Now adding files should work and we should end up with every file except for sub2 in the archive
        faketar = FakeTar()
        pgb.write_files_to_tar(files=ftbu, tar=faketar)
        arcnameset = set(item[1] for item in faketar.items)
        assert len(arcnameset) == len(faketar.items)
        expected_items = bunameset - {"pgdata/global/sub2.test"}
        assert arcnameset == expected_items
        assert "pgdata/global/sub1.test" not in arcnameset  # not in set of files to backup
        assert "pgdata/global/sub2.test" not in arcnameset  # acceptable loss
        assert "pgdata/global/sub3.test" in arcnameset  # acceptable
Example #11
    def _test_restore_basebackup(self,
                                 db,
                                 pghoard,
                                 tmpdir,
                                 active_backup_mode="archive_command"):
        backup_out = tmpdir.join("test-restore").strpath
        # Restoring to an empty directory works
        os.makedirs(backup_out)
        Restore().run([
            "get-basebackup",
            "--config",
            pghoard.config_path,
            "--site",
            pghoard.test_site,
            "--target-dir",
            backup_out,
        ])
        # Restoring on top of another $PGDATA doesn't
        with pytest.raises(RestoreError) as excinfo:
            Restore().run([
                "get-basebackup",
                "--config",
                pghoard.config_path,
                "--site",
                pghoard.test_site,
                "--target-dir",
                backup_out,
            ])
        assert "--overwrite not specified" in str(excinfo.value)
        # Until we use the --overwrite flag
        Restore().run([
            "get-basebackup",
            "--config",
            pghoard.config_path,
            "--site",
            pghoard.test_site,
            "--target-dir",
            backup_out,
            "--overwrite",
        ])
        check_call([os.path.join(db.pgbin, "pg_controldata"), backup_out])
        # TODO: check that the backup is valid

        # there should only be a single backup, so let's compare what was in the metadata
        # with what was in the backup label
        storage_config = common.get_object_storage_config(
            pghoard.config, pghoard.test_site)
        storage = get_transfer(storage_config)
        backups = storage.list_path(
            os.path.join(
                pghoard.config["backup_sites"][pghoard.test_site]["prefix"],
                "basebackup"))

        # let's grab the backup label details for what we restored
        pgb = PGBaseBackup(config=None,
                           site="foosite",
                           connection_info=None,
                           basebackup_path=None,
                           compression_queue=None,
                           transfer_queue=None,
                           metrics=metrics.Metrics(statsd={}))

        path = os.path.join(backup_out, "backup_label")
        with open(path, "r") as myfile:
            data = myfile.read()
            start_wal_segment, start_time = pgb.parse_backup_label(data)

        assert start_wal_segment == backups[0]['metadata']['start-wal-segment']
        assert start_time == backups[0]['metadata']['start-time']

        # for a standalone hot backup, the start wal file will be in the pg_xlog / pg_wal directory
        wal_dir = "pg_xlog"
        if float(db.pgver) >= float("10.0"):
            wal_dir = "pg_wal"

        path = os.path.join(backup_out, wal_dir,
                            backups[0]['metadata']['start-wal-segment'])
        if active_backup_mode == "standalone_hot_backup":
            assert os.path.isfile(path) is True
        else:
            assert os.path.isfile(path) is False
Example #12
    def test_find_files(self, db):
        top1 = os.path.join(db.pgdata, "top1.test")
        top2 = os.path.join(db.pgdata, "top2.test")
        sub1 = os.path.join(db.pgdata, "global", "sub1.test")
        sub2 = os.path.join(db.pgdata, "global", "sub2.test")
        sub3 = os.path.join(db.pgdata, "global", "sub3.test")

        def create_test_files():
            # Create two temporary files at the top level and three in global/ that we'll unlink while iterating
            with open(top1, "w") as t1, open(top2, "w") as t2, \
                    open(sub1, "w") as s1, open(sub2, "w") as s2, open(sub3, "w") as s3:
                t1.write("t1\n")
                t2.write("t2\n")
                s1.write("s1\n")
                s2.write("s2\n")
                s3.write("s3\n")

        pgb = PGBaseBackup(config=None, site="foosite", connection_info=None,
                           basebackup_path=None, compression_queue=None, transfer_queue=None,
                           stats=statsd.StatsClient(host=None))
        create_test_files()
        files = pgb.find_files_to_backup(pgdata=db.pgdata, tablespaces={})
        first_file = next(files)
        os.unlink(top1)
        os.unlink(top2)
        os.unlink(sub1)
        os.unlink(sub2)

        # Missing files are not accepted at top level
        with pytest.raises(FileNotFoundError):
            list(files)

        # Recreate test files and unlink just the one from a subdirectory
        create_test_files()
        files = pgb.find_files_to_backup(pgdata=db.pgdata, tablespaces={})
        first_file = next(files)
        os.unlink(sub1)
        # Missing files in sub directories are ok
        ftbu = [first_file] + list(files)

        # Check that missing_ok flag is not set for top-level items
        for bu_path, local_path, missing_ok in ftbu:
            if os.path.dirname(bu_path) == "pgdata":
                assert missing_ok is False, (bu_path, local_path, missing_ok)
            else:
                assert missing_ok is True, (bu_path, local_path, missing_ok)

        # files to back up should include both top-level items and two of the sub-level items
        bunameset = set(item[0] for item in ftbu)
        assert len(bunameset) == len(ftbu)
        assert "pgdata/top1.test" in bunameset
        assert "pgdata/top2.test" in bunameset
        assert "pgdata/global/sub1.test" not in bunameset
        assert "pgdata/global/sub2.test" in bunameset
        assert "pgdata/global/sub3.test" in bunameset

        # Now delete a file on the top level before we have a chance of tarring anything
        os.unlink(top2)

        class FakeTar:
            def __init__(self):
                self.items = []

            def add(self, local_path, *, arcname, recursive):
                assert recursive is False
                self.items.append((local_path, arcname, os.stat(local_path)))

        # This will fail because top-level items may not be missing
        faketar = FakeTar()
        with pytest.raises(FileNotFoundError):
            pgb.write_files_to_tar(files=ftbu, tar=faketar)

        # Recreate test files and unlink just a subdirectory item
        create_test_files()
        os.unlink(sub2)

        # Now adding files should work and we should end up with every file except for sub2 in the archive
        faketar = FakeTar()
        pgb.write_files_to_tar(files=ftbu, tar=faketar)
        arcnameset = set(item[1] for item in faketar.items)
        assert len(arcnameset) == len(faketar.items)
        expected_items = bunameset - {"pgdata/global/sub2.test"}
        assert arcnameset == expected_items
        assert "pgdata/global/sub1.test" not in arcnameset  # not in set of files to backup
        assert "pgdata/global/sub2.test" not in arcnameset  # acceptable loss
        assert "pgdata/global/sub3.test" in arcnameset  # acceptable

        # Add pg_control
        pgb.write_pg_control_to_tar(pgdata=db.pgdata, tar=faketar)
        arcnameset = set(item[1] for item in faketar.items)
        assert len(arcnameset) == len(faketar.items)
        expected_items = (bunameset | {"pgdata/global/pg_control"}) - {"pgdata/global/sub2.test"}
        assert arcnameset == expected_items
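
The final assertions show pg_control being appended as a separate, last step with a fixed arcname. A plausible sketch of that helper (hypothetical, though the arcname is taken from the test):

    import os

    def write_pg_control_to_tar(pgdata, tar):
        # Writing pg_control last means a restore can defer it until every
        # other file is in place, so an interrupted restore is not mistaken
        # for a complete data directory
        tar.add(os.path.join(pgdata, "global", "pg_control"),
                arcname="pgdata/global/pg_control", recursive=False)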