def test_get_compressor_custom(self, _reset_custom_compressor):
    # GIVEN a Barman config which specifies custom compression
    config_mock = mock.Mock()
    config_mock.compression = "custom"
    config_mock.custom_compression_filter = "test_custom_compression_filter"
    config_mock.custom_decompression_filter = "test_custom_decompression_filter"
    # AND the custom compression magic bytes are set
    config_mock.custom_compression_magic = "0x28b52ffd"

    # WHEN the compression manager is created
    comp_manager = CompressionManager(config_mock, None)

    # THEN a default compressor can be obtained
    assert comp_manager.get_default_compressor() is not None

    # AND the magic bytes of the compressor match those in the config
    assert comp_manager.get_default_compressor().MAGIC == b"\x28\xb5\x2f\xfd"

    # AND unidentified_compression is set to None as there is no need
    # to make the legacy assumption that unidentified compression means
    # custom compression
    assert comp_manager.unidentified_compression is None

    # AND the value of MAGIC_MAX_LENGTH equals the length of the magic bytes
    assert comp_manager.MAGIC_MAX_LENGTH == 4
def __init__(self, server):
    '''Constructor'''
    self.name = "default"
    self.server = server
    self.config = server.config
    self.available_backups = {}
    self.compression_manager = CompressionManager(self.config)
def test_identify_compression(self, tmpdir):
    # prepare mock obj
    config_mock = mock.Mock()
    config_mock.compression = "bzip2"

    # check custom compression method creation
    comp_manager = CompressionManager(config_mock, None)
    assert comp_manager.get_default_compressor() is not None

    bz2_tmp_file = tmpdir.join("test_file")
    # "test" in bz2 compression
    bz2_tmp_file.write(
        base64.b64decode(
            b"QlpoOTFBWSZTWczDcdQAAAJBgAAQAgAMACAAIZpoM00Zl4u5IpwoSGZhuOoA"
        ),
        mode="wb",
    )
    compression_bz2 = comp_manager.identify_compression(bz2_tmp_file.strpath)
    assert compression_bz2 == "bzip2"

    zip_tmp_file = tmpdir.join("test_file")
    # "test" in gzip compression
    zip_tmp_file.write(
        base64.b64decode(b"H4sIAF0ssFIAAytJLS7hAgDGNbk7BQAAAA=="),
        mode="wb",
    )
    # check gzip compression identification
    compression_zip = comp_manager.identify_compression(zip_tmp_file.strpath)
    assert compression_zip == "gzip"
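# The test above exercises identify_compression, which works by matching the
# leading bytes of a file against each compressor's magic prefix. The sketch
# below is only an illustration of that idea under assumed names
# (KNOWN_MAGICS, identify_compression_sketch); it is not Barman's actual
# implementation.
import bz2
import gzip
import os
import tempfile

KNOWN_MAGICS = {
    b"\x1f\x8b": "gzip",            # gzip / pygzip
    b"BZh": "bzip2",                # bzip2 / pybzip2
    b"\x28\xb5\x2f\xfd": "zstd",    # zstd-style magic, usable as a custom magic
}
MAGIC_MAX_LENGTH = max(len(magic) for magic in KNOWN_MAGICS)


def identify_compression_sketch(path):
    """Return the compression name for ``path``, or None if unidentified."""
    with open(path, "rb") as f:
        header = f.read(MAGIC_MAX_LENGTH)
    for magic, name in KNOWN_MAGICS.items():
        if header.startswith(magic):
            return name
    return None


# Quick check of the sketch against real gzip and bz2 output.
tmp_dir = tempfile.mkdtemp()
gz_path = os.path.join(tmp_dir, "test.gz")
bz2_path = os.path.join(tmp_dir, "test.bz2")
with gzip.open(gz_path, "wb") as f:
    f.write(b"test")
with bz2.open(bz2_path, "wb") as f:
    f.write(b"test")
assert identify_compression_sketch(gz_path) == "gzip"
assert identify_compression_sketch(bz2_path) == "bzip2"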
def test_bzip2(self, tmpdir):
    config_mock = mock.Mock()
    compression_manager = CompressionManager(config_mock, tmpdir.strpath)
    compressor = PyBZip2Compressor(config=config_mock, compression="pybzip2")
    src = tmpdir.join("sourcefile")
    src.write("content")
    compressor.compress(src.strpath, BZIP2_FILE % tmpdir.strpath)
    assert os.path.exists(BZIP2_FILE % tmpdir.strpath)
    compression_zip = compression_manager.identify_compression(
        BZIP2_FILE % tmpdir.strpath,
    )
    assert compression_zip == "bzip2"

    compressor.decompress(
        BZIP2_FILE % tmpdir.strpath,
        BZIP2_FILE_UNCOMPRESSED % tmpdir.strpath,
    )
    f = open(BZIP2_FILE_UNCOMPRESSED % tmpdir.strpath).read()
    assert f == "content"
def test_get_compressor_custom_nomagic(self, _reset_custom_compressor):
    # GIVEN a Barman config which specifies custom compression
    config_mock = mock.Mock()
    config_mock.compression = "custom"
    config_mock.custom_compression_filter = "test_custom_compression_filter"
    config_mock.custom_decompression_filter = "test_custom_decompression_filter"
    # AND no magic bytes are set
    config_mock.custom_compression_magic = None

    # WHEN the compression manager is created
    comp_manager = CompressionManager(config_mock, None)

    # THEN a default compressor can be obtained
    assert comp_manager.get_default_compressor() is not None

    # AND the magic bytes of the compressor are None
    assert comp_manager.get_default_compressor().MAGIC is None

    # AND unidentified_compression is set to "custom" as this assumption
    # is the legacy way of identifying custom compression, used when magic
    # bytes are not set
    assert comp_manager.unidentified_compression == "custom"

    # AND the value of MAGIC_MAX_LENGTH equals the max length of the default
    # compressions
    assert comp_manager.MAGIC_MAX_LENGTH == 3
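# The two custom-compression tests assert MAGIC_MAX_LENGTH == 3 when no custom
# magic is configured (the longest built-in magic, bzip2's 3-byte b"BZh") and
# == 4 when a 4-byte custom magic such as the zstd frame magic is set. The
# sketch below shows how such a maximum could be derived; the names and the
# helper are hypothetical and do not mirror Barman's internals.
default_magics = {"gzip": b"\x1f\x8b", "bzip2": b"BZh"}


def magic_max_length(magics, custom_magic=None):
    """Length of the longest magic prefix, including an optional custom one."""
    lengths = [len(magic) for magic in magics.values()]
    if custom_magic is not None:
        lengths.append(len(custom_magic))
    return max(lengths)


assert magic_max_length(default_magics) == 3                       # built-ins only
assert magic_max_length(default_magics, b"\x28\xb5\x2f\xfd") == 4  # custom magic set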
def test_get_compressor_invalid(self):
    # prepare mock obj
    config_mock = mock.Mock()

    # check custom compression method creation
    comp_manager = CompressionManager(config_mock)
    assert comp_manager.get_compressor("test_compression") is None
def test_get_compressor_invalid(self):
    # prepare mock obj
    config_mock = mock.Mock()

    # check custom compression method creation
    comp_manager = CompressionManager(config_mock, None)
    assert comp_manager.get_compressor("test_compression") is None
def test_get_compressor_bzip2(self):
    # prepare mock obj
    config_mock = mock.Mock()
    config_mock.compression = "bzip2"

    # check custom compression method creation
    comp_manager = CompressionManager(config_mock)
    assert comp_manager.get_compressor() is not None
def test_get_compressor_bzip2(self):
    # prepare mock obj
    config_mock = mock.Mock()
    config_mock.compression = "bzip2"

    # check custom compression method creation
    comp_manager = CompressionManager(config_mock, None)
    assert comp_manager.get_default_compressor() is not None
def __init__(self, server):
    '''Constructor'''
    self.name = "default"
    self.server = server
    self.config = server.config
    self.available_backups = {}
    self.compression_manager = CompressionManager(self.config)

    # used for error messages
    self.current_action = None
def test_get_compressor_custom(self):
    # prepare mock obj
    config_mock = mock.Mock()
    config_mock.compression = "custom"
    config_mock.custom_compression_filter = "test_custom_compression_filter"
    config_mock.custom_decompression_filter = "test_custom_decompression_filter"

    # check custom compression method creation
    comp_manager = CompressionManager(config_mock, None)
    assert comp_manager.get_default_compressor() is not None
def test_get_compressor_custom(self):
    # prepare mock obj
    config_mock = mock.Mock()
    config_mock.compression = "custom"
    config_mock.custom_compression_filter = "test_custom_compression_filter"
    config_mock.custom_decompression_filter = \
        "test_custom_decompression_filter"

    # check custom compression method creation
    comp_manager = CompressionManager(config_mock)
    assert comp_manager.get_compressor() is not None
def __init__(self, server):
    """
    Constructor
    """
    super(BackupManager, self).__init__()
    self.server = server
    self.config = server.config
    self._backup_cache = None
    self.compression_manager = CompressionManager(self.config, server.path)
    self.executor = None
    try:
        self.executor = BackupExecutor.factory(self)
    except SshCommandException as e:
        self.config.disabled = True
        self.config.msg_list.append(str(e).strip())
def __init__(self, server):
    """
    Constructor
    """
    super(BackupManager, self).__init__()
    self.name = "default"
    self.server = server
    self.config = server.config
    self._backup_cache = None
    self.compression_manager = CompressionManager(self.config, server.path)
    self.executor = None
    try:
        if self.config.backup_method == "postgres":
            self.executor = PostgresBackupExecutor(self)
        else:
            self.executor = RsyncBackupExecutor(self)
    except SshCommandException as e:
        self.config.disabled = True
        self.config.msg_list.append(str(e).strip())
def __init__(self, server):
    """
    Constructor
    """
    super(BackupManager, self).__init__()
    self.name = "default"
    self.server = server
    self.config = server.config
    self._backup_cache = None
    self.compression_manager = CompressionManager(self.config, server.path)
    self.executor = None
    try:
        self.executor = RsyncBackupExecutor(self)
    except SshCommandException as e:
        self.config.disabled = True
        self.config.msg_list.append(str(e).strip())
def test_check_with_compression(self):
    # prepare mock obj
    config_mock = mock.Mock()
    comp_manager = CompressionManager(config_mock)
    assert comp_manager.check('test_compression') is False
def test_check_compression_none(self):
    # prepare mock obj
    config_mock = mock.Mock()
    config_mock.compression = "custom"
    comp_manager = CompressionManager(config_mock)
    assert comp_manager.check() is True
def test_archive_wal(self, mock_compression_registry, tmpdir, capsys):
    """
    Test WalArchiver.archive_wal behaviour when the WAL file already
    exists in the archive
    """
    # Hack the compression registry so we do not attempt to use native gzip
    with patch.dict(
        "barman.compression.compression_registry",
        mock_compression_registry,
        clear=True,
    ):
        # Setup the test environment
        backup_manager = build_backup_manager(
            name="TestServer", global_conf={"barman_home": tmpdir.strpath}
        )
        # Replace mock compression manager with a real compression manager
        backup_manager.compression_manager = CompressionManager(
            backup_manager.config, tmpdir.strpath
        )
        backup_manager.server.get_backup.return_value = None

        basedir = tmpdir.join("main")
        incoming_dir = basedir.join("incoming")
        archive_dir = basedir.join("wals")
        xlog_db = archive_dir.join("xlog.db")
        wal_name = "000000010000000000000001"
        wal_file = incoming_dir.join(wal_name)
        wal_file.ensure()
        archive_dir.ensure(dir=True)
        xlog_db.ensure()
        backup_manager.server.xlogdb.return_value.__enter__.return_value = (
            xlog_db.open(mode="a")
        )
        archiver = FileWalArchiver(backup_manager)
        backup_manager.server.archivers = [archiver]

        # Tests a basic archival process
        wal_info = WalFileInfo.from_file(
            wal_file.strpath, backup_manager.compression_manager
        )
        archiver.archive_wal(None, wal_info)

        assert not os.path.exists(wal_file.strpath)
        assert os.path.exists(wal_info.fullpath(backup_manager.server))

        # Tests the archiver behaviour for duplicate WAL files, as the
        # wal file named '000000010000000000000001' was already archived
        # in the previous test
        wal_file.ensure()
        wal_info = WalFileInfo.from_file(
            wal_file.strpath, backup_manager.compression_manager
        )

        with pytest.raises(MatchingDuplicateWalFile):
            archiver.archive_wal(None, wal_info)

        # Tests the archiver behaviour for duplicated WAL files with
        # different contents
        wal_file.write("test")
        wal_info = WalFileInfo.from_file(
            wal_file.strpath, backup_manager.compression_manager
        )

        with pytest.raises(DuplicateWalFile):
            archiver.archive_wal(None, wal_info)

        # Tests the archiver behaviour for duplicate WAL files, as the
        # wal file named '000000010000000000000001' was already archived
        # in the previous test and the input file uses compression
        compressor = PyGZipCompressor(backup_manager.config, "pygzip")
        compressor.compress(wal_file.strpath, wal_file.strpath)
        wal_info = WalFileInfo.from_file(
            wal_file.strpath, backup_manager.compression_manager
        )
        assert os.path.exists(wal_file.strpath)

        with pytest.raises(MatchingDuplicateWalFile):
            archiver.archive_wal(None, wal_info)

        # Test the archiver behaviour when the incoming file is compressed
        # and it has been already archived and compressed.
        compressor.compress(
            wal_info.fullpath(backup_manager.server),
            wal_info.fullpath(backup_manager.server),
        )
        wal_info = WalFileInfo.from_file(
            wal_file.strpath, backup_manager.compression_manager
        )

        with pytest.raises(MatchingDuplicateWalFile):
            archiver.archive_wal(None, wal_info)

        # Reset the status of the incoming and WALs directory
        # removing the files archived during the preceding tests.
        os.unlink(wal_info.fullpath(backup_manager.server))
        os.unlink(wal_file.strpath)

        # Test the archival of a WAL file using compression.
        wal_file.write("test")
        wal_info = WalFileInfo.from_file(
            wal_file.strpath, backup_manager.compression_manager
        )
        archiver.archive_wal(compressor, wal_info)
        assert os.path.exists(wal_info.fullpath(backup_manager.server))
        assert not os.path.exists(wal_file.strpath)
        assert "pygzip" == CompressionManager(MagicMock(), "").identify_compression(
            wal_info.fullpath(backup_manager.server)
        )
def test_check_with_compression(self):
    # prepare mock obj
    config_mock = mock.Mock()
    comp_manager = CompressionManager(config_mock, None)
    assert comp_manager.check("test_compression") is False
class BackupManager(object): '''Manager of the backup archive for a server''' DEFAULT_STATUS_FILTER = (BackupInfo.DONE, ) def __init__(self, server): '''Constructor''' self.name = "default" self.server = server self.config = server.config self.available_backups = {} self.compression_manager = CompressionManager(self.config) def get_available_backups(self, status_filter=DEFAULT_STATUS_FILTER): ''' Get a list of available backups :param status_filter: default DEFAULT_STATUS_FILTER. The status of the backup list returned ''' if not isinstance(status_filter, tuple): status_filter = tuple(status_filter) if status_filter not in self.available_backups: available_backups = {} for filename in glob("%s/*/backup.info" % self.config.basebackups_directory): backup = BackupInfo(self.server, filename) if backup.status not in status_filter: continue available_backups[backup.backup_id] = backup self.available_backups[status_filter] = available_backups return available_backups else: return self.available_backups[status_filter] def get_previous_backup(self, backup_id, status_filter=DEFAULT_STATUS_FILTER): ''' Get the previous backup (if any) in the catalog :param status_filter: default DEFAULT_STATUS_FILTER. The status of the backup returned ''' if not isinstance(status_filter, tuple): status_filter = tuple(status_filter) backup = BackupInfo(self.server, backup_id=backup_id) available_backups = self.get_available_backups(status_filter + (backup.status, )) ids = sorted(available_backups.keys()) try: current = ids.index(backup_id) while current > 0: res = available_backups[ids[current - 1]] if res.status in status_filter: return res current -= 1 else: return None except ValueError: raise Exception('Could not find backup_id %s' % backup_id) def get_next_backup(self, backup_id, status_filter=DEFAULT_STATUS_FILTER): ''' Get the next backup (if any) in the catalog :param status_filter: default DEFAULT_STATUS_FILTER. The status of the backup returned ''' if not isinstance(status_filter, tuple): status_filter = tuple(status_filter) backup = BackupInfo(self.server, backup_id=backup_id) available_backups = self.get_available_backups(status_filter + (backup.status, )) ids = sorted(available_backups.keys()) try: current = ids.index(backup_id) while current < (len(ids) - 1): res = available_backups[ids[current + 1]] if res.status in status_filter: return res current += 1 else: return None except ValueError: raise Exception('Could not find backup_id %s' % backup_id) def get_last_backup(self, status_filter=DEFAULT_STATUS_FILTER): ''' Get the last backup (if any) in the catalog :param status_filter: default DEFAULT_STATUS_FILTER. The status of the backup returned ''' available_backups = self.get_available_backups(status_filter) if len(available_backups) == 0: return None ids = sorted(available_backups.keys()) return ids[-1] def get_first_backup(self, status_filter=DEFAULT_STATUS_FILTER): ''' Get the first backup (if any) in the catalog :param status_filter: default DEFAULT_STATUS_FILTER. 
The status of the backup returned ''' available_backups = self.get_available_backups(status_filter) if len(available_backups) == 0: return None ids = sorted(available_backups.keys()) return ids[0] def delete_backup(self, backup): ''' Delete a backup :param backup: the backup to delete ''' yield "Deleting backup %s for server %s" % (backup.backup_id, self.config.name) previous_backup = self.get_previous_backup(backup.backup_id) next_backup = self.get_next_backup(backup.backup_id) # remove the backup self.delete_basebackup(backup) if not previous_backup: # backup is the first one yield "Delete associated WAL segments:" remove_until = None if next_backup: remove_until = next_backup.begin_wal with self.server.xlogdb() as fxlogdb: xlogdb_new = fxlogdb.name + ".new" with open(xlogdb_new, 'w') as fxlogdb_new: for line in fxlogdb: name, _, _, _ = self.server.xlogdb_parse_line(line) if remove_until and name >= remove_until: fxlogdb_new.write(line) continue else: yield "\t%s" % name # Delete the WAL segment self.delete_wal(name) os.rename(xlogdb_new, fxlogdb.name) yield "Done" def build_script_env(self, backup_info, phase): """ Prepare the environment for executing a script """ previous_backup = self.get_previous_backup(backup_info.backup_id) env = {} env['BARMAN_BACKUP_DIR'] = backup_info.get_basebackup_directory() env['BARMAN_SERVER'] = self.config.name env['BARMAN_CONFIGURATION'] = self.config.config.config_file env['BARMAN_BACKUP_ID'] = backup_info.backup_id env['BARMAN_PREVIOUS_ID'] = previous_backup.backup_id if previous_backup else '' env['BARMAN_PHASE'] = phase env['BARMAN_STATUS'] = backup_info.status env['BARMAN_ERROR'] = backup_info.error or '' return env def run_pre_backup_script(self, backup_info): ''' Run the pre_backup_script if configured. This method must never throw any exception ''' try: script = self.config.pre_backup_script if script: _logger.info("Attempt to run pre_backup_script: %s", script) cmd = Command(script, env_append=self.build_script_env( backup_info, 'pre'), shell=True, check=False) ret = cmd() _logger.info("pre_backup_script returned %d", ret) except Exception: _logger.exception('Exception running pre_backup_script') def run_post_backup_script(self, backup_info): ''' Run the post_backup_script if configured. This method must never throw any exception ''' try: script = self.config.post_backup_script if script: _logger.info("Attempt to run post_backup_script: %s", script) cmd = Command(script, env_append=self.build_script_env( backup_info, 'post'), shell=True, check=False) ret = cmd() _logger.info("post_backup_script returned %d", ret) except Exception: _logger.exception('Exception running post_backup_script') def backup(self): ''' Performs a backup for the server ''' _logger.debug("initialising backup information") backup_stamp = datetime.datetime.now() current_action = "starting backup" backup_info = None try: backup_info = BackupInfo( self.server, backup_id=backup_stamp.strftime('%Y%m%dT%H%M%S')) backup_info.save() msg = "Starting backup for server %s in %s" % ( self.config.name, backup_info.get_basebackup_directory()) _logger.info(msg) yield msg # Run the pre-backup-script if present. 
self.run_pre_backup_script(backup_info) # Start the backup self.backup_start(backup_info) backup_info.set_attribute("begin_time", backup_stamp) backup_info.save() msg = "Backup start at xlog location: %s (%s, %08X)" % ( backup_info.begin_xlog, backup_info.begin_wal, backup_info.begin_offset) yield msg _logger.info(msg) current_action = "copying files" _logger.debug(current_action) try: # Start the copy msg = "Copying files." yield msg _logger.info(msg) backup_size = self.backup_copy(backup_info) backup_info.set_attribute("size", backup_size) msg = "Copy done." yield msg _logger.info(msg) except: raise else: current_action = "issuing stop of the backup" msg = "Asking PostgreSQL server to finalize the backup." yield msg _logger.info(msg) finally: self.backup_stop(backup_info) backup_info.set_attribute("status", "DONE") except: if backup_info: backup_info.set_attribute("status", "FAILED") backup_info.set_attribute("error", "failure %s" % current_action) msg = "Backup failed %s" % current_action _logger.exception(msg) yield msg else: msg = "Backup end at xlog location: %s (%s, %08X)" % ( backup_info.end_xlog, backup_info.end_wal, backup_info.end_offset) _logger.info(msg) yield msg msg = "Backup completed" _logger.info(msg) yield msg finally: if backup_info: backup_info.save() # Run the post-backup-script if present. self.run_post_backup_script(backup_info) def recover(self, backup, dest, tablespaces, target_tli, target_time, target_xid, exclusive, remote_command): ''' Performs a recovery of a backup :param backup: the backup to recover :param dest: the destination directory :param tablespaces: a dictionary of tablespaces :param target_tli: the target timeline :param target_time: the target time :param target_xid: the target xid :param exclusive: whether the recovery is exlusive or not :param remote_command: default None. The remote command to recover the base backup, in case of remote backup. ''' for line in self.cron(False): yield line recovery_dest = 'local' if remote_command: recovery_dest = 'remote' rsync = RsyncPgData(ssh=remote_command) msg = "Starting %s restore for server %s using backup %s " % ( recovery_dest, self.config.name, backup.backup_id) yield msg _logger.info(msg) msg = "Destination directory: %s" % dest yield msg _logger.info(msg) if backup.tablespaces: if remote_command: # TODO: remote dir preparation msg = "Skipping remote directory preparation, you must have done it by yourself." 
yield msg _logger.warning(msg) else: tblspc_dir = os.path.join(dest, 'pg_tblspc') if not os.path.exists(tblspc_dir): os.makedirs(tblspc_dir) for name, oid, location in backup.tablespaces: try: if name in tablespaces: location = tablespaces[name] tblspc_file = os.path.join(tblspc_dir, str(oid)) if os.path.exists(tblspc_file): os.unlink(tblspc_file) if os.path.exists( location) and not os.path.isdir(location): os.unlink(location) if not os.path.exists(location): os.makedirs(location) # test permissiones barman_write_check_file = os.path.join( location, '.barman_write_check') file(barman_write_check_file, 'a').close() os.unlink(barman_write_check_file) os.symlink(location, tblspc_file) except: msg = "ERROR: unable to prepare '%s' tablespace (destination '%s')" % ( name, location) _logger.critical(msg) raise SystemExit(msg) yield "\t%s, %s, %s" % (oid, name, location) target_epoch = None if target_time: try: target_datetime = dateutil.parser.parse(target_time) except: msg = "ERROR: unable to parse the target time parameter %r" % target_time _logger.critical(msg) raise SystemExit(msg) target_epoch = time.mktime(target_datetime.timetuple()) + ( target_datetime.microsecond / 1000000.) if target_time or target_xid or (target_tli and target_tli != backup.timeline): targets = {} if target_time: targets['time'] = str(target_datetime) if target_xid: targets['xid'] = str(target_xid) if target_tli and target_tli != backup.timeline: targets['timeline'] = str(target_tli) yield "Doing PITR. Recovery target %s" % \ (", ".join(["%s: %r" % (k, v) for k, v in targets.items()])) # Copy the base backup msg = "Copying the base backup." yield msg _logger.info(msg) self.recover_basebackup_copy(backup, dest, remote_command) _logger.info("Base backup copied.") # Prepare WAL segments local directory msg = "Copying required wal segments." 
_logger.info(msg) yield msg if target_time or target_xid or (target_tli and target_tli != backup.timeline): wal_dest = os.path.join(dest, 'barman_xlog') else: wal_dest = os.path.join(dest, 'pg_xlog') # Retrieve the list of required WAL segments according to recovery options xlogs = {} required_xlog_files = tuple( self.server.get_required_xlog_files(backup, target_tli, target_epoch, target_xid)) for filename in required_xlog_files: hashdir = xlog.hash_dir(filename) if hashdir not in xlogs: xlogs[hashdir] = [] xlogs[hashdir].append(filename) # Check decompression options decompressor = self.compression_manager.get_decompressor() # Restore WAL segments self.recover_xlog_copy(decompressor, xlogs, wal_dest, remote_command) _logger.info("Wal segmets copied.") # Generate recovery.conf file (only if needed by PITR) if target_time or target_xid or (target_tli and target_tli != backup.timeline): msg = "Generating recovery.conf" yield msg _logger.info(msg) if remote_command: tempdir = tempfile.mkdtemp(prefix='barman_recovery-') recovery = open(os.path.join(tempdir, 'recovery.conf'), 'w') else: recovery = open(os.path.join(dest, 'recovery.conf'), 'w') print >> recovery, "restore_command = 'cp barman_xlog/%f %p'" print >> recovery, "recovery_end_command = 'rm -fr barman_xlog'" if target_time: print >> recovery, "recovery_target_time = '%s'" % target_time if target_tli: print >> recovery, "recovery_target_timeline = %s" % target_tli if target_xid: print >> recovery, "recovery_target_xid = '%s'" % target_xid if exclusive: print >> recovery, "recovery_target_inclusive = '%s'" % ( not exclusive) recovery.close() if remote_command: recovery = rsync.from_file_list(['recovery.conf'], tempdir, ':%s' % dest) shutil.rmtree(tempdir) _logger.info('recovery.conf generated') else: # avoid shipping of just recovered pg_xlog files if remote_command: status_dir = tempfile.mkdtemp(prefix='barman_xlog_status-') else: status_dir = os.path.join(wal_dest, 'archive_status') os.makedirs(status_dir) # no need to check, it must not exist for filename in required_xlog_files: with file(os.path.join(status_dir, "%s.done" % filename), 'a') as f: f.write('') if remote_command: retval = rsync( '%s/' % status_dir, ':%s' % os.path.join(wal_dest, 'archive_status')) if retval != 0: msg = "WARNING: unable to populate pg_xlog/archive_status dorectory" yield msg _logger.warning(msg) shutil.rmtree(status_dir) # Disable dangerous setting in the target data dir if remote_command: tempdir = tempfile.mkdtemp(prefix='barman_recovery-') pg_config = os.path.join(tempdir, 'postgresql.conf') shutil.copy2( os.path.join(backup.get_basebackup_directory(), 'pgdata', 'postgresql.conf'), pg_config) else: pg_config = os.path.join(dest, 'postgresql.conf') if self.pg_config_mangle(pg_config, {'archive_command': 'false'}, "%s.origin" % pg_config): msg = "The archive_command was set to 'false' to prevent data losses." yield msg _logger.info(msg) if remote_command: recovery = rsync.from_file_list( ['postgresql.conf', 'postgresql.conf.origin'], tempdir, ':%s' % dest) shutil.rmtree(tempdir) # Found dangerous options in the configuration file (locations) clashes = self.pg_config_detect_possible_issues(pg_config) yield "" yield "Your PostgreSQL server has been successfully prepared for recovery!" yield "" yield "Please review network and archive related settings in the PostgreSQL" yield "configuration file before starting the just recovered instance." 
yield "" if clashes: yield "WARNING: Before starting up the recovered PostgreSQL server," yield "please review the also settings of the following configuration" yield "options as they might interfere with your current recovery attempt:" yield "" for name, value in sorted(clashes.items()): yield " %s = %s" % (name, value) yield "" _logger.info("Recovery completed successful.") def cron(self, verbose): ''' Executes maintenance operations, such as WAL trashing. :param verbose: print some information ''' found = False compressor = self.compression_manager.get_compressor() with self.server.xlogdb('a') as fxlogdb: if verbose: yield "Processing xlog segments for %s" % self.config.name available_backups = self.get_available_backups( BackupInfo.STATUS_ALL) for filename in sorted( glob(os.path.join(self.config.incoming_wals_directory, '*'))): if not found and not verbose: yield "Processing xlog segments for %s" % self.config.name found = True if not len(available_backups): msg = "No base backup available. Trashing file %s" % os.path.basename( filename) yield "\t%s" % msg _logger.warning(msg) os.unlink(filename) continue # Archive the WAL file basename, size, time = self.cron_wal_archival( compressor, filename) # Updates the information of the WAL archive with the latest segement's fxlogdb.write("%s\t%s\t%s\t%s\n" % (basename, size, time, self.config.compression)) _logger.info('Processed file %s', filename) yield "\t%s" % os.path.basename(filename) if not found and verbose: yield "\tno file found" # # Hooks # def delete_basebackup(self, backup): ''' Delete the given base backup :param backup: the backup to delete ''' backup_dir = backup.get_basebackup_directory() shutil.rmtree(backup_dir) def delete_wal(self, name): ''' Delete a WAL segment, with the given name :param name: the name of the WAL to delete ''' hashdir = os.path.join(self.config.wals_directory, xlog.hash_dir(name)) os.unlink(os.path.join(hashdir, name)) try: os.removedirs(hashdir) except: pass def backup_start(self, backup_info): ''' Start of the backup :param backup_info: the backup information structure ''' current_action = "connecting to database (%s)" % self.config.conninfo _logger.debug(current_action) # Set the PostgreSQL data directory current_action = "detecting data directory" _logger.debug(current_action) data_directory = self.server.get_pg_setting('data_directory') backup_info.set_attribute('pgdata', data_directory) # Set server version backup_info.set_attribute('version', self.server.server_version) # Set configuration files location cf = self.server.get_pg_configuration_files() if cf: for key in sorted(cf.keys()): backup_info.set_attribute(key, cf[key]) # Get server version and tablespaces information current_action = "detecting tablespaces" _logger.debug(current_action) tablespaces = self.server.get_pg_tablespaces() if tablespaces and len(tablespaces) > 0: backup_info.set_attribute("tablespaces", tablespaces) for oid, name, location in tablespaces: msg = "\t%s, %s, %s" % (oid, name, location) _logger.info(msg) # Issue pg_start_backup on the PostgreSQL server current_action = "issuing pg_start_backup command" _logger.debug(current_action) start_xlog, start_file_name, start_file_offset = self.server.pg_start_backup( ) backup_info.set_attribute("status", "STARTED") backup_info.set_attribute("timeline", int(start_file_name[0:8])) backup_info.set_attribute("begin_xlog", start_xlog) backup_info.set_attribute("begin_wal", start_file_name) backup_info.set_attribute("begin_offset", start_file_offset) def backup_copy(self, 
backup_info): ''' Perform the copy of the backup. This function returns the size of the backup (in bytes) :param backup_info: the backup information structure ''' backup_dest = os.path.join(backup_info.get_basebackup_directory(), 'pgdata') rsync = RsyncPgData(ssh=self.server.ssh_command, ssh_options=self.server.ssh_options) retval = rsync(':%s/' % backup_info.pgdata, backup_dest) if retval not in (0, 24): msg = "ERROR: data transfer failure" _logger.exception(msg) raise Exception(msg) # Copy configuration files (if not inside PGDATA) current_action = "copying configuration files" _logger.debug(current_action) cf = self.server.get_pg_configuration_files() if cf: for key in sorted(cf.keys()): # Consider only those that reside outside of the original PGDATA if cf[key]: if cf[key].find(backup_info.pgdata) == 0: current_action = "skipping %s as contained in %s directory" % ( key, backup_info.pgdata) _logger.debug(current_action) continue else: current_action = "copying %s as outside %s directory" % ( key, backup_info.pgdata) _logger.info(current_action) retval = rsync(':%s' % cf[key], backup_dest) if retval not in (0, 24): raise Exception("ERROR: data transfer failure") current_action = "calculating backup size" _logger.debug(current_action) backup_size = 0 for dirpath, _, filenames in os.walk(backup_dest): for f in filenames: fp = os.path.join(dirpath, f) backup_size += os.path.getsize(fp) return backup_size def backup_stop(self, backup_info): ''' Stop the backup :param backup_info: the backup information structure ''' stop_xlog, stop_file_name, stop_file_offset = self.server.pg_stop_backup( ) backup_info.set_attribute("end_time", datetime.datetime.now()) backup_info.set_attribute("end_xlog", stop_xlog) backup_info.set_attribute("end_wal", stop_file_name) backup_info.set_attribute("end_offset", stop_file_offset) def recover_basebackup_copy(self, backup, dest, remote_command=None): ''' Perform the actual copy of the base backup for recovery purposes :param backup: the backup to recover :param dest: the destination directory :param remote_command: default None. The remote command to recover the base backup, in case of remote backup. ''' rsync = RsyncPgData(ssh=remote_command) sourcedir = '%s/' % os.path.join(backup.get_basebackup_directory(), 'pgdata') if remote_command: dest = ':%s' % dest retval = rsync(sourcedir, dest) if retval != 0: raise Exception("ERROR: data transfer failure") # TODO: Manage different location for configuration files that were not within the data directory def recover_xlog_copy(self, decompressor, xlogs, wal_dest, remote_command=None): ''' Restore WAL segments :param decompressor: the decompressor for the file (if any) :param xlogs: the xlog dictionary to recover :param wal_dest: the destination directory for xlog recover :param remote_command: default None. The remote command to recover the xlog, in case of remote backup. 
''' rsync = RsyncPgData(ssh=remote_command) if remote_command: # If remote recovery tell rsync to copy them remotely wal_dest = ':%s' % wal_dest else: # we will not use rsync: destdir must exists if not os.path.exists(wal_dest): os.makedirs(wal_dest) if decompressor and remote_command: xlog_spool = tempfile.mkdtemp(prefix='barman_xlog-') for prefix in xlogs: source_dir = os.path.join(self.config.wals_directory, prefix) if decompressor: if remote_command: for segment in xlogs[prefix]: decompressor(os.path.join(source_dir, segment), os.path.join(xlog_spool, segment)) rsync.from_file_list(xlogs[prefix], xlog_spool, wal_dest) for segment in xlogs[prefix]: os.unlink(os.path.join(xlog_spool, segment)) else: # decompress directly to the right place for segment in xlogs[prefix]: decompressor(os.path.join(source_dir, segment), os.path.join(wal_dest, segment)) else: rsync.from_file_list( xlogs[prefix], "%s/" % os.path.join(self.config.wals_directory, prefix), wal_dest) if decompressor and remote_command: shutil.rmtree(xlog_spool) def cron_wal_archival(self, compressor, filename): ''' Archive a WAL segment from the incoming directory. This function returns the name, the size and the time of the WAL file. :param compressor: the compressor for the file (if any) :param filename: the name of the WAthe name of the WAL ''' basename = os.path.basename(filename) destdir = os.path.join(self.config.wals_directory, xlog.hash_dir(basename)) destfile = os.path.join(destdir, basename) time = os.stat(filename).st_mtime if not os.path.isdir(destdir): os.makedirs(destdir) if compressor: compressor(filename, destfile) shutil.copystat(filename, destfile) os.unlink(filename) else: os.rename(filename, destfile) return basename, os.stat(destfile).st_size, time def check(self): ''' This function performs some checks on the server. Returns 0 if all went well, 1 if any of the checks fails ''' if not self.compression_manager.check(): yield ("\tcompression settings: FAILED", False) else: status = 'OK' try: self.compression_manager.get_compressor() except CompressionIncompatibility, field: yield ("\tcompressor settings '%s': FAILED" % field, False) status = 'FAILED' try: self.compression_manager.get_decompressor() except CompressionIncompatibility, field: yield ("\tdecompressor settings '%s': FAILED" % field, False) status = 'FAILED' yield ("\tcompression settings: %s" % status, status == 'OK')
class BackupManager(RemoteStatusMixin): """Manager of the backup archive for a server""" DEFAULT_STATUS_FILTER = (BackupInfo.DONE, ) def __init__(self, server): """ Constructor """ super(BackupManager, self).__init__() self.name = "default" self.server = server self.config = server.config self._backup_cache = None self.compression_manager = CompressionManager(self.config, server.path) self.executor = None try: if self.config.backup_method == "postgres": self.executor = PostgresBackupExecutor(self) else: self.executor = RsyncBackupExecutor(self) except SshCommandException as e: self.config.disabled = True self.config.msg_list.append(str(e).strip()) def get_available_backups(self, status_filter=DEFAULT_STATUS_FILTER): """ Get a list of available backups :param status_filter: default DEFAULT_STATUS_FILTER. The status of the backup list returned """ # If the filter is not a tuple, create a tuple using the filter if not isinstance(status_filter, tuple): status_filter = tuple(status_filter, ) # Load the cache if necessary if self._backup_cache is None: self._load_backup_cache() # Filter the cache using the status filter tuple backups = {} for key, value in self._backup_cache.items(): if value.status in status_filter: backups[key] = value return backups def _load_backup_cache(self): """ Populate the cache of the available backups, reading information from disk. """ self._backup_cache = {} # Load all the backups from disk reading the backup.info files for filename in glob("%s/*/backup.info" % self.config.basebackups_directory): backup = BackupInfo(self.server, filename) self._backup_cache[backup.backup_id] = backup def backup_cache_add(self, backup_info): """ Register a BackupInfo object to the backup cache. NOTE: Initialise the cache - in case it has not been done yet :param barman.infofile.BackupInfo backup_info: the object we want to register in the cache """ # Load the cache if needed if self._backup_cache is None: self._load_backup_cache() # Insert the BackupInfo object into the cache self._backup_cache[backup_info.backup_id] = backup_info def backup_cache_remove(self, backup_info): """ Remove a BackupInfo object from the backup cache This method _must_ be called after removing the object from disk. :param barman.infofile.BackupInfo backup_info: the object we want to remove from the cache """ # Nothing to do if the cache is not loaded if self._backup_cache is None: return # Remove the BackupInfo object from the backups cache del self._backup_cache[backup_info.backup_id] def get_backup(self, backup_id): """ Return the backup information for the given backup id. If the backup_id is None or backup.info file doesn't exists, it returns None. :param str|None backup_id: the ID of the backup to return :rtype: BackupInfo|None """ if backup_id is not None: # Get all the available backups from the cache available_backups = self.get_available_backups( BackupInfo.STATUS_ALL) # Return the BackupInfo if present, or None return available_backups.get(backup_id) return None def get_previous_backup(self, backup_id, status_filter=DEFAULT_STATUS_FILTER): """ Get the previous backup (if any) in the catalog :param status_filter: default DEFAULT_STATUS_FILTER. 
The status of the backup returned """ if not isinstance(status_filter, tuple): status_filter = tuple(status_filter) backup = BackupInfo(self.server, backup_id=backup_id) available_backups = self.get_available_backups(status_filter + (backup.status, )) ids = sorted(available_backups.keys()) try: current = ids.index(backup_id) while current > 0: res = available_backups[ids[current - 1]] if res.status in status_filter: return res current -= 1 return None except ValueError: raise UnknownBackupIdException('Could not find backup_id %s' % backup_id) def get_next_backup(self, backup_id, status_filter=DEFAULT_STATUS_FILTER): """ Get the next backup (if any) in the catalog :param status_filter: default DEFAULT_STATUS_FILTER. The status of the backup returned """ if not isinstance(status_filter, tuple): status_filter = tuple(status_filter) backup = BackupInfo(self.server, backup_id=backup_id) available_backups = self.get_available_backups(status_filter + (backup.status, )) ids = sorted(available_backups.keys()) try: current = ids.index(backup_id) while current < (len(ids) - 1): res = available_backups[ids[current + 1]] if res.status in status_filter: return res current += 1 return None except ValueError: raise UnknownBackupIdException('Could not find backup_id %s' % backup_id) def get_last_backup_id(self, status_filter=DEFAULT_STATUS_FILTER): """ Get the id of the latest/last backup in the catalog (if exists) :param status_filter: The status of the backup to return, default to DEFAULT_STATUS_FILTER. :return string|None: ID of the backup """ available_backups = self.get_available_backups(status_filter) if len(available_backups) == 0: return None ids = sorted(available_backups.keys()) return ids[-1] def get_first_backup_id(self, status_filter=DEFAULT_STATUS_FILTER): """ Get the id of the oldest/first backup in the catalog (if exists) :param status_filter: The status of the backup to return, default to DEFAULT_STATUS_FILTER. :return string|None: ID of the backup """ available_backups = self.get_available_backups(status_filter) if len(available_backups) == 0: return None ids = sorted(available_backups.keys()) return ids[0] def delete_backup(self, backup): """ Delete a backup :param backup: the backup to delete """ available_backups = self.get_available_backups() minimum_redundancy = self.server.config.minimum_redundancy # Honour minimum required redundancy if backup.status == BackupInfo.DONE and \ minimum_redundancy >= len(available_backups): output.warning( "Skipping delete of backup %s for server %s " "due to minimum redundancy requirements " "(minimum redundancy = %s, " "current redundancy = %s)", backup.backup_id, self.config.name, len(available_backups), minimum_redundancy) return output.info("Deleting backup %s for server %s", backup.backup_id, self.config.name) previous_backup = self.get_previous_backup(backup.backup_id) next_backup = self.get_next_backup(backup.backup_id) # Delete all the data contained in the backup try: self.delete_backup_data(backup) except OSError as e: output.error("Failure deleting backup %s for server %s.\n%s", backup.backup_id, self.config.name, e) return # Check if we are deleting the first available backup if not previous_backup: # In the case of exclusive backup (default), removes any WAL # files associated to the backup being deleted. # In the case of concurrent backup, removes only WAL files # prior to the start of the backup being deleted, as they # might be useful to any concurrent backup started immediately # after. 
remove_until = None # means to remove all WAL files if next_backup: remove_until = next_backup elif BackupOptions.CONCURRENT_BACKUP in self.config.backup_options: remove_until = backup output.info("Delete associated WAL segments:") for name in self.remove_wal_before_backup(remove_until): output.info("\t%s", name) # As last action, remove the backup directory, # ending the delete operation try: self.delete_basebackup(backup) except OSError as e: output.error( "Failure deleting backup %s for server %s.\n%s\n" "Please manually remove the '%s' directory", backup.backup_id, self.config.name, e, backup.get_basebackup_directory()) return self.backup_cache_remove(backup) output.info("Done") def retry_backup_copy(self, target_function, *args, **kwargs): """ Execute the target backup copy function, retrying the configured number of times :param target_function: the base backup target function :param args: args for the target function :param kwargs: kwargs of the target function :return: the result of the target function """ attempts = 0 while True: try: # if is not the first attempt, output the retry number if attempts >= 1: output.warning("Copy of base backup: retry #%s", attempts) # execute the target function for backup copy return target_function(*args, **kwargs) # catch rsync errors except DataTransferFailure as e: # exit condition: if retry number is lower than configured # retry limit, try again; otherwise exit. if attempts < self.config.basebackup_retry_times: # Log the exception, for debugging purpose _logger.exception("Failure in base backup copy: %s", e) output.warning( "Copy of base backup failed, waiting for next " "attempt in %s seconds", self.config.basebackup_retry_sleep) # sleep for configured time. then try again time.sleep(self.config.basebackup_retry_sleep) attempts += 1 else: # if the max number of attempts is reached and # there is still an error, exit re-raising the exception. raise def backup(self): """ Performs a backup for the server """ _logger.debug("initialising backup information") self.executor.init() backup_info = None try: # Create the BackupInfo object representing the backup backup_info = BackupInfo( self.server, backup_id=datetime.datetime.now().strftime('%Y%m%dT%H%M%S')) backup_info.save() self.backup_cache_add(backup_info) output.info("Starting backup for server %s in %s", self.config.name, backup_info.get_basebackup_directory()) # Run the pre-backup-script if present. script = HookScriptRunner(self, 'backup_script', 'pre') script.env_from_backup_info(backup_info) script.run() # Run the pre-backup-retry-script if present. 
retry_script = RetryHookScriptRunner(self, 'backup_retry_script', 'pre') retry_script.env_from_backup_info(backup_info) retry_script.run() # Do the backup using the BackupExecutor self.executor.backup(backup_info) # Compute backup size and fsync it on disk self.backup_fsync_and_set_sizes(backup_info) # Mark the backup as DONE backup_info.set_attribute("status", "DONE") # Use BaseException instead of Exception to catch events like # KeyboardInterrupt (e.g.: CRTL-C) except BaseException as e: msg_lines = str(e).strip().splitlines() if backup_info: # Use only the first line of exception message # in backup_info error field backup_info.set_attribute("status", "FAILED") # If the exception has no attached message use the raw # type name if len(msg_lines) == 0: msg_lines = [type(e).__name__] backup_info.set_attribute( "error", "failure %s (%s)" % (self.executor.current_action, msg_lines[0])) output.error("Backup failed %s.\nDETAILS: %s\n%s", self.executor.current_action, msg_lines[0], '\n'.join(msg_lines[1:])) else: output.info("Backup end at xlog location: %s (%s, %08X)", backup_info.end_xlog, backup_info.end_wal, backup_info.end_offset) output.info("Backup completed") # Create a restore point after a backup target_name = 'barman_%s' % backup_info.backup_id self.server.postgres.create_restore_point(target_name) finally: if backup_info: backup_info.save() # Make sure we are not holding any PostgreSQL connection # during the post-backup scripts self.server.close() # Run the post-backup-retry-script if present. try: retry_script = RetryHookScriptRunner( self, 'backup_retry_script', 'post') retry_script.env_from_backup_info(backup_info) retry_script.run() except AbortedRetryHookScript as e: # Ignore the ABORT_STOP as it is a post-hook operation _logger.warning( "Ignoring stop request after receiving " "abort (exit code %d) from post-backup " "retry hook script: %s", e.hook.exit_status, e.hook.script) # Run the post-backup-script if present. script = HookScriptRunner(self, 'backup_script', 'post') script.env_from_backup_info(backup_info) script.run() output.result('backup', backup_info) def recover(self, backup_info, dest, tablespaces=None, target_tli=None, target_time=None, target_xid=None, target_name=None, exclusive=False, remote_command=None): """ Performs a recovery of a backup :param barman.infofile.BackupInfo backup_info: the backup to recover :param str dest: the destination directory :param dict[str,str]|None tablespaces: a tablespace name -> location map (for relocation) :param str|None target_tli: the target timeline :param str|None target_time: the target time :param str|None target_xid: the target xid :param str|None target_name: the target name created previously with pg_create_restore_point() function call :param bool exclusive: whether the recovery is exclusive or not :param str|None remote_command: default None. The remote command to recover the base backup, in case of remote backup. 
""" # Archive every WAL files in the incoming directory of the server self.server.archive_wal(verbose=False) # Delegate the recovery operation to a RecoveryExecutor object executor = RecoveryExecutor(self) recovery_info = executor.recover(backup_info, dest, tablespaces, target_tli, target_time, target_xid, target_name, exclusive, remote_command) # Output recovery results output.result('recovery', recovery_info['results']) def archive_wal(self, verbose=True): """ Executes WAL maintenance operations, such as archiving and compression If verbose is set to False, outputs something only if there is at least one file :param bool verbose: report even if no actions """ with self.server.xlogdb('a') as fxlogdb: for archiver in self.server.archivers: archiver.archive(fxlogdb, verbose) def cron_retention_policy(self): """ Retention policy management """ if (self.server.enforce_retention_policies and self.config.retention_policy_mode == 'auto'): available_backups = self.get_available_backups( BackupInfo.STATUS_ALL) retention_status = self.config.retention_policy.report() for bid in sorted(retention_status.keys()): if retention_status[bid] == BackupInfo.OBSOLETE: output.info( "Enforcing retention policy: removing backup %s for " "server %s" % (bid, self.config.name)) self.delete_backup(available_backups[bid]) def delete_basebackup(self, backup): """ Delete the basebackup dir of a given backup. :param barman.infofile.BackupInfo backup: the backup to delete """ backup_dir = backup.get_basebackup_directory() _logger.debug("Deleting base backup directory: %s" % backup_dir) shutil.rmtree(backup_dir) def delete_backup_data(self, backup): """ Delete the data contained in a given backup. :param barman.infofile.BackupInfo backup: the backup to delete """ if backup.tablespaces: if backup.backup_version == 2: tbs_dir = backup.get_basebackup_directory() else: tbs_dir = os.path.join(backup.get_data_directory(), 'pg_tblspc') for tablespace in backup.tablespaces: rm_dir = os.path.join(tbs_dir, str(tablespace.oid)) if os.path.exists(rm_dir): _logger.debug("Deleting tablespace %s directory: %s" % (tablespace.name, rm_dir)) shutil.rmtree(rm_dir) pg_data = backup.get_data_directory() if os.path.exists(pg_data): _logger.debug("Deleting PGDATA directory: %s" % pg_data) shutil.rmtree(pg_data) def delete_wal(self, wal_info): """ Delete a WAL segment, with the given WalFileInfo :param barman.infofile.WalFileInfo wal_info: the WAL to delete """ try: os.unlink(wal_info.fullpath(self.server)) try: os.removedirs(os.path.dirname(wal_info.fullpath(self.server))) except OSError: # This is not an error condition # We always try to remove the the trailing directories, # this means that hashdir is not empty. pass except OSError as e: output.warning( 'Ignoring deletion of WAL file %s ' 'for server %s: %s', wal_info.name, self.config.name, e) def check(self, check_strategy): """ This function does some checks on the server. 
:param CheckStrategy check_strategy: the strategy for the management of the results of the various checks """ # Check compression_setting parameter if self.config.compression and not self.compression_manager.check(): check_strategy.result(self.config.name, 'compression settings', False) else: status = True try: self.compression_manager.get_compressor() except CompressionIncompatibility as field: check_strategy.result(self.config.name, '%s setting' % field, False) status = False check_strategy.result(self.config.name, 'compression settings', status) # Failed backups check failed_backups = self.get_available_backups((BackupInfo.FAILED, )) status = len(failed_backups) == 0 check_strategy.result( self.config.name, 'failed backups', status, 'there are %s failed backups' % (len(failed_backups, ))) # Minimum redundancy checks no_backups = len(self.get_available_backups()) # Check minimum_redundancy_requirements parameter if no_backups < int(self.config.minimum_redundancy): status = False else: status = True check_strategy.result( self.config.name, 'minimum redundancy requirements', status, 'have %s backups, expected at least %s' % (no_backups, self.config.minimum_redundancy)) # TODO: Add a check for the existence of ssh and of rsync # Execute additional checks defined by the BackupExecutor if self.executor: self.executor.check(check_strategy) def status(self): """ This function show the server status """ # get number of backups no_backups = len(self.get_available_backups()) output.result('status', self.config.name, "backups_number", "No. of available backups", no_backups) output.result('status', self.config.name, "first_backup", "First available backup", self.get_first_backup_id()) output.result('status', self.config.name, "last_backup", "Last available backup", self.get_last_backup_id()) # Minimum redundancy check. if number of backups minor than minimum # redundancy, fail. if no_backups < self.config.minimum_redundancy: output.result( 'status', self.config.name, "minimum_redundancy", "Minimum redundancy requirements", "FAILED (%s/%s)" % (no_backups, self.config.minimum_redundancy)) else: output.result( 'status', self.config.name, "minimum_redundancy", "Minimum redundancy requirements", "satisfied (%s/%s)" % (no_backups, self.config.minimum_redundancy)) # Output additional status defined by the BackupExecutor if self.executor: self.executor.status() def fetch_remote_status(self): """ Build additional remote status lines defined by the BackupManager. This method does not raise any exception in case of errors, but set the missing values to None in the resulting dictionary. :rtype: dict[str, None|str] """ if self.executor: return self.executor.get_remote_status() else: return {} def rebuild_xlogdb(self): """ Rebuild the whole xlog database guessing it from the archive content. 
""" from os.path import isdir, join output.info("Rebuilding xlogdb for server %s", self.config.name) root = self.config.wals_directory default_compression = self.config.compression wal_count = label_count = history_count = 0 # lock the xlogdb as we are about replacing it completely with self.server.xlogdb('w') as fxlogdb: xlogdb_new = fxlogdb.name + ".new" with open(xlogdb_new, 'w') as fxlogdb_new: for name in sorted(os.listdir(root)): # ignore the xlogdb and its lockfile if name.startswith(self.server.XLOG_DB): continue fullname = join(root, name) if isdir(fullname): # all relevant files are in subdirectories hash_dir = fullname for wal_name in sorted(os.listdir(hash_dir)): fullname = join(hash_dir, wal_name) if isdir(fullname): _logger.warning( 'unexpected directory ' 'rebuilding the wal database: %s', fullname) else: if xlog.is_wal_file(fullname): wal_count += 1 elif xlog.is_backup_file(fullname): label_count += 1 else: _logger.warning( 'unexpected file ' 'rebuilding the wal database: %s', fullname) continue wal_info = WalFileInfo.from_file( fullname, default_compression=default_compression) fxlogdb_new.write(wal_info.to_xlogdb_line()) else: # only history files are here if xlog.is_history_file(fullname): history_count += 1 wal_info = WalFileInfo.from_file( fullname, default_compression=default_compression) fxlogdb_new.write(wal_info.to_xlogdb_line()) else: _logger.warning( 'unexpected file ' 'rebuilding the wal database: %s', fullname) os.fsync(fxlogdb_new.fileno()) shutil.move(xlogdb_new, fxlogdb.name) fsync_dir(os.path.dirname(fxlogdb.name)) output.info( 'Done rebuilding xlogdb for server %s ' '(history: %s, backup_labels: %s, wal_file: %s)', self.config.name, history_count, label_count, wal_count) def remove_wal_before_backup(self, backup_info): """ Remove WAL files which have been archived before the start of the provided backup. If no backup_info is provided delete all available WAL files :param BackupInfo|None backup_info: the backup information structure :return list: a list of removed WAL files """ removed = [] with self.server.xlogdb() as fxlogdb: xlogdb_new = fxlogdb.name + ".new" with open(xlogdb_new, 'w') as fxlogdb_new: for line in fxlogdb: wal_info = WalFileInfo.from_xlogdb_line(line) if not xlog.is_any_xlog_file(wal_info.name): output.error( "invalid xlog segment name %r\n" "HINT: Please run \"barman rebuild-xlogdb %s\" " "to solve this issue", wal_info.name, self.config.name) continue # Keeps the WAL segment if it is a history file or later # than the given backup (the first available) if (xlog.is_history_file(wal_info.name) or (backup_info and wal_info.name >= backup_info.begin_wal)): fxlogdb_new.write(wal_info.to_xlogdb_line()) continue else: self.delete_wal(wal_info) removed.append(wal_info.name) fxlogdb_new.flush() os.fsync(fxlogdb_new.fileno()) shutil.move(xlogdb_new, fxlogdb.name) fsync_dir(os.path.dirname(fxlogdb.name)) return removed def validate_last_backup_maximum_age(self, last_backup_maximum_age): """ Evaluate the age of the last available backup in a catalogue. If the last backup is older than the specified time interval (age), the function returns False. If within the requested age interval, the function returns True. 
:param timedate.timedelta last_backup_maximum_age: time interval representing the maximum allowed age for the last backup in a server catalogue :return tuple: a tuple containing the boolean result of the check and auxiliary information about the last backup current age """ # Get the ID of the last available backup backup_id = self.get_last_backup_id() if backup_id: # Get the backup object backup = BackupInfo(self.server, backup_id=backup_id) now = datetime.datetime.now(dateutil.tz.tzlocal()) # Evaluate the point of validity validity_time = now - last_backup_maximum_age # Pretty print of a time interval (age) msg = human_readable_timedelta(now - backup.end_time) # If the backup end time is older than the point of validity, # return False, otherwise return true if backup.end_time < validity_time: return False, msg else: return True, msg else: # If no backup is available return false return False, "No available backups" def backup_fsync_and_set_sizes(self, backup_info): """ Fsync all files in a backup and set the actual size on disk of a backup. Also evaluate the deduplication ratio and the deduplicated size if applicable. :param barman.infofile.BackupInfo backup_info: the backup to update """ # Calculate the base backup size self.executor.current_action = "calculating backup size" _logger.debug(self.executor.current_action) backup_size = 0 deduplicated_size = 0 backup_dest = backup_info.get_basebackup_directory() for dir_path, _, file_names in os.walk(backup_dest): # execute fsync() on the containing directory fsync_dir(dir_path) # execute fsync() on all the contained files for filename in file_names: file_path = os.path.join(dir_path, filename) file_fd = os.open(file_path, os.O_RDONLY) file_stat = os.fstat(file_fd) backup_size += file_stat.st_size # Excludes hard links from real backup size if file_stat.st_nlink == 1: deduplicated_size += file_stat.st_size os.fsync(file_fd) os.close(file_fd) # Save size into BackupInfo object backup_info.set_attribute('size', backup_size) backup_info.set_attribute('deduplicated_size', deduplicated_size) if backup_info.size > 0: deduplication_ratio = 1 - (float(backup_info.deduplicated_size) / backup_info.size) else: deduplication_ratio = 0 if self.config.reuse_backup == 'link': output.info("Backup size: %s. Actual size on disk: %s" " (-%s deduplication ratio)." % (pretty_size(backup_info.size), pretty_size(backup_info.deduplicated_size), '{percent:.2%}'.format(percent=deduplication_ratio))) else: output.info("Backup size: %s" % pretty_size(backup_info.size))
class BackupManager(RemoteStatusMixin): """Manager of the backup archive for a server""" DEFAULT_STATUS_FILTER = (BackupInfo.DONE,) def __init__(self, server): """ Constructor """ super(BackupManager, self).__init__() self.server = server self.config = server.config self._backup_cache = None self.compression_manager = CompressionManager(self.config, server.path) self.executor = None try: if self.config.backup_method == "postgres": self.executor = PostgresBackupExecutor(self) else: self.executor = RsyncBackupExecutor(self) except SshCommandException as e: self.config.disabled = True self.config.msg_list.append(str(e).strip()) @property def mode(self): """ Property defining the BackupInfo mode content """ if self.executor: return self.executor.mode return None def get_available_backups(self, status_filter=DEFAULT_STATUS_FILTER): """ Get a list of available backups :param status_filter: default DEFAULT_STATUS_FILTER. The status of the backup list returned """ # If the filter is not a tuple, create a tuple using the filter if not isinstance(status_filter, tuple): status_filter = tuple(status_filter,) # Load the cache if necessary if self._backup_cache is None: self._load_backup_cache() # Filter the cache using the status filter tuple backups = {} for key, value in self._backup_cache.items(): if value.status in status_filter: backups[key] = value return backups def _load_backup_cache(self): """ Populate the cache of the available backups, reading information from disk. """ self._backup_cache = {} # Load all the backups from disk reading the backup.info files for filename in glob("%s/*/backup.info" % self.config.basebackups_directory): backup = BackupInfo(self.server, filename) self._backup_cache[backup.backup_id] = backup def backup_cache_add(self, backup_info): """ Register a BackupInfo object to the backup cache. NOTE: Initialise the cache - in case it has not been done yet :param barman.infofile.BackupInfo backup_info: the object we want to register in the cache """ # Load the cache if needed if self._backup_cache is None: self._load_backup_cache() # Insert the BackupInfo object into the cache self._backup_cache[backup_info.backup_id] = backup_info def backup_cache_remove(self, backup_info): """ Remove a BackupInfo object from the backup cache This method _must_ be called after removing the object from disk. :param barman.infofile.BackupInfo backup_info: the object we want to remove from the cache """ # Nothing to do if the cache is not loaded if self._backup_cache is None: return # Remove the BackupInfo object from the backups cache del self._backup_cache[backup_info.backup_id] def get_backup(self, backup_id): """ Return the backup information for the given backup id. If the backup_id is None or backup.info file doesn't exists, it returns None. :param str|None backup_id: the ID of the backup to return :rtype: BackupInfo|None """ if backup_id is not None: # Get all the available backups from the cache available_backups = self.get_available_backups( BackupInfo.STATUS_ALL) # Return the BackupInfo if present, or None return available_backups.get(backup_id) return None def get_previous_backup(self, backup_id, status_filter=DEFAULT_STATUS_FILTER): """ Get the previous backup (if any) in the catalog :param status_filter: default DEFAULT_STATUS_FILTER. 
The status of the backup returned """ if not isinstance(status_filter, tuple): status_filter = tuple(status_filter) backup = BackupInfo(self.server, backup_id=backup_id) available_backups = self.get_available_backups(status_filter + (backup.status,)) ids = sorted(available_backups.keys()) try: current = ids.index(backup_id) while current > 0: res = available_backups[ids[current - 1]] if res.status in status_filter: return res current -= 1 return None except ValueError: raise UnknownBackupIdException('Could not find backup_id %s' % backup_id) def get_next_backup(self, backup_id, status_filter=DEFAULT_STATUS_FILTER): """ Get the next backup (if any) in the catalog :param status_filter: default DEFAULT_STATUS_FILTER. The status of the backup returned """ if not isinstance(status_filter, tuple): status_filter = tuple(status_filter) backup = BackupInfo(self.server, backup_id=backup_id) available_backups = self.get_available_backups(status_filter + (backup.status,)) ids = sorted(available_backups.keys()) try: current = ids.index(backup_id) while current < (len(ids) - 1): res = available_backups[ids[current + 1]] if res.status in status_filter: return res current += 1 return None except ValueError: raise UnknownBackupIdException('Could not find backup_id %s' % backup_id) def get_last_backup_id(self, status_filter=DEFAULT_STATUS_FILTER): """ Get the id of the latest/last backup in the catalog (if exists) :param status_filter: The status of the backup to return, default to DEFAULT_STATUS_FILTER. :return string|None: ID of the backup """ available_backups = self.get_available_backups(status_filter) if len(available_backups) == 0: return None ids = sorted(available_backups.keys()) return ids[-1] def get_first_backup_id(self, status_filter=DEFAULT_STATUS_FILTER): """ Get the id of the oldest/first backup in the catalog (if exists) :param status_filter: The status of the backup to return, default to DEFAULT_STATUS_FILTER. :return string|None: ID of the backup """ available_backups = self.get_available_backups(status_filter) if len(available_backups) == 0: return None ids = sorted(available_backups.keys()) return ids[0] def delete_backup(self, backup): """ Delete a backup :param backup: the backup to delete """ available_backups = self.get_available_backups() minimum_redundancy = self.server.config.minimum_redundancy # Honour minimum required redundancy if backup.status == BackupInfo.DONE and \ minimum_redundancy >= len(available_backups): output.warning("Skipping delete of backup %s for server %s " "due to minimum redundancy requirements " "(minimum redundancy = %s, " "current redundancy = %s)", backup.backup_id, self.config.name, len(available_backups), minimum_redundancy) return output.info("Deleting backup %s for server %s", backup.backup_id, self.config.name) previous_backup = self.get_previous_backup(backup.backup_id) next_backup = self.get_next_backup(backup.backup_id) # Delete all the data contained in the backup try: self.delete_backup_data(backup) except OSError as e: output.error("Failure deleting backup %s for server %s.\n%s", backup.backup_id, self.config.name, e) return # Check if we are deleting the first available backup if not previous_backup: # In the case of exclusive backup (default), removes any WAL # files associated to the backup being deleted. # In the case of concurrent backup, removes only WAL files # prior to the start of the backup being deleted, as they # might be useful to any concurrent backup started immediately # after. 
remove_until = None # means to remove all WAL files if next_backup: remove_until = next_backup elif BackupOptions.CONCURRENT_BACKUP in self.config.backup_options: remove_until = backup timelines_to_protect = set() # If remove_until is not set there are no backup left if remove_until: # Retrieve the list of extra timelines that contains at least # a backup. On such timelines we don't want to delete any WAL for value in self.get_available_backups( BackupInfo.STATUS_ARCHIVING).values(): # Ignore the backup that is being deleted if value == backup: continue timelines_to_protect.add(value.timeline) # Remove the timeline of `remove_until` from the list. # We have enough information to safely delete unused WAL files # on it. timelines_to_protect -= set([remove_until.timeline]) output.info("Delete associated WAL segments:") for name in self.remove_wal_before_backup(remove_until, timelines_to_protect): output.info("\t%s", name) # As last action, remove the backup directory, # ending the delete operation try: self.delete_basebackup(backup) except OSError as e: output.error("Failure deleting backup %s for server %s.\n%s\n" "Please manually remove the '%s' directory", backup.backup_id, self.config.name, e, backup.get_basebackup_directory()) return self.backup_cache_remove(backup) output.info("Done") def retry_backup_copy(self, target_function, *args, **kwargs): """ Execute the target backup copy function, retrying the configured number of times :param target_function: the base backup target function :param args: args for the target function :param kwargs: kwargs of the target function :return: the result of the target function """ attempts = 0 while True: try: # if is not the first attempt, output the retry number if attempts >= 1: output.warning("Copy of base backup: retry #%s", attempts) # execute the target function for backup copy return target_function(*args, **kwargs) # catch rsync errors except DataTransferFailure as e: # exit condition: if retry number is lower than configured # retry limit, try again; otherwise exit. if attempts < self.config.basebackup_retry_times: # Log the exception, for debugging purpose _logger.exception("Failure in base backup copy: %s", e) output.warning( "Copy of base backup failed, waiting for next " "attempt in %s seconds", self.config.basebackup_retry_sleep) # sleep for configured time. then try again time.sleep(self.config.basebackup_retry_sleep) attempts += 1 else: # if the max number of attempts is reached and # there is still an error, exit re-raising the exception. raise def backup(self): """ Performs a backup for the server """ _logger.debug("initialising backup information") self.executor.init() backup_info = None try: # Create the BackupInfo object representing the backup backup_info = BackupInfo( self.server, backup_id=datetime.datetime.now().strftime('%Y%m%dT%H%M%S')) backup_info.save() self.backup_cache_add(backup_info) output.info( "Starting backup using %s method for server %s in %s", self.mode, self.config.name, backup_info.get_basebackup_directory()) # Run the pre-backup-script if present. script = HookScriptRunner(self, 'backup_script', 'pre') script.env_from_backup_info(backup_info) script.run() # Run the pre-backup-retry-script if present. 
retry_script = RetryHookScriptRunner( self, 'backup_retry_script', 'pre') retry_script.env_from_backup_info(backup_info) retry_script.run() # Do the backup using the BackupExecutor self.executor.backup(backup_info) # Compute backup size and fsync it on disk self.backup_fsync_and_set_sizes(backup_info) # Mark the backup as DONE backup_info.set_attribute("status", "DONE") # Use BaseException instead of Exception to catch events like # KeyboardInterrupt (e.g.: CRTL-C) except BaseException as e: msg_lines = str(e).strip().splitlines() if backup_info: # Use only the first line of exception message # in backup_info error field backup_info.set_attribute("status", "FAILED") # If the exception has no attached message use the raw # type name if len(msg_lines) == 0: msg_lines = [type(e).__name__] backup_info.set_attribute( "error", "failure %s (%s)" % ( self.executor.current_action, msg_lines[0])) output.error("Backup failed %s.\nDETAILS: %s\n%s", self.executor.current_action, msg_lines[0], '\n'.join(msg_lines[1:])) else: output.info("Backup end at xlog location: %s (%s, %08X)", backup_info.end_xlog, backup_info.end_wal, backup_info.end_offset) output.info("Backup completed") # Create a restore point after a backup target_name = 'barman_%s' % backup_info.backup_id self.server.postgres.create_restore_point(target_name) finally: if backup_info: backup_info.save() # Make sure we are not holding any PostgreSQL connection # during the post-backup scripts self.server.close() # Run the post-backup-retry-script if present. try: retry_script = RetryHookScriptRunner( self, 'backup_retry_script', 'post') retry_script.env_from_backup_info(backup_info) retry_script.run() except AbortedRetryHookScript as e: # Ignore the ABORT_STOP as it is a post-hook operation _logger.warning("Ignoring stop request after receiving " "abort (exit code %d) from post-backup " "retry hook script: %s", e.hook.exit_status, e.hook.script) # Run the post-backup-script if present. script = HookScriptRunner(self, 'backup_script', 'post') script.env_from_backup_info(backup_info) script.run() output.result('backup', backup_info) def recover(self, backup_info, dest, tablespaces=None, target_tli=None, target_time=None, target_xid=None, target_name=None, exclusive=False, remote_command=None): """ Performs a recovery of a backup :param barman.infofile.BackupInfo backup_info: the backup to recover :param str dest: the destination directory :param dict[str,str]|None tablespaces: a tablespace name -> location map (for relocation) :param str|None target_tli: the target timeline :param str|None target_time: the target time :param str|None target_xid: the target xid :param str|None target_name: the target name created previously with pg_create_restore_point() function call :param bool exclusive: whether the recovery is exclusive or not :param str|None remote_command: default None. The remote command to recover the base backup, in case of remote backup. 
""" # Archive every WAL files in the incoming directory of the server self.server.archive_wal(verbose=False) # Delegate the recovery operation to a RecoveryExecutor object executor = RecoveryExecutor(self) recovery_info = executor.recover(backup_info, dest, tablespaces, target_tli, target_time, target_xid, target_name, exclusive, remote_command) # Output recovery results output.result('recovery', recovery_info['results']) def archive_wal(self, verbose=True): """ Executes WAL maintenance operations, such as archiving and compression If verbose is set to False, outputs something only if there is at least one file :param bool verbose: report even if no actions """ with self.server.xlogdb('a') as fxlogdb: for archiver in self.server.archivers: archiver.archive(fxlogdb, verbose) def cron_retention_policy(self): """ Retention policy management """ if (self.server.enforce_retention_policies and self.config.retention_policy_mode == 'auto'): available_backups = self.get_available_backups( BackupInfo.STATUS_ALL) retention_status = self.config.retention_policy.report() for bid in sorted(retention_status.keys()): if retention_status[bid] == BackupInfo.OBSOLETE: output.info( "Enforcing retention policy: removing backup %s for " "server %s" % (bid, self.config.name)) self.delete_backup(available_backups[bid]) def delete_basebackup(self, backup): """ Delete the basebackup dir of a given backup. :param barman.infofile.BackupInfo backup: the backup to delete """ backup_dir = backup.get_basebackup_directory() _logger.debug("Deleting base backup directory: %s" % backup_dir) shutil.rmtree(backup_dir) def delete_backup_data(self, backup): """ Delete the data contained in a given backup. :param barman.infofile.BackupInfo backup: the backup to delete """ if backup.tablespaces: if backup.backup_version == 2: tbs_dir = backup.get_basebackup_directory() else: tbs_dir = os.path.join(backup.get_data_directory(), 'pg_tblspc') for tablespace in backup.tablespaces: rm_dir = os.path.join(tbs_dir, str(tablespace.oid)) if os.path.exists(rm_dir): _logger.debug("Deleting tablespace %s directory: %s" % (tablespace.name, rm_dir)) shutil.rmtree(rm_dir) pg_data = backup.get_data_directory() if os.path.exists(pg_data): _logger.debug("Deleting PGDATA directory: %s" % pg_data) shutil.rmtree(pg_data) def delete_wal(self, wal_info): """ Delete a WAL segment, with the given WalFileInfo :param barman.infofile.WalFileInfo wal_info: the WAL to delete """ try: os.unlink(wal_info.fullpath(self.server)) try: os.removedirs(os.path.dirname(wal_info.fullpath(self.server))) except OSError: # This is not an error condition # We always try to remove the the trailing directories, # this means that hashdir is not empty. pass except OSError as e: output.warning('Ignoring deletion of WAL file %s ' 'for server %s: %s', wal_info.name, self.config.name, e) def check(self, check_strategy): """ This function does some checks on the server. 
:param CheckStrategy check_strategy: the strategy for the management of the results of the various checks """ # Check compression_setting parameter if self.config.compression and not self.compression_manager.check(): check_strategy.result(self.config.name, 'compression settings', False) else: status = True try: self.compression_manager.get_compressor() except CompressionIncompatibility as field: check_strategy.result(self.config.name, '%s setting' % field, False) status = False check_strategy.result(self.config.name, 'compression settings', status) # Failed backups check failed_backups = self.get_available_backups((BackupInfo.FAILED,)) status = len(failed_backups) == 0 check_strategy.result( self.config.name, 'failed backups', status, 'there are %s failed backups' % len(failed_backups) ) # Minimum redundancy checks no_backups = len(self.get_available_backups()) # Check minimum_redundancy_requirements parameter if no_backups < int(self.config.minimum_redundancy): status = False else: status = True check_strategy.result( self.config.name, 'minimum redundancy requirements', status, 'have %s backups, expected at least %s' % ( no_backups, self.config.minimum_redundancy)) # TODO: Add a check for the existence of ssh and of rsync # Execute additional checks defined by the BackupExecutor if self.executor: self.executor.check(check_strategy) def status(self): """ This function shows the server status """ # get number of backups no_backups = len(self.get_available_backups()) output.result('status', self.config.name, "backups_number", "No. of available backups", no_backups) output.result('status', self.config.name, "first_backup", "First available backup", self.get_first_backup_id()) output.result('status', self.config.name, "last_backup", "Last available backup", self.get_last_backup_id()) # Minimum redundancy check: if the number of backups is lower than # the minimum redundancy, fail. if no_backups < self.config.minimum_redundancy: output.result('status', self.config.name, "minimum_redundancy", "Minimum redundancy requirements", "FAILED (%s/%s)" % ( no_backups, self.config.minimum_redundancy)) else: output.result('status', self.config.name, "minimum_redundancy", "Minimum redundancy requirements", "satisfied (%s/%s)" % ( no_backups, self.config.minimum_redundancy)) # Output additional status defined by the BackupExecutor if self.executor: self.executor.status() def fetch_remote_status(self): """ Build additional remote status lines defined by the BackupManager. This method does not raise any exception in case of errors, but sets the missing values to None in the resulting dictionary. :rtype: dict[str, None|str] """ if self.executor: return self.executor.get_remote_status() else: return {} def rebuild_xlogdb(self): """ Rebuild the whole xlog database guessing it from the archive content. 
""" from os.path import isdir, join output.info("Rebuilding xlogdb for server %s", self.config.name) root = self.config.wals_directory default_compression = self.config.compression wal_count = label_count = history_count = 0 # lock the xlogdb as we are about replacing it completely with self.server.xlogdb('w') as fxlogdb: xlogdb_new = fxlogdb.name + ".new" with open(xlogdb_new, 'w') as fxlogdb_new: for name in sorted(os.listdir(root)): # ignore the xlogdb and its lockfile if name.startswith(self.server.XLOG_DB): continue fullname = join(root, name) if isdir(fullname): # all relevant files are in subdirectories hash_dir = fullname for wal_name in sorted(os.listdir(hash_dir)): fullname = join(hash_dir, wal_name) if isdir(fullname): _logger.warning( 'unexpected directory ' 'rebuilding the wal database: %s', fullname) else: if xlog.is_wal_file(fullname): wal_count += 1 elif xlog.is_backup_file(fullname): label_count += 1 else: _logger.warning( 'unexpected file ' 'rebuilding the wal database: %s', fullname) continue wal_info = WalFileInfo.from_file( fullname, default_compression=default_compression) fxlogdb_new.write(wal_info.to_xlogdb_line()) else: # only history files are here if xlog.is_history_file(fullname): history_count += 1 wal_info = WalFileInfo.from_file( fullname, default_compression=default_compression) fxlogdb_new.write(wal_info.to_xlogdb_line()) else: _logger.warning( 'unexpected file ' 'rebuilding the wal database: %s', fullname) os.fsync(fxlogdb_new.fileno()) shutil.move(xlogdb_new, fxlogdb.name) fsync_dir(os.path.dirname(fxlogdb.name)) output.info('Done rebuilding xlogdb for server %s ' '(history: %s, backup_labels: %s, wal_file: %s)', self.config.name, history_count, label_count, wal_count) def remove_wal_before_backup(self, backup_info, timelines_to_protect=None): """ Remove WAL files which have been archived before the start of the provided backup. If no backup_info is provided delete all available WAL files If timelines_to_protect list is passed, never remove a wal in one of these timelines. 
:param BackupInfo|None backup_info: the backup information structure :param set timelines_to_protect: optional list of timelines to protect :return list: a list of removed WAL files """ removed = [] with self.server.xlogdb() as fxlogdb: xlogdb_new = fxlogdb.name + ".new" with open(xlogdb_new, 'w') as fxlogdb_new: for line in fxlogdb: wal_info = WalFileInfo.from_xlogdb_line(line) if not xlog.is_any_xlog_file(wal_info.name): output.error( "invalid xlog segment name %r\n" "HINT: Please run \"barman rebuild-xlogdb %s\" " "to solve this issue", wal_info.name, self.config.name) continue # Keeps the WAL segment if it is a history file keep = xlog.is_history_file(wal_info.name) # Keeps the WAL segment if its timeline is in # `timelines_to_protect` if timelines_to_protect: tli, _, _ = xlog.decode_segment_name(wal_info.name) keep |= tli in timelines_to_protect # Keeps the WAL segment if it is newer # than the given backup (the first available) if backup_info: keep |= wal_info.name >= backup_info.begin_wal # If the file has to be kept write it in the new xlogdb # otherwise delete it and record it in the removed list if keep: fxlogdb_new.write(wal_info.to_xlogdb_line()) else: self.delete_wal(wal_info) removed.append(wal_info.name) fxlogdb_new.flush() os.fsync(fxlogdb_new.fileno()) shutil.move(xlogdb_new, fxlogdb.name) fsync_dir(os.path.dirname(fxlogdb.name)) return removed def validate_last_backup_maximum_age(self, last_backup_maximum_age): """ Evaluate the age of the last available backup in a catalogue. If the last backup is older than the specified time interval (age), the function returns False. If within the requested age interval, the function returns True. :param datetime.timedelta last_backup_maximum_age: time interval representing the maximum allowed age for the last backup in a server catalogue :return tuple: a tuple containing the boolean result of the check and auxiliary information about the last backup's current age """ # Get the ID of the last available backup backup_id = self.get_last_backup_id() if backup_id: # Get the backup object backup = BackupInfo(self.server, backup_id=backup_id) now = datetime.datetime.now(dateutil.tz.tzlocal()) # Evaluate the point of validity validity_time = now - last_backup_maximum_age # Pretty print of a time interval (age) msg = human_readable_timedelta(now - backup.end_time) # If the backup end time is older than the point of validity, # return False, otherwise return True if backup.end_time < validity_time: return False, msg else: return True, msg else: # If no backup is available return False return False, "No available backups" def backup_fsync_and_set_sizes(self, backup_info): """ Fsync all files in a backup and set the actual size on disk of a backup. Also evaluate the deduplication ratio and the deduplicated size if applicable. 
:param barman.infofile.BackupInfo backup_info: the backup to update """ # Calculate the base backup size self.executor.current_action = "calculating backup size" _logger.debug(self.executor.current_action) backup_size = 0 deduplicated_size = 0 backup_dest = backup_info.get_basebackup_directory() for dir_path, _, file_names in os.walk(backup_dest): # execute fsync() on the containing directory fsync_dir(dir_path) # execute fsync() on all the contained files for filename in file_names: file_path = os.path.join(dir_path, filename) file_fd = os.open(file_path, os.O_RDONLY) file_stat = os.fstat(file_fd) backup_size += file_stat.st_size # Excludes hard links from real backup size if file_stat.st_nlink == 1: deduplicated_size += file_stat.st_size os.fsync(file_fd) os.close(file_fd) # Save size into BackupInfo object backup_info.set_attribute('size', backup_size) backup_info.set_attribute('deduplicated_size', deduplicated_size) if backup_info.size > 0: deduplication_ratio = 1 - (float( backup_info.deduplicated_size) / backup_info.size) else: deduplication_ratio = 0 if self.config.reuse_backup == 'link': output.info( "Backup size: %s. Actual size on disk: %s" " (-%s deduplication ratio)." % ( pretty_size(backup_info.size), pretty_size(backup_info.deduplicated_size), '{percent:.2%}'.format(percent=deduplication_ratio) )) else: output.info("Backup size: %s" % pretty_size(backup_info.size))
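The WAL removal logic in remove_wal_before_backup above reduces to three independent keep conditions: the segment is a history file, its timeline is protected because it still contains other backups, or it is not older than the begin WAL of the oldest remaining backup. A hedged, self-contained sketch of that predicate; the arguments stand in for the barman.xlog helpers used above and are illustrative only.

def should_keep_wal(wal_name, is_history, wal_timeline,
                    timelines_to_protect=None, begin_wal=None):
    # Keep history files unconditionally
    keep = is_history
    # Keep segments belonging to a timeline that still contains backups
    if timelines_to_protect:
        keep |= wal_timeline in timelines_to_protect
    # Keep segments not older than the first remaining backup
    if begin_wal is not None:
        keep |= wal_name >= begin_wal
    return keep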
def test_compression_manager_creation(self): # prepare mock obj config_mock = mock.Mock() comp_manager = CompressionManager(config_mock, None) assert comp_manager
class BackupManager(object): '''Manager of the backup archive for a server''' DEFAULT_STATUS_FILTER = (BackupInfo.DONE,) DANGEROUS_OPTIONS = ['data_directory', 'config_file', 'hba_file', 'ident_file', 'external_pid_file', 'ssl_cert_file', 'ssl_key_file', 'ssl_ca_file', 'ssl_crl_file', 'unix_socket_directory'] def __init__(self, server): '''Constructor''' self.name = "default" self.server = server self.config = server.config self.available_backups = {} self.compression_manager = CompressionManager(self.config) # used for error messages self.current_action = None def get_available_backups(self, status_filter=DEFAULT_STATUS_FILTER): ''' Get a list of available backups :param status_filter: default DEFAULT_STATUS_FILTER. The status of the backup list returned ''' if not isinstance(status_filter, tuple): status_filter = tuple(status_filter,) if status_filter not in self.available_backups: available_backups = {} for filename in glob("%s/*/backup.info" % self.config.basebackups_directory): backup = BackupInfo(self.server, filename) if backup.status not in status_filter: continue available_backups[backup.backup_id] = backup self.available_backups[status_filter] = available_backups return available_backups else: return self.available_backups[status_filter] def get_previous_backup(self, backup_id, status_filter=DEFAULT_STATUS_FILTER): ''' Get the previous backup (if any) in the catalog :param status_filter: default DEFAULT_STATUS_FILTER. The status of the backup returned ''' if not isinstance(status_filter, tuple): status_filter = tuple(status_filter) backup = BackupInfo(self.server, backup_id=backup_id) available_backups = self.get_available_backups(status_filter + (backup.status,)) ids = sorted(available_backups.keys()) try: current = ids.index(backup_id) while current > 0: res = available_backups[ids[current - 1]] if res.status in status_filter: return res current -= 1 else: return None except ValueError: raise Exception('Could not find backup_id %s' % backup_id) def get_next_backup(self, backup_id, status_filter=DEFAULT_STATUS_FILTER): ''' Get the next backup (if any) in the catalog :param status_filter: default DEFAULT_STATUS_FILTER. The status of the backup returned ''' if not isinstance(status_filter, tuple): status_filter = tuple(status_filter) backup = BackupInfo(self.server, backup_id=backup_id) available_backups = self.get_available_backups(status_filter + (backup.status,)) ids = sorted(available_backups.keys()) try: current = ids.index(backup_id) while current < (len(ids) - 1): res = available_backups[ids[current + 1]] if res.status in status_filter: return res current += 1 else: return None except ValueError: raise Exception('Could not find backup_id %s' % backup_id) def get_last_backup(self, status_filter=DEFAULT_STATUS_FILTER): ''' Get the last backup (if any) in the catalog :param status_filter: default DEFAULT_STATUS_FILTER. The status of the backup returned ''' available_backups = self.get_available_backups(status_filter) if len(available_backups) == 0: return None ids = sorted(available_backups.keys()) return ids[-1] def get_first_backup(self, status_filter=DEFAULT_STATUS_FILTER): ''' Get the first backup (if any) in the catalog :param status_filter: default DEFAULT_STATUS_FILTER. 
The status of the backup returned ''' available_backups = self.get_available_backups(status_filter) if len(available_backups) == 0: return None ids = sorted(available_backups.keys()) return ids[0] def delete_backup(self, backup): ''' Delete a backup :param backup: the backup to delete ''' available_backups = self.get_available_backups() # Honour minimum required redundancy if backup.status == BackupInfo.DONE and self.server.config.minimum_redundancy >= len(available_backups): yield "Skipping delete of backup %s for server %s due to minimum redundancy requirements (%s)" % ( backup.backup_id, self.config.name, self.server.config.minimum_redundancy) _logger.warning("Could not delete backup %s for server %s - minimum redundancy = %s, current size = %s" % (backup.backup_id, self.config.name, self.server.config.minimum_redundancy, len(available_backups))) return yield "Deleting backup %s for server %s" % (backup.backup_id, self.config.name) previous_backup = self.get_previous_backup(backup.backup_id) next_backup = self.get_next_backup(backup.backup_id) # remove the backup self.delete_basebackup(backup) if not previous_backup: # backup is the first one yield "Delete associated WAL segments:" remove_until = None if next_backup: remove_until = next_backup.begin_wal with self.server.xlogdb() as fxlogdb: xlogdb_new = fxlogdb.name + ".new" with open(xlogdb_new, 'w') as fxlogdb_new: for line in fxlogdb: name, _, _, _ = self.server.xlogdb_parse_line(line) if remove_until and name >= remove_until: fxlogdb_new.write(line) continue else: yield "\t%s" % name # Delete the WAL segment self.delete_wal(name) os.rename(xlogdb_new, fxlogdb.name) yield "Done" def build_script_env(self, backup_info, phase): """ Prepare the environment for executing a script """ previous_backup = self.get_previous_backup(backup_info.backup_id) env = {} env['BARMAN_BACKUP_DIR'] = backup_info.get_basebackup_directory() env['BARMAN_SERVER'] = self.config.name env['BARMAN_CONFIGURATION'] = self.config.config.config_file env['BARMAN_BACKUP_ID'] = backup_info.backup_id env['BARMAN_PREVIOUS_ID'] = previous_backup.backup_id if previous_backup else '' env['BARMAN_PHASE'] = phase env['BARMAN_STATUS'] = backup_info.status env['BARMAN_ERROR'] = backup_info.error or '' env['BARMAN_VERSION'] = version.__version__ return env def run_pre_backup_script(self, backup_info): ''' Run the pre_backup_script if configured. This method must never throw any exception ''' try: script = self.config.pre_backup_script if script: _logger.info("Attempt to run pre_backup_script: %s", script) cmd = Command( script, env_append=self.build_script_env(backup_info, 'pre'), shell=True, check=False) ret = cmd() _logger.info("pre_backup_script returned %d", ret) except Exception: _logger.exception('Exception running pre_backup_script') def run_post_backup_script(self, backup_info): ''' Run the post_backup_script if configured. 
This method must never throw any exception ''' try: script = self.config.post_backup_script if script: _logger.info("Attempt to run post_backup_script: %s", script) cmd = Command( script, env_append=self.build_script_env(backup_info, 'post'), shell=True, check=False) ret = cmd() _logger.info("post_backup_script returned %d", ret) except Exception: _logger.exception('Exception running post_backup_script') def backup(self): ''' Performs a backup for the server ''' _logger.debug("initialising backup information") backup_stamp = datetime.datetime.now() self.current_action = "starting backup" backup_info = None try: backup_info = BackupInfo(self.server, backup_id=backup_stamp.strftime('%Y%m%dT%H%M%S')) backup_info.save() msg = "Starting backup for server %s in %s" % (self.config.name, backup_info.get_basebackup_directory()) _logger.info(msg) yield msg # Run the pre-backup-script if present. self.run_pre_backup_script(backup_info) # Start the backup self.backup_start(backup_info) backup_info.set_attribute("begin_time", backup_stamp) backup_info.save() msg = "Backup start at xlog location: %s (%s, %08X)" % (backup_info.begin_xlog, backup_info.begin_wal, backup_info.begin_offset) yield msg _logger.info(msg) self.current_action = "copying files" _logger.debug(self.current_action) try: # Start the copy msg = "Copying files." yield msg _logger.info(msg) backup_size = self.backup_copy(backup_info) backup_info.set_attribute("size", backup_size) msg = "Copy done." yield msg _logger.info(msg) except: raise else: self.current_action = "issuing stop of the backup" msg = "Asking PostgreSQL server to finalize the backup." yield msg _logger.info(msg) finally: self.backup_stop(backup_info) backup_info.set_attribute("status", "DONE") except: if backup_info: backup_info.set_attribute("status", "FAILED") backup_info.set_attribute("error", "failure %s" % self.current_action) msg = "Backup failed %s" % self.current_action _logger.exception(msg) raise Exception("ERROR: %s" % msg) else: msg = "Backup end at xlog location: %s (%s, %08X)" % (backup_info.end_xlog, backup_info.end_wal, backup_info.end_offset) _logger.info(msg) yield msg msg = "Backup completed" _logger.info(msg) yield msg finally: if backup_info: backup_info.save() # Run the post-backup-script if present. self.run_post_backup_script(backup_info) def recover(self, backup, dest, tablespaces, target_tli, target_time, target_xid, exclusive, remote_command): ''' Performs a recovery of a backup :param backup: the backup to recover :param dest: the destination directory :param tablespaces: a dictionary of tablespaces :param target_tli: the target timeline :param target_time: the target time :param target_xid: the target xid :param exclusive: whether the recovery is exlusive or not :param remote_command: default None. The remote command to recover the base backup, in case of remote backup. ''' for line in self.cron(False): yield line recovery_dest = 'local' if remote_command: recovery_dest = 'remote' rsync = RsyncPgData(ssh=remote_command) msg = "Starting %s restore for server %s using backup %s " % (recovery_dest, self.config.name, backup.backup_id) yield msg _logger.info(msg) msg = "Destination directory: %s" % dest yield msg _logger.info(msg) if backup.tablespaces: if remote_command: # TODO: remote dir preparation msg = "Skipping remote directory preparation, you must have done it by yourself." 
yield msg _logger.warning(msg) else: tblspc_dir = os.path.join(dest, 'pg_tblspc') if not os.path.exists(tblspc_dir): os.makedirs(tblspc_dir) for name, oid, location in backup.tablespaces: try: if name in tablespaces: location = tablespaces[name] tblspc_file = os.path.join(tblspc_dir, str(oid)) if os.path.exists(tblspc_file): os.unlink(tblspc_file) if os.path.exists(location) and not os.path.isdir(location): os.unlink(location) if not os.path.exists(location): os.makedirs(location) # test permissiones barman_write_check_file = os.path.join(location, '.barman_write_check') file(barman_write_check_file, 'a').close() os.unlink(barman_write_check_file) os.symlink(location, tblspc_file) except: msg = "ERROR: unable to prepare '%s' tablespace (destination '%s')" % (name, location) _logger.critical(msg) raise SystemExit(msg) yield "\t%s, %s, %s" % (oid, name, location) target_epoch = None if target_time: try: target_datetime = dateutil.parser.parse(target_time) except: msg = "ERROR: unable to parse the target time parameter %r" % target_time _logger.critical(msg) raise SystemExit(msg) target_epoch = time.mktime(target_datetime.timetuple()) + (target_datetime.microsecond / 1000000.) if target_time or target_xid or (target_tli and target_tli != backup.timeline): targets = {} if target_time: targets['time'] = str(target_datetime) if target_xid: targets['xid'] = str(target_xid) if target_tli and target_tli != backup.timeline: targets['timeline'] = str(target_tli) yield "Doing PITR. Recovery target %s" % \ (", ".join(["%s: %r" % (k, v) for k, v in targets.items()])) # Copy the base backup msg = "Copying the base backup." yield msg _logger.info(msg) self.recover_basebackup_copy(backup, dest, remote_command) _logger.info("Base backup copied.") # Prepare WAL segments local directory msg = "Copying required wal segments." 
_logger.info(msg) yield msg if target_time or target_xid or (target_tli and target_tli != backup.timeline): wal_dest = os.path.join(dest, 'barman_xlog') else: wal_dest = os.path.join(dest, 'pg_xlog') # Retrieve the list of required WAL segments according to recovery options xlogs = {} required_xlog_files = tuple(self.server.get_required_xlog_files(backup, target_tli, target_epoch, target_xid)) for filename in required_xlog_files: hashdir = xlog.hash_dir(filename) if hashdir not in xlogs: xlogs[hashdir] = [] xlogs[hashdir].append(filename) # Check decompression options decompressor = self.compression_manager.get_decompressor() # Restore WAL segments self.recover_xlog_copy(decompressor, xlogs, wal_dest, remote_command) _logger.info("WAL segments copied.") # Generate recovery.conf file (only if needed by PITR) if target_time or target_xid or (target_tli and target_tli != backup.timeline): msg = "Generating recovery.conf" yield msg _logger.info(msg) if remote_command: tempdir = tempfile.mkdtemp(prefix='barman_recovery-') recovery = open(os.path.join(tempdir, 'recovery.conf'), 'w') else: recovery = open(os.path.join(dest, 'recovery.conf'), 'w') print >> recovery, "restore_command = 'cp barman_xlog/%f %p'" print >> recovery, "recovery_end_command = 'rm -fr barman_xlog'" if target_time: print >> recovery, "recovery_target_time = '%s'" % target_time if target_tli: print >> recovery, "recovery_target_timeline = %s" % target_tli if target_xid: print >> recovery, "recovery_target_xid = '%s'" % target_xid if exclusive: print >> recovery, "recovery_target_inclusive = '%s'" % (not exclusive) recovery.close() if remote_command: recovery = rsync.from_file_list(['recovery.conf'], tempdir, ':%s' % dest) shutil.rmtree(tempdir) _logger.info('recovery.conf generated') else: # avoid shipping of just recovered pg_xlog files if remote_command: status_dir = tempfile.mkdtemp(prefix='barman_xlog_status-') else: status_dir = os.path.join(wal_dest, 'archive_status') os.makedirs(status_dir) # no need to check, it must not exist for filename in required_xlog_files: with file(os.path.join(status_dir, "%s.done" % filename), 'a') as f: f.write('') if remote_command: retval = rsync('%s/' % status_dir, ':%s' % os.path.join(wal_dest, 'archive_status')) if retval != 0: msg = "WARNING: unable to populate pg_xlog/archive_status directory" yield msg _logger.warning(msg) shutil.rmtree(status_dir) # Disable dangerous settings in the target data dir if remote_command: tempdir = tempfile.mkdtemp(prefix='barman_recovery-') pg_config = os.path.join(tempdir, 'postgresql.conf') shutil.copy2(os.path.join(backup.get_basebackup_directory(), 'pgdata', 'postgresql.conf'), pg_config) else: pg_config = os.path.join(dest, 'postgresql.conf') if self.pg_config_mangle(pg_config, {'archive_command': 'false'}, "%s.origin" % pg_config): msg = "The archive_command was set to 'false' to prevent data losses." yield msg _logger.info(msg) # Find dangerous options in the configuration file (locations) clashes = self.pg_config_detect_possible_issues(pg_config) if remote_command: recovery = rsync.from_file_list(['postgresql.conf', 'postgresql.conf.origin'], tempdir, ':%s' % dest) shutil.rmtree(tempdir) yield "" yield "Your PostgreSQL server has been successfully prepared for recovery!" yield "" yield "Please review network and archive related settings in the PostgreSQL" yield "configuration file before starting the just recovered instance." 
yield "" if clashes: yield "WARNING: Before starting up the recovered PostgreSQL server," yield "please review also the settings of the following configuration" yield "options as they might interfere with your current recovery attempt:" yield "" for name, value in sorted(clashes.items()): yield " %s = %s" % (name, value) yield "" _logger.info("Recovery completed successfully.") def cron(self, verbose): ''' Executes maintenance operations, such as WAL trashing. :param verbose: print some information ''' found = False compressor = self.compression_manager.get_compressor() with self.server.xlogdb('a') as fxlogdb: if verbose: yield "Processing xlog segments for %s" % self.config.name available_backups = self.get_available_backups(BackupInfo.STATUS_ALL) for filename in sorted(glob(os.path.join(self.config.incoming_wals_directory, '*'))): if not found and not verbose: yield "Processing xlog segments for %s" % self.config.name found = True if not len(available_backups): msg = "No base backup available. Trashing file %s" % os.path.basename(filename) yield "\t%s" % msg _logger.warning(msg) os.unlink(filename) continue # Archive the WAL file basename, size, time = self.cron_wal_archival(compressor, filename) # Update the WAL archive information with the latest segment's data fxlogdb.write("%s\t%s\t%s\t%s\n" % (basename, size, time, self.config.compression)) _logger.info('Processed file %s', filename) yield "\t%s" % os.path.basename(filename) if not found and verbose: yield "\tno file found" # Retention policy management if self.server.enforce_retention_policies and self.config.retention_policy_mode == 'auto': available_backups = self.get_available_backups(BackupInfo.STATUS_ALL) retention_status = self.config.retention_policy.report() for bid in sorted(retention_status.iterkeys()): if retention_status[bid] == BackupInfo.OBSOLETE: _logger.info("Enforcing retention policy: removing backup %s for server %s" % ( bid, self.config.name)) for line in self.delete_backup(available_backups[bid]): yield line # # Hooks # def delete_basebackup(self, backup): ''' Delete the given base backup :param backup: the backup to delete ''' backup_dir = backup.get_basebackup_directory() shutil.rmtree(backup_dir) def delete_wal(self, name): ''' Delete a WAL segment, with the given name :param name: the name of the WAL to delete ''' hashdir = os.path.join(self.config.wals_directory, xlog.hash_dir(name)) try: os.unlink(os.path.join(hashdir, name)) try: os.removedirs(hashdir) except: pass except: _logger.warning('Expected WAL file %s not found during delete', name) def backup_start(self, backup_info): ''' Start of the backup :param backup_info: the backup information structure ''' self.current_action = "connecting to database (%s)" % self.config.conninfo _logger.debug(self.current_action) # Set the PostgreSQL data directory self.current_action = "detecting data directory" _logger.debug(self.current_action) data_directory = self.server.get_pg_setting('data_directory') backup_info.set_attribute('pgdata', data_directory) # Set server version backup_info.set_attribute('version', self.server.server_version) # Set configuration files location cf = self.server.get_pg_configuration_files() if cf: for key in sorted(cf.keys()): backup_info.set_attribute(key, cf[key]) # Get server version and tablespaces information self.current_action = "detecting tablespaces" _logger.debug(self.current_action) tablespaces = self.server.get_pg_tablespaces() if tablespaces and len(tablespaces) > 0: backup_info.set_attribute("tablespaces", tablespaces) 
for oid, name, location in tablespaces: msg = "\t%s, %s, %s" % (oid, name, location) _logger.info(msg) # Issue pg_start_backup on the PostgreSQL server self.current_action = "issuing pg_start_backup command" _logger.debug(self.current_action) start_row = self.server.pg_start_backup() if start_row: start_xlog, start_file_name, start_file_offset = start_row backup_info.set_attribute("status", "STARTED") backup_info.set_attribute("timeline", int(start_file_name[0:8], 16)) backup_info.set_attribute("begin_xlog", start_xlog) backup_info.set_attribute("begin_wal", start_file_name) backup_info.set_attribute("begin_offset", start_file_offset) else: self.current_action = "starting the backup: PostgreSQL server is already in exclusive backup mode" raise Exception('concurrent exclusive backups are not allowed') def backup_copy(self, backup_info): ''' Perform the copy of the backup. This function returns the size of the backup (in bytes) :param backup_info: the backup information structure ''' backup_dest = os.path.join(backup_info.get_basebackup_directory(), 'pgdata') if self.server.basebackups_method == 'remote_rsync': rsync = RsyncPgDataRemote(server=self.config.name, user=self.server.ssh_user) source = "%s/" % (backup_info.pgdata) destination = "%s/%s/%s/" % (self.server.basebackups_rsync_uri, backup_info.backup_id, 'pgdata') retval = rsync(source, destination) else: rsync = RsyncPgData(ssh=self.server.ssh_command, ssh_options=self.server.ssh_options) retval = rsync(':%s/' % backup_info.pgdata, backup_dest) if retval not in (0, 24): msg = "ERROR: data transfer failure" _logger.exception(msg) raise Exception(retval) # Copy configuration files (if not inside PGDATA) self.current_action = "copying configuration files" _logger.debug(self.current_action) cf = self.server.get_pg_configuration_files() if cf: for key in sorted(cf.keys()): # Consider only those that reside outside of the original PGDATA if cf[key]: if cf[key].find(backup_info.pgdata) == 0: self.current_action = "skipping %s as contained in %s directory" % (key, backup_info.pgdata) _logger.debug(self.current_action) continue else: self.current_action = "copying %s as outside %s directory" % (key, backup_info.pgdata) _logger.info(self.current_action) retval = rsync(':%s' % cf[key], backup_dest) if retval not in (0, 24): raise Exception("ERROR: data transfer failure") self.current_action = "calculating backup size" _logger.debug(self.current_action) backup_size = 0 for dirpath, _, filenames in os.walk(backup_dest): for f in filenames: fp = os.path.join(dirpath, f) backup_size += os.path.getsize(fp) return backup_size def backup_stop(self, backup_info): ''' Stop the backup :param backup_info: the backup information structure ''' stop_xlog, stop_file_name, stop_file_offset = self.server.pg_stop_backup() backup_info.set_attribute("end_time", datetime.datetime.now()) backup_info.set_attribute("end_xlog", stop_xlog) backup_info.set_attribute("end_wal", stop_file_name) backup_info.set_attribute("end_offset", stop_file_offset) def recover_basebackup_copy(self, backup, dest, remote_command=None): ''' Perform the actual copy of the base backup for recovery purposes :param backup: the backup to recover :param dest: the destination directory :param remote_command: default None. The remote command to recover the base backup, in case of remote backup. 
''' rsync = RsyncPgData(ssh=remote_command) sourcedir = '%s/' % os.path.join(backup.get_basebackup_directory(), 'pgdata') if remote_command: dest = ':%s' % dest retval = rsync(sourcedir, dest) if retval != 0: raise Exception("ERROR: data transfer failure") # TODO: Manage different locations for configuration files that were not within the data directory def recover_xlog_copy(self, decompressor, xlogs, wal_dest, remote_command=None): ''' Restore WAL segments :param decompressor: the decompressor for the file (if any) :param xlogs: the xlog dictionary to recover :param wal_dest: the destination directory for xlog recover :param remote_command: default None. The remote command to recover the xlog, in case of remote backup. ''' rsync = RsyncPgData(ssh=remote_command) if remote_command: # If this is a remote recovery, tell rsync to copy them remotely wal_dest = ':%s' % wal_dest else: # we will not use rsync: destdir must exist if not os.path.exists(wal_dest): os.makedirs(wal_dest) if decompressor and remote_command: xlog_spool = tempfile.mkdtemp(prefix='barman_xlog-') for prefix in xlogs: source_dir = os.path.join(self.config.wals_directory, prefix) if decompressor: if remote_command: for segment in xlogs[prefix]: decompressor(os.path.join(source_dir, segment), os.path.join(xlog_spool, segment)) rsync.from_file_list(xlogs[prefix], xlog_spool, wal_dest) for segment in xlogs[prefix]: os.unlink(os.path.join(xlog_spool, segment)) else: # decompress directly to the right place for segment in xlogs[prefix]: decompressor(os.path.join(source_dir, segment), os.path.join(wal_dest, segment)) else: rsync.from_file_list(xlogs[prefix], "%s/" % os.path.join(self.config.wals_directory, prefix), wal_dest) if decompressor and remote_command: shutil.rmtree(xlog_spool) def cron_wal_archival(self, compressor, filename): ''' Archive a WAL segment from the incoming directory. This function returns the name, the size and the time of the WAL file. :param compressor: the compressor for the file (if any) :param filename: the name of the WAL file to archive ''' basename = os.path.basename(filename) destdir = os.path.join(self.config.wals_directory, xlog.hash_dir(basename)) destfile = os.path.join(destdir, basename) time = os.stat(filename).st_mtime if not os.path.isdir(destdir): os.makedirs(destdir) if compressor: compressor(filename, destfile) shutil.copystat(filename, destfile) os.unlink(filename) else: os.rename(filename, destfile) return basename, os.stat(destfile).st_size, time def check(self): ''' This function performs some checks on the server, yielding a (message, success) tuple for each check performed. ''' if not self.compression_manager.check(): yield ("\tcompression settings: FAILED", False) else: status = 'OK' try: self.compression_manager.get_compressor() except CompressionIncompatibility, field: yield ("\tcompressor settings '%s': FAILED" % field, False) status = 'FAILED' try: self.compression_manager.get_decompressor() except CompressionIncompatibility, field: yield ("\tdecompressor settings '%s': FAILED" % field, False) status = 'FAILED' yield ("\tcompression settings: %s" % status, status == 'OK')
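In this early version check() is a generator of (message, success) tuples rather than a method driven by a CheckStrategy, so callers are expected to iterate over it and aggregate the outcome themselves. A minimal consumer sketch, assuming backup_manager is an instance of the class above; the run_checks name is illustrative only.

def run_checks(backup_manager):
    # Print every check message and report whether all checks passed
    all_ok = True
    for message, ok in backup_manager.check():
        print(message)
        all_ok = all_ok and ok
    return all_ok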
class BackupManager(RemoteStatusMixin): """Manager of the backup archive for a server""" DEFAULT_STATUS_FILTER = BackupInfo.STATUS_COPY_DONE def __init__(self, server): """ Constructor """ super(BackupManager, self).__init__() self.server = server self.config = server.config self._backup_cache = None self.compression_manager = CompressionManager(self.config, server.path) self.executor = None try: if server.passive_node: self.executor = PassiveBackupExecutor(self) elif self.config.backup_method == "postgres": self.executor = PostgresBackupExecutor(self) elif self.config.backup_method == "local-rsync": self.executor = RsyncBackupExecutor(self, local_mode=True) else: self.executor = RsyncBackupExecutor(self) except SshCommandException as e: self.config.disabled = True self.config.msg_list.append(force_str(e).strip()) @property def mode(self): """ Property defining the BackupInfo mode content """ if self.executor: return self.executor.mode return None def get_available_backups(self, status_filter=DEFAULT_STATUS_FILTER): """ Get a list of available backups :param status_filter: default DEFAULT_STATUS_FILTER. The status of the backup list returned """ # If the filter is not a tuple, create a tuple using the filter if not isinstance(status_filter, tuple): status_filter = tuple(status_filter, ) # Load the cache if necessary if self._backup_cache is None: self._load_backup_cache() # Filter the cache using the status filter tuple backups = {} for key, value in self._backup_cache.items(): if value.status in status_filter: backups[key] = value return backups def _load_backup_cache(self): """ Populate the cache of the available backups, reading information from disk. """ self._backup_cache = {} # Load all the backups from disk reading the backup.info files for filename in glob("%s/*/backup.info" % self.config.basebackups_directory): backup = LocalBackupInfo(self.server, filename) self._backup_cache[backup.backup_id] = backup def backup_cache_add(self, backup_info): """ Register a BackupInfo object to the backup cache. NOTE: Initialise the cache - in case it has not been done yet :param barman.infofile.BackupInfo backup_info: the object we want to register in the cache """ # Load the cache if needed if self._backup_cache is None: self._load_backup_cache() # Insert the BackupInfo object into the cache self._backup_cache[backup_info.backup_id] = backup_info def backup_cache_remove(self, backup_info): """ Remove a BackupInfo object from the backup cache This method _must_ be called after removing the object from disk. :param barman.infofile.BackupInfo backup_info: the object we want to remove from the cache """ # Nothing to do if the cache is not loaded if self._backup_cache is None: return # Remove the BackupInfo object from the backups cache del self._backup_cache[backup_info.backup_id] def get_backup(self, backup_id): """ Return the backup information for the given backup id. If the backup_id is None or backup.info file doesn't exists, it returns None. :param str|None backup_id: the ID of the backup to return :rtype: BackupInfo|None """ if backup_id is not None: # Get all the available backups from the cache available_backups = self.get_available_backups( BackupInfo.STATUS_ALL) # Return the BackupInfo if present, or None return available_backups.get(backup_id) return None def get_previous_backup(self, backup_id, status_filter=DEFAULT_STATUS_FILTER): """ Get the previous backup (if any) in the catalog :param status_filter: default DEFAULT_STATUS_FILTER. 
The status of the backup returned """ if not isinstance(status_filter, tuple): status_filter = tuple(status_filter) backup = LocalBackupInfo(self.server, backup_id=backup_id) available_backups = self.get_available_backups(status_filter + (backup.status, )) ids = sorted(available_backups.keys()) try: current = ids.index(backup_id) while current > 0: res = available_backups[ids[current - 1]] if res.status in status_filter: return res current -= 1 return None except ValueError: raise UnknownBackupIdException('Could not find backup_id %s' % backup_id) def get_next_backup(self, backup_id, status_filter=DEFAULT_STATUS_FILTER): """ Get the next backup (if any) in the catalog :param status_filter: default DEFAULT_STATUS_FILTER. The status of the backup returned """ if not isinstance(status_filter, tuple): status_filter = tuple(status_filter) backup = LocalBackupInfo(self.server, backup_id=backup_id) available_backups = self.get_available_backups(status_filter + (backup.status, )) ids = sorted(available_backups.keys()) try: current = ids.index(backup_id) while current < (len(ids) - 1): res = available_backups[ids[current + 1]] if res.status in status_filter: return res current += 1 return None except ValueError: raise UnknownBackupIdException('Could not find backup_id %s' % backup_id) def get_last_backup_id(self, status_filter=DEFAULT_STATUS_FILTER): """ Get the id of the latest/last backup in the catalog (if exists) :param status_filter: The status of the backup to return, default to DEFAULT_STATUS_FILTER. :return string|None: ID of the backup """ available_backups = self.get_available_backups(status_filter) if len(available_backups) == 0: return None ids = sorted(available_backups.keys()) return ids[-1] def get_first_backup_id(self, status_filter=DEFAULT_STATUS_FILTER): """ Get the id of the oldest/first backup in the catalog (if exists) :param status_filter: The status of the backup to return, default to DEFAULT_STATUS_FILTER. :return string|None: ID of the backup """ available_backups = self.get_available_backups(status_filter) if len(available_backups) == 0: return None ids = sorted(available_backups.keys()) return ids[0] def delete_backup(self, backup): """ Delete a backup :param backup: the backup to delete :return bool: True if deleted, False if could not delete the backup """ available_backups = self.get_available_backups( status_filter=(BackupInfo.DONE, )) minimum_redundancy = self.server.config.minimum_redundancy # Honour minimum required redundancy if backup.status == BackupInfo.DONE and \ minimum_redundancy >= len(available_backups): output.warning( "Skipping delete of backup %s for server %s " "due to minimum redundancy requirements " "(minimum redundancy = %s, " "current redundancy = %s)", backup.backup_id, self.config.name, minimum_redundancy, len(available_backups)) return False # Keep track of when the delete operation started. delete_start_time = datetime.datetime.now() # Run the pre_delete_script if present. script = HookScriptRunner(self, 'delete_script', 'pre') script.env_from_backup_info(backup) script.run() # Run the pre_delete_retry_script if present. 
retry_script = RetryHookScriptRunner(self, 'delete_retry_script', 'pre') retry_script.env_from_backup_info(backup) retry_script.run() output.info("Deleting backup %s for server %s", backup.backup_id, self.config.name) previous_backup = self.get_previous_backup(backup.backup_id) next_backup = self.get_next_backup(backup.backup_id) # Delete all the data contained in the backup try: self.delete_backup_data(backup) except OSError as e: output.error("Failure deleting backup %s for server %s.\n%s", backup.backup_id, self.config.name, e) return False # Check if we are deleting the first available backup if not previous_backup: # In the case of exclusive backup (default), removes any WAL # files associated to the backup being deleted. # In the case of concurrent backup, removes only WAL files # prior to the start of the backup being deleted, as they # might be useful to any concurrent backup started immediately # after. remove_until = None # means to remove all WAL files if next_backup: remove_until = next_backup elif BackupOptions.CONCURRENT_BACKUP in self.config.backup_options: remove_until = backup timelines_to_protect = set() # If remove_until is not set there are no backup left if remove_until: # Retrieve the list of extra timelines that contains at least # a backup. On such timelines we don't want to delete any WAL for value in self.get_available_backups( BackupInfo.STATUS_ARCHIVING).values(): # Ignore the backup that is being deleted if value == backup: continue timelines_to_protect.add(value.timeline) # Remove the timeline of `remove_until` from the list. # We have enough information to safely delete unused WAL files # on it. timelines_to_protect -= set([remove_until.timeline]) output.info("Delete associated WAL segments:") for name in self.remove_wal_before_backup(remove_until, timelines_to_protect): output.info("\t%s", name) # As last action, remove the backup directory, # ending the delete operation try: self.delete_basebackup(backup) except OSError as e: output.error( "Failure deleting backup %s for server %s.\n%s\n" "Please manually remove the '%s' directory", backup.backup_id, self.config.name, e, backup.get_basebackup_directory()) return False self.backup_cache_remove(backup) # Save the time of the complete removal of the backup delete_end_time = datetime.datetime.now() output.info( "Deleted backup %s (start time: %s, elapsed time: %s)", backup.backup_id, delete_start_time.ctime(), human_readable_timedelta(delete_end_time - delete_start_time)) # Remove the sync lockfile if exists sync_lock = ServerBackupSyncLock(self.config.barman_lock_directory, self.config.name, backup.backup_id) if os.path.exists(sync_lock.filename): _logger.debug("Deleting backup sync lockfile: %s" % sync_lock.filename) os.unlink(sync_lock.filename) # Run the post_delete_retry_script if present. try: retry_script = RetryHookScriptRunner(self, 'delete_retry_script', 'post') retry_script.env_from_backup_info(backup) retry_script.run() except AbortedRetryHookScript as e: # Ignore the ABORT_STOP as it is a post-hook operation _logger.warning( "Ignoring stop request after receiving " "abort (exit code %d) from post-delete " "retry hook script: %s", e.hook.exit_status, e.hook.script) # Run the post_delete_script if present. 
script = HookScriptRunner(self, 'delete_script', 'post') script.env_from_backup_info(backup) script.run() return True def backup(self, wait=False, wait_timeout=None): """ Performs a backup for the server :param bool wait: wait for all the required WAL files to be archived :param int|None wait_timeout: :return BackupInfo: the generated BackupInfo """ _logger.debug("initialising backup information") self.executor.init() backup_info = None try: # Create the BackupInfo object representing the backup backup_info = LocalBackupInfo( self.server, backup_id=datetime.datetime.now().strftime('%Y%m%dT%H%M%S')) backup_info.set_attribute('systemid', self.server.systemid) backup_info.save() self.backup_cache_add(backup_info) output.info("Starting backup using %s method for server %s in %s", self.mode, self.config.name, backup_info.get_basebackup_directory()) # Run the pre-backup-script if present. script = HookScriptRunner(self, 'backup_script', 'pre') script.env_from_backup_info(backup_info) script.run() # Run the pre-backup-retry-script if present. retry_script = RetryHookScriptRunner(self, 'backup_retry_script', 'pre') retry_script.env_from_backup_info(backup_info) retry_script.run() # Do the backup using the BackupExecutor self.executor.backup(backup_info) # Create a restore point after a backup target_name = 'barman_%s' % backup_info.backup_id self.server.postgres.create_restore_point(target_name) # Free the Postgres connection self.server.postgres.close() # Compute backup size and fsync it on disk self.backup_fsync_and_set_sizes(backup_info) # Mark the backup as WAITING_FOR_WALS backup_info.set_attribute("status", BackupInfo.WAITING_FOR_WALS) # Use BaseException instead of Exception to catch events like # KeyboardInterrupt (e.g.: CTRL-C) except BaseException as e: msg_lines = force_str(e).strip().splitlines() # If the exception has no attached message use the raw # type name if len(msg_lines) == 0: msg_lines = [type(e).__name__] if backup_info: # Use only the first line of exception message # in backup_info error field backup_info.set_attribute("status", BackupInfo.FAILED) backup_info.set_attribute( "error", "failure %s (%s)" % (self.executor.current_action, msg_lines[0])) output.error("Backup failed %s.\nDETAILS: %s", self.executor.current_action, '\n'.join(msg_lines)) else: output.info("Backup end at LSN: %s (%s, %08X)", backup_info.end_xlog, backup_info.end_wal, backup_info.end_offset) executor = self.executor output.info( "Backup completed (start time: %s, elapsed time: %s)", self.executor.copy_start_time, human_readable_timedelta(datetime.datetime.now() - executor.copy_start_time)) # If requested, wait for end_wal to be archived if wait: try: self.server.wait_for_wal(backup_info.end_wal, wait_timeout) self.check_backup(backup_info) except KeyboardInterrupt: # Ignore CTRL-C pressed while waiting for WAL files output.info( "Got CTRL-C. Continuing without waiting for '%s' " "to be archived", backup_info.end_wal) finally: if backup_info: backup_info.save() # Make sure we are not holding any PostgreSQL connection # during the post-backup scripts self.server.close() # Run the post-backup-retry-script if present. 
try: retry_script = RetryHookScriptRunner( self, 'backup_retry_script', 'post') retry_script.env_from_backup_info(backup_info) retry_script.run() except AbortedRetryHookScript as e: # Ignore the ABORT_STOP as it is a post-hook operation _logger.warning( "Ignoring stop request after receiving " "abort (exit code %d) from post-backup " "retry hook script: %s", e.hook.exit_status, e.hook.script) # Run the post-backup-script if present. script = HookScriptRunner(self, 'backup_script', 'post') script.env_from_backup_info(backup_info) script.run() output.result('backup', backup_info) return backup_info def recover(self, backup_info, dest, tablespaces=None, remote_command=None, **kwargs): """ Performs a recovery of a backup :param barman.infofile.LocalBackupInfo backup_info: the backup to recover :param str dest: the destination directory :param dict[str,str]|None tablespaces: a tablespace name -> location map (for relocation) :param str|None remote_command: default None. The remote command to recover the base backup, in case of remote backup. :kwparam str|None target_tli: the target timeline :kwparam str|None target_time: the target time :kwparam str|None target_xid: the target xid :kwparam str|None target_lsn: the target LSN :kwparam str|None target_name: the target name created previously with pg_create_restore_point() function call :kwparam bool|None target_immediate: end recovery as soon as consistency is reached :kwparam bool exclusive: whether the recovery is exclusive or not :kwparam str|None target_action: default None. The recovery target action :kwparam bool|None standby_mode: the standby mode if needed """ # Archive every WAL files in the incoming directory of the server self.server.archive_wal(verbose=False) # Delegate the recovery operation to a RecoveryExecutor object executor = RecoveryExecutor(self) # Run the pre_recovery_script if present. script = HookScriptRunner(self, 'recovery_script', 'pre') script.env_from_recover(backup_info, dest, tablespaces, remote_command, **kwargs) script.run() # Run the pre_recovery_retry_script if present. retry_script = RetryHookScriptRunner(self, 'recovery_retry_script', 'pre') retry_script.env_from_recover(backup_info, dest, tablespaces, remote_command, **kwargs) retry_script.run() # Execute the recovery. # We use a closing context to automatically remove # any resource eventually allocated during recovery. with closing(executor): recovery_info = executor.recover(backup_info, dest, tablespaces=tablespaces, remote_command=remote_command, **kwargs) # Run the post_recovery_retry_script if present. try: retry_script = RetryHookScriptRunner(self, 'recovery_retry_script', 'post') retry_script.env_from_recover(backup_info, dest, tablespaces, remote_command, **kwargs) retry_script.run() except AbortedRetryHookScript as e: # Ignore the ABORT_STOP as it is a post-hook operation _logger.warning( "Ignoring stop request after receiving " "abort (exit code %d) from post-recovery " "retry hook script: %s", e.hook.exit_status, e.hook.script) # Run the post-recovery-script if present. 
script = HookScriptRunner(self, 'recovery_script', 'post') script.env_from_recover(backup_info, dest, tablespaces, remote_command, **kwargs) script.run() # Output recovery results output.result('recovery', recovery_info['results']) def archive_wal(self, verbose=True): """ Executes WAL maintenance operations, such as archiving and compression If verbose is set to False, outputs something only if there is at least one file :param bool verbose: report even if no actions """ for archiver in self.server.archivers: archiver.archive(verbose) def cron_retention_policy(self): """ Retention policy management """ enforce_retention_policies = self.server.enforce_retention_policies retention_policy_mode = self.config.retention_policy_mode if (enforce_retention_policies and retention_policy_mode == 'auto'): available_backups = self.get_available_backups( BackupInfo.STATUS_ALL) retention_status = self.config.retention_policy.report() for bid in sorted(retention_status.keys()): if retention_status[bid] == BackupInfo.OBSOLETE: output.info( "Enforcing retention policy: removing backup %s for " "server %s" % (bid, self.config.name)) self.delete_backup(available_backups[bid]) def delete_basebackup(self, backup): """ Delete the basebackup dir of a given backup. :param barman.infofile.LocalBackupInfo backup: the backup to delete """ backup_dir = backup.get_basebackup_directory() _logger.debug("Deleting base backup directory: %s" % backup_dir) shutil.rmtree(backup_dir) def delete_backup_data(self, backup): """ Delete the data contained in a given backup. :param barman.infofile.LocalBackupInfo backup: the backup to delete """ if backup.tablespaces: if backup.backup_version == 2: tbs_dir = backup.get_basebackup_directory() else: tbs_dir = os.path.join(backup.get_data_directory(), 'pg_tblspc') for tablespace in backup.tablespaces: rm_dir = os.path.join(tbs_dir, str(tablespace.oid)) if os.path.exists(rm_dir): _logger.debug("Deleting tablespace %s directory: %s" % (tablespace.name, rm_dir)) shutil.rmtree(rm_dir) pg_data = backup.get_data_directory() if os.path.exists(pg_data): _logger.debug("Deleting PGDATA directory: %s" % pg_data) shutil.rmtree(pg_data) def delete_wal(self, wal_info): """ Delete a WAL segment, with the given WalFileInfo :param barman.infofile.WalFileInfo wal_info: the WAL to delete """ # Run the pre_wal_delete_script if present. script = HookScriptRunner(self, 'wal_delete_script', 'pre') script.env_from_wal_info(wal_info) script.run() # Run the pre_wal_delete_retry_script if present. retry_script = RetryHookScriptRunner(self, 'wal_delete_retry_script', 'pre') retry_script.env_from_wal_info(wal_info) retry_script.run() error = None try: os.unlink(wal_info.fullpath(self.server)) try: os.removedirs(os.path.dirname(wal_info.fullpath(self.server))) except OSError: # This is not an error condition # We always try to remove the the trailing directories, # this means that hashdir is not empty. pass except OSError as e: error = ('Ignoring deletion of WAL file %s for server %s: %s' % (wal_info.name, self.config.name, e)) output.warning(error) # Run the post_wal_delete_retry_script if present. 
try: retry_script = RetryHookScriptRunner(self, 'wal_delete_retry_script', 'post') retry_script.env_from_wal_info(wal_info, None, error) retry_script.run() except AbortedRetryHookScript as e: # Ignore the ABORT_STOP as it is a post-hook operation _logger.warning( "Ignoring stop request after receiving " "abort (exit code %d) from post-wal-delete " "retry hook script: %s", e.hook.exit_status, e.hook.script) # Run the post_wal_delete_script if present. script = HookScriptRunner(self, 'wal_delete_script', 'post') script.env_from_wal_info(wal_info, None, error) script.run() def check(self, check_strategy): """ This function does some checks on the server. :param CheckStrategy check_strategy: the strategy for the management of the results of the various checks """ check_strategy.init_check('compression settings') # Check compression_setting parameter if self.config.compression and not self.compression_manager.check(): check_strategy.result(self.config.name, False) else: status = True try: self.compression_manager.get_default_compressor() except CompressionIncompatibility as field: check_strategy.result(self.config.name, '%s setting' % field, False) status = False check_strategy.result(self.config.name, status) # Failed backups check check_strategy.init_check('failed backups') failed_backups = self.get_available_backups((BackupInfo.FAILED, )) status = len(failed_backups) == 0 check_strategy.result(self.config.name, status, hint='there are %s failed backups' % (len(failed_backups, ))) check_strategy.init_check('minimum redundancy requirements') # Minimum redundancy checks no_backups = len( self.get_available_backups(status_filter=(BackupInfo.DONE, ))) # Check minimum_redundancy_requirements parameter if no_backups < int(self.config.minimum_redundancy): status = False else: status = True check_strategy.result(self.config.name, status, hint='have %s backups, expected at least %s' % (no_backups, self.config.minimum_redundancy)) # TODO: Add a check for the existence of ssh and of rsync # Execute additional checks defined by the BackupExecutor if self.executor: self.executor.check(check_strategy) def status(self): """ This function show the server status """ # get number of backups no_backups = len( self.get_available_backups(status_filter=(BackupInfo.DONE, ))) output.result('status', self.config.name, "backups_number", "No. of available backups", no_backups) output.result('status', self.config.name, "first_backup", "First available backup", self.get_first_backup_id()) output.result('status', self.config.name, "last_backup", "Last available backup", self.get_last_backup_id()) # Minimum redundancy check. if number of backups minor than minimum # redundancy, fail. if no_backups < self.config.minimum_redundancy: output.result( 'status', self.config.name, "minimum_redundancy", "Minimum redundancy requirements", "FAILED (%s/%s)" % (no_backups, self.config.minimum_redundancy)) else: output.result( 'status', self.config.name, "minimum_redundancy", "Minimum redundancy requirements", "satisfied (%s/%s)" % (no_backups, self.config.minimum_redundancy)) # Output additional status defined by the BackupExecutor if self.executor: self.executor.status() def fetch_remote_status(self): """ Build additional remote status lines defined by the BackupManager. This method does not raise any exception in case of errors, but set the missing values to None in the resulting dictionary. 
:rtype: dict[str, None|str] """ if self.executor: return self.executor.get_remote_status() else: return {} def rebuild_xlogdb(self): """ Rebuild the whole xlog database guessing it from the archive content. """ from os.path import isdir, join output.info("Rebuilding xlogdb for server %s", self.config.name) root = self.config.wals_directory comp_manager = self.compression_manager wal_count = label_count = history_count = 0 # lock the xlogdb as we are about replacing it completely with self.server.xlogdb('w') as fxlogdb: xlogdb_new = fxlogdb.name + ".new" with open(xlogdb_new, 'w') as fxlogdb_new: for name in sorted(os.listdir(root)): # ignore the xlogdb and its lockfile if name.startswith(self.server.XLOG_DB): continue fullname = join(root, name) if isdir(fullname): # all relevant files are in subdirectories hash_dir = fullname for wal_name in sorted(os.listdir(hash_dir)): fullname = join(hash_dir, wal_name) if isdir(fullname): _logger.warning( 'unexpected directory ' 'rebuilding the wal database: %s', fullname) else: if xlog.is_wal_file(fullname): wal_count += 1 elif xlog.is_backup_file(fullname): label_count += 1 elif fullname.endswith('.tmp'): _logger.warning( 'temporary file found ' 'rebuilding the wal database: %s', fullname) continue else: _logger.warning( 'unexpected file ' 'rebuilding the wal database: %s', fullname) continue wal_info = comp_manager.get_wal_file_info( fullname) fxlogdb_new.write(wal_info.to_xlogdb_line()) else: # only history files are here if xlog.is_history_file(fullname): history_count += 1 wal_info = comp_manager.get_wal_file_info(fullname) fxlogdb_new.write(wal_info.to_xlogdb_line()) else: _logger.warning( 'unexpected file ' 'rebuilding the wal database: %s', fullname) os.fsync(fxlogdb_new.fileno()) shutil.move(xlogdb_new, fxlogdb.name) fsync_dir(os.path.dirname(fxlogdb.name)) output.info( 'Done rebuilding xlogdb for server %s ' '(history: %s, backup_labels: %s, wal_file: %s)', self.config.name, history_count, label_count, wal_count) def get_latest_archived_wals_info(self): """ Return a dictionary of timelines associated with the WalFileInfo of the last WAL file in the archive, or None if the archive doesn't contain any WAL file. :rtype: dict[str, WalFileInfo]|None """ from os.path import isdir, join root = self.config.wals_directory comp_manager = self.compression_manager # If the WAL archive directory doesn't exists the archive is empty if not isdir(root): return dict() # Traverse all the directory in the archive in reverse order, # returning the first WAL file found timelines = {} for name in sorted(os.listdir(root), reverse=True): fullname = join(root, name) # All relevant files are in subdirectories, so # we skip any non-directory entry if isdir(fullname): # Extract the timeline. If it is not valid, skip this directory try: timeline = name[0:8] int(timeline, 16) except ValueError: continue # If this timeline already has a file, skip this directory if timeline in timelines: continue hash_dir = fullname # Inspect contained files in reverse order for wal_name in sorted(os.listdir(hash_dir), reverse=True): fullname = join(hash_dir, wal_name) # Return the first file that has the correct name if not isdir(fullname) and xlog.is_wal_file(fullname): timelines[timeline] = comp_manager.get_wal_file_info( fullname) break # Return the timeline map return timelines def remove_wal_before_backup(self, backup_info, timelines_to_protect=None): """ Remove WAL files which have been archived before the start of the provided backup. 
If no backup_info is provided delete all available WAL files If timelines_to_protect list is passed, never remove a wal in one of these timelines. :param BackupInfo|None backup_info: the backup information structure :param set timelines_to_protect: optional list of timelines to protect :return list: a list of removed WAL files """ removed = [] with self.server.xlogdb() as fxlogdb: xlogdb_new = fxlogdb.name + ".new" with open(xlogdb_new, 'w') as fxlogdb_new: for line in fxlogdb: wal_info = WalFileInfo.from_xlogdb_line(line) if not xlog.is_any_xlog_file(wal_info.name): output.error( "invalid WAL segment name %r\n" "HINT: Please run \"barman rebuild-xlogdb %s\" " "to solve this issue", wal_info.name, self.config.name) continue # Keeps the WAL segment if it is a history file keep = xlog.is_history_file(wal_info.name) # Keeps the WAL segment if its timeline is in # `timelines_to_protect` if timelines_to_protect: tli, _, _ = xlog.decode_segment_name(wal_info.name) keep |= tli in timelines_to_protect # Keeps the WAL segment if it is a newer # than the given backup (the first available) if backup_info and backup_info.begin_wal is not None: keep |= wal_info.name >= backup_info.begin_wal # If the file has to be kept write it in the new xlogdb # otherwise delete it and record it in the removed list if keep: fxlogdb_new.write(wal_info.to_xlogdb_line()) else: self.delete_wal(wal_info) removed.append(wal_info.name) fxlogdb_new.flush() os.fsync(fxlogdb_new.fileno()) shutil.move(xlogdb_new, fxlogdb.name) fsync_dir(os.path.dirname(fxlogdb.name)) return removed def validate_last_backup_maximum_age(self, last_backup_maximum_age): """ Evaluate the age of the last available backup in a catalogue. If the last backup is older than the specified time interval (age), the function returns False. If within the requested age interval, the function returns True. :param timedate.timedelta last_backup_maximum_age: time interval representing the maximum allowed age for the last backup in a server catalogue :return tuple: a tuple containing the boolean result of the check and auxiliary information about the last backup current age """ # Get the ID of the last available backup backup_id = self.get_last_backup_id() if backup_id: # Get the backup object backup = LocalBackupInfo(self.server, backup_id=backup_id) now = datetime.datetime.now(dateutil.tz.tzlocal()) # Evaluate the point of validity validity_time = now - last_backup_maximum_age # Pretty print of a time interval (age) msg = human_readable_timedelta(now - backup.end_time) # If the backup end time is older than the point of validity, # return False, otherwise return true if backup.end_time < validity_time: return False, msg else: return True, msg else: # If no backup is available return false return False, "No available backups" def backup_fsync_and_set_sizes(self, backup_info): """ Fsync all files in a backup and set the actual size on disk of a backup. Also evaluate the deduplication ratio and the deduplicated size if applicable. 
:param LocalBackupInfo backup_info: the backup to update """ # Calculate the base backup size self.executor.current_action = "calculating backup size" _logger.debug(self.executor.current_action) backup_size = 0 deduplicated_size = 0 backup_dest = backup_info.get_basebackup_directory() for dir_path, _, file_names in os.walk(backup_dest): # execute fsync() on the containing directory fsync_dir(dir_path) # execute fsync() on all the contained files for filename in file_names: file_path = os.path.join(dir_path, filename) file_stat = fsync_file(file_path) backup_size += file_stat.st_size # Excludes hard links from real backup size if file_stat.st_nlink == 1: deduplicated_size += file_stat.st_size # Save size into BackupInfo object backup_info.set_attribute('size', backup_size) backup_info.set_attribute('deduplicated_size', deduplicated_size) if backup_info.size > 0: deduplication_ratio = 1 - (float(backup_info.deduplicated_size) / backup_info.size) else: deduplication_ratio = 0 if self.config.reuse_backup == 'link': output.info("Backup size: %s. Actual size on disk: %s" " (-%s deduplication ratio)." % (pretty_size(backup_info.size), pretty_size(backup_info.deduplicated_size), '{percent:.2%}'.format(percent=deduplication_ratio))) else: output.info("Backup size: %s" % pretty_size(backup_info.size)) def check_backup(self, backup_info): """ Make sure that all the required WAL files to check the consistency of a physical backup (that is, from the beginning to the end of the full backup) are correctly archived. This command is automatically invoked by the cron command and at the end of every backup operation. :param backup_info: the target backup """ # Gather the list of the latest archived wals timelines = self.get_latest_archived_wals_info() # Get the basic info for the backup begin_wal = backup_info.begin_wal end_wal = backup_info.end_wal timeline = begin_wal[:8] # Case 0: there is nothing to check for this backup, as it is # currently in progress if not end_wal: return # Case 1: Barman still doesn't know about the timeline the backup # started with. We still haven't archived any WAL corresponding # to the backup, so we can't proceed with checking the existence # of the required WAL files if not timelines or timeline not in timelines: backup_info.status = BackupInfo.WAITING_FOR_WALS backup_info.save() return # Find the most recent archived WAL for this server in the timeline # where the backup was taken last_archived_wal = timelines[timeline].name # Case 2: the most recent WAL file archived is older than the # start of the backup. We must wait for the archiver to receive # and/or process the WAL files. if last_archived_wal < begin_wal: backup_info.status = BackupInfo.WAITING_FOR_WALS backup_info.save() return # Check the intersection between the required WALs and the archived # ones. They should all exist segments = backup_info.get_required_wal_segments() missing_wal = None for wal in segments: # Stop checking if we reach the last archived wal if wal > last_archived_wal: break wal_full_path = self.server.get_wal_full_path(wal) if not os.path.exists(wal_full_path): missing_wal = wal break if missing_wal: # Case 3: the most recent WAL file archived is more recent than # the one corresponding to the start of a backup. If WAL # file is missing, then we can't recover from the backup so we # must mark the backup as FAILED. # TODO: Verify if the error field is the right place # to store the error message backup_info.error = ("At least one WAL file is missing. 
" "The first missing WAL file is %s" % missing_wal) backup_info.status = BackupInfo.FAILED backup_info.save() return if end_wal <= last_archived_wal: # Case 4: if the most recent WAL file archived is more recent or # equal than the one corresponding to the end of the backup and # every WAL that will be required by the recovery is available, # we can mark the backup as DONE. backup_info.status = BackupInfo.DONE else: # Case 5: if the most recent WAL file archived is older than # the one corresponding to the end of the backup but # all the WAL files until that point are present. backup_info.status = BackupInfo.WAITING_FOR_WALS backup_info.save()
def test_check_compression_none(self): # prepare mock obj config_mock = mock.Mock() config_mock.compression = "custom" comp_manager = CompressionManager(config_mock, None) assert comp_manager.check() is True
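# Hedged companion sketch (illustrative, not from the original suite): the
# negative path of CompressionManager.check(). This assumes that check()
# falls back to the configured compression name and reports False when that
# name is not among the registered compressors; the test name and the
# compression value below are hypothetical.
def test_check_compression_unknown_sketch(self):
    config_mock = mock.Mock()
    config_mock.compression = "not_a_registered_algorithm"
    comp_manager = CompressionManager(config_mock, None)
    # An unrecognised compression name should fail the configuration check
    assert comp_manager.check() is False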
def test_decode_history_file(self, tmpdir): compressor = mock.Mock() # Regular history file p = tmpdir.join('00000002.history') p.write('1\t2/83000168\tat restore point "myrp"\n') wal_info = WalFileInfo.from_file(p.strpath) result = xlog.HistoryFileData( tli=2, parent_tli=1, reason='at restore point "myrp"', switchpoint=0x283000168) assert xlog.decode_history_file(wal_info, compressor) == [result] assert len(compressor.mock_calls) == 0 # Comments must be skipped p = tmpdir.join('00000003.history') p.write('# Comment\n1\t2/83000168\tat restore point "testcomment"\n') wal_info = WalFileInfo.from_file(p.strpath) result = xlog.HistoryFileData( tli=3, parent_tli=1, reason='at restore point "testcomment"', switchpoint=0x283000168) assert xlog.decode_history_file(wal_info, compressor) == [result] assert len(compressor.mock_calls) == 0 # History file with comments and empty lines p = tmpdir.join('00000004.history') p.write('# Comment\n\n1\t2/83000168\ttesting "testemptyline"\n') wal_info = WalFileInfo.from_file(p.strpath) result = xlog.HistoryFileData( tli=4, parent_tli=1, reason='testing "testemptyline"', switchpoint=0x283000168) assert xlog.decode_history_file(wal_info, compressor) == [result] assert len(compressor.mock_calls) == 0 # Test compression handling (fix for bug #66 on GitHub) config_mock = mock.Mock() config_mock.compression = "gzip" # create a compression manager configured for gzip compression comp_manager = CompressionManager(config_mock, None) u = tmpdir.join('00000005.uncompressed') p = tmpdir.join('00000005.history') u.write('1\t2/83000168\tat restore point "myrp"\n') result = xlog.HistoryFileData( tli=5, parent_tli=1, reason='at restore point "myrp"', switchpoint=0x283000168) comp_manager.get_compressor('gzip').compress(u.strpath, p.strpath) wal_info = WalFileInfo.from_file(p.strpath) assert xlog.decode_history_file(wal_info, comp_manager) == [result] with pytest.raises(barman.exceptions.BadHistoryFileContents): # Empty file p.write('') assert xlog.decode_history_file(wal_info, compressor) assert len(compressor.mock_calls) == 0 with pytest.raises(barman.exceptions.BadHistoryFileContents): # Missing field p.write('1\t2/83000168') assert xlog.decode_history_file(wal_info, compressor) assert len(compressor.mock_calls) == 0 with pytest.raises(barman.exceptions.BadHistoryFileContents): # Unexpected extra field p.write('1\t2/83000168\tat restore point "myrp"\ttest') assert xlog.decode_history_file(wal_info, compressor) assert len(compressor.mock_calls) == 0
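# Hedged helper sketch: the switchpoint values asserted in
# test_decode_history_file come from the "hi/lo" LSN notation used in history
# files, e.g. "2/83000168" means hi=0x2 and lo=0x83000168, which recombine as
# (hi << 32) | lo == 0x283000168. The helper and test names below are
# illustrative assumptions, not part of the barman API or test suite.
@staticmethod
def _parse_lsn_sketch(lsn):
    # Split the "hi/lo" textual form and recombine it into a 64-bit integer
    hi, lo = (int(part, 16) for part in lsn.split("/"))
    return (hi << 32) | lo

def test_parse_lsn_sketch(self):
    assert self._parse_lsn_sketch("2/83000168") == 0x283000168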