def recover_basebackup_copy(self, backup, dest, remote_command=None):
    '''
    Perform the actual copy of the base backup for recovery purposes

    :param backup: the backup to recover
    :param dest: the destination directory
    :param remote_command: default None. The remote command to recover
                           the base backup, in case of remote backup.
    '''
    sourcedir = os.path.join(backup.get_basebackup_directory(), 'pgdata')
    tablespaces_bwlimit = {}
    if remote_command:
        dest = ':%s' % dest
    # validate the bandwidth rules against the tablespace list
    if self.config.tablespace_bandwidth_limit and backup.tablespaces:
        valid_tablespaces = dict([
            (tablespace_data[0], tablespace_data[1])
            for tablespace_data in backup.tablespaces])
        for tablespace, bwlimit in \
                self.config.tablespace_bandwidth_limit.items():
            if tablespace in valid_tablespaces:
                tablespace_dir = "pg_tblspc/%s" % (
                    valid_tablespaces[tablespace],)
                tablespaces_bwlimit[tablespace_dir] = bwlimit
    rsync = RsyncPgData(
        ssh=remote_command,
        bwlimit=self.config.bandwidth_limit,
        exclude_and_protect=tablespaces_bwlimit.keys())
    retval = rsync('%s/' % (sourcedir,), dest)
    if retval != 0:
        raise Exception("ERROR: data transfer failure")
    if remote_command and len(tablespaces_bwlimit) > 0:
        for tablespace_dir, bwlimit in tablespaces_bwlimit.items():
            self.current_action = \
                "copying tablespace '%s' with bwlimit %d" % (
                    tablespace_dir, bwlimit)
            _logger.debug(self.current_action)
            tb_rsync = RsyncPgData(ssh=remote_command, bwlimit=bwlimit)
            retval = tb_rsync(
                '%s/' % os.path.join(sourcedir, tablespace_dir),
                os.path.join(dest, tablespace_dir))
            if retval != 0:
                msg = "ERROR: data transfer failure on directory '%s'" % (
                    tablespace_dir,)
                _logger.exception(msg)
                raise Exception(msg)

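# Worked example (hypothetical values, not part of the original code) of
# the bandwidth-rule validation above. Given:
#     backup.tablespaces = [('ts1', 16385, '/mnt/ts1')]
#     tablespace_bandwidth_limit = {'ts1': 2000, 'unknown_ts': 500}
# only the tablespace that actually exists in the backup survives, keyed
# by its path under PGDATA, so 'tablespaces_bwlimit' ends up as:
#     {'pg_tblspc/16385': 2000}
# and the rule for 'unknown_ts' is silently dropped.
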
def _rsync_factory(self, item):
    """
    Build the RsyncPgData object required for copying the provided item

    :param _RsyncCopyItem item: information about a copy operation
    :rtype: RsyncPgData
    """
    # If the object already exists, use it
    if item in self.rsync_cache:
        return self.rsync_cache[item]
    # Prepare the command arguments
    args = self._reuse_args(item.reuse)
    # Merge the global exclude with the one in the item object
    if self.exclude and item.exclude:
        exclude = self.exclude + item.exclude
    else:
        exclude = self.exclude or item.exclude
    # Using '--ignore-missing-args' could fail in case the local or the
    # remote rsync is older than 3.1. In that case we expect an error
    # during the analyze phase. The analyze code must catch that error
    # and retry after flushing the rsync cache.
    if self.rsync_has_ignore_missing_args:
        args.append("--ignore-missing-args")
    # TODO: remove debug output or use it for progress tracking
    # By adding a double '--itemize-changes' option, the rsync output
    # will contain the full list of files that have been touched, even
    # those that have not changed
    args.append("--itemize-changes")
    args.append("--itemize-changes")
    # Build the rsync object that will execute the copy
    rsync = RsyncPgData(
        path=self.path,
        ssh=self.ssh_command,
        ssh_options=self.ssh_options,
        args=args,
        bwlimit=item.bwlimit,
        network_compression=self.network_compression,
        exclude=exclude,
        exclude_and_protect=item.exclude_and_protect,
        include=item.include,
        retry_times=self.retry_times,
        retry_sleep=self.retry_sleep,
        retry_handler=partial(self._retry_handler, item),
    )
    self.rsync_cache[item] = rsync
    return rsync

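# Illustrative sketch, not part of barman: one way a caller could probe
# whether the installed rsync supports '--ignore-missing-args' (rsync
# >= 3.1), so that 'rsync_has_ignore_missing_args' above can be
# initialized. Parsing 'rsync --version' output this way is an
# assumption; the real code may detect support differently (e.g. by
# catching the error during the analyze phase, as the comment suggests).
import re
import subprocess


def rsync_supports_ignore_missing_args():
    # 'rsync --version' prints a line like "rsync  version 3.2.7 ..."
    out = subprocess.check_output(['rsync', '--version']).decode('utf-8')
    match = re.search(r'version\s+(\d+)\.(\d+)', out)
    if not match:
        return False
    return (int(match.group(1)), int(match.group(2))) >= (3, 1)
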
def copy(self):
    """
    Execute the actual copy
    """
    for item in self.item_list:
        # Prepare the command arguments
        args = self._reuse_args(item.reuse)
        # Merge the global exclude with the one in the item object
        if self.exclude and item.exclude:
            exclude = self.exclude + item.exclude
        else:
            exclude = self.exclude or item.exclude
        # TODO: remove debug output or use it for progress tracking
        # By adding a double '--itemize-changes' option, the rsync output
        # will contain the full list of files that have been touched,
        # even those that have not changed
        args.append('--itemize-changes')
        args.append('--itemize-changes')
        # Build the rsync object that will execute the copy
        rsync = RsyncPgData(
            path=self.path,
            ssh=self.ssh_command,
            ssh_options=self.ssh_options,
            args=args,
            bwlimit=item.bwlimit,
            network_compression=self.network_compression,
            exclude=exclude,
            exclude_and_protect=item.exclude_and_protect,
            retry_times=self.retry_times,
            retry_sleep=self.retry_sleep,
            retry_handler=partial(self._retry_handler, item))
        # Log the operation that is being executed
        _logger.info("Copying %s", item)
        # If the item is a directory use the smart copy algorithm,
        # otherwise run a plain rsync
        if item.is_directory:
            self._smart_copy(rsync, item.src, item.dst,
                             self.safe_horizon, item.reuse)
        else:
            rsync(item.src, item.dst, allowed_retval=(0, 23, 24))
            if rsync.ret == 23:
                if item.optional:
                    _logger.warning("Ignoring error reading %s", item)
                else:
                    raise CommandFailedException(dict(
                        ret=rsync.ret, out=rsync.out, err=rsync.err))

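# Context note, based on rsync's documented exit codes: 23 means
# "partial transfer due to error" and 24 means "some files vanished
# during transfer". copy() above tolerates 24 silently (expected when
# copying a live PGDATA) and tolerates 23 only for optional items.
# A minimal standalone sketch of that policy:
def check_rsync_retval(ret, optional_item=False):
    # 0 is success; 24 means only that some source files vanished
    if ret in (0, 24):
        return
    # 23 (partial transfer) is ignorable only for optional items
    if ret == 23 and optional_item:
        return
    raise RuntimeError("rsync failed with exit code %s" % ret)
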
def recover_xlog_copy(self, decompressor, xlogs, wal_dest,
                      remote_command=None):
    '''
    Restore WAL segments

    :param decompressor: the decompressor for the file (if any)
    :param xlogs: the xlog dictionary to recover
    :param wal_dest: the destination directory for xlog recover
    :param remote_command: default None. The remote command to recover
                           the xlog, in case of remote backup.
    '''
    rsync = RsyncPgData(ssh=remote_command)
    if remote_command:
        # If remote recovery, tell rsync to copy them remotely
        wal_dest = ':%s' % wal_dest
    else:
        # We will not use rsync: the destination directory must exist
        if not os.path.exists(wal_dest):
            os.makedirs(wal_dest)
    if decompressor and remote_command:
        xlog_spool = tempfile.mkdtemp(prefix='barman_xlog-')
    for prefix in xlogs:
        source_dir = os.path.join(self.config.wals_directory, prefix)
        if decompressor:
            if remote_command:
                for segment in xlogs[prefix]:
                    decompressor(os.path.join(source_dir, segment),
                                 os.path.join(xlog_spool, segment))
                rsync.from_file_list(xlogs[prefix], xlog_spool, wal_dest)
                for segment in xlogs[prefix]:
                    os.unlink(os.path.join(xlog_spool, segment))
            else:
                # Decompress directly to the right place
                for segment in xlogs[prefix]:
                    decompressor(os.path.join(source_dir, segment),
                                 os.path.join(wal_dest, segment))
        else:
            rsync.from_file_list(
                xlogs[prefix],
                "%s/" % os.path.join(self.config.wals_directory, prefix),
                wal_dest)
    if decompressor and remote_command:
        shutil.rmtree(xlog_spool)

def recover_basebackup_copy(self, backup, dest, remote_command=None):
    '''
    Perform the actual copy of the base backup for recovery purposes

    :param backup: the backup to recover
    :param dest: the destination directory
    :param remote_command: default None. The remote command to recover
                           the base backup, in case of remote backup.
    '''
    rsync = RsyncPgData(ssh=remote_command)
    sourcedir = '%s/' % os.path.join(backup.get_basebackup_directory(),
                                     'pgdata')
    if remote_command:
        dest = ':%s' % dest
    retval = rsync(sourcedir, dest)
    if retval != 0:
        raise Exception("ERROR: data transfer failure")

def backup_copy(self, backup_info):
    '''
    Perform the copy of the backup.
    This function returns the size of the backup (in bytes)

    :param backup_info: the backup information structure
    '''
    backup_dest = os.path.join(backup_info.get_basebackup_directory(),
                               'pgdata')
    rsync = RsyncPgData(ssh=self.server.ssh_command,
                        ssh_options=self.server.ssh_options)
    retval = rsync(':%s/' % backup_info.pgdata, backup_dest)
    if retval not in (0, 24):
        msg = "ERROR: data transfer failure"
        _logger.exception(msg)
        raise Exception(msg)

    # Copy configuration files (if not inside PGDATA)
    current_action = "copying configuration files"
    _logger.debug(current_action)
    cf = self.server.get_pg_configuration_files()
    if cf:
        for key in sorted(cf.keys()):
            # Consider only those that reside outside of the original
            # PGDATA directory
            if cf[key]:
                if cf[key].find(backup_info.pgdata) == 0:
                    current_action = \
                        "skipping %s as contained in %s directory" % (
                            key, backup_info.pgdata)
                    _logger.debug(current_action)
                    continue
                else:
                    current_action = \
                        "copying %s as outside %s directory" % (
                            key, backup_info.pgdata)
                    _logger.info(current_action)
                    retval = rsync(':%s' % cf[key], backup_dest)
                    if retval not in (0, 24):
                        raise Exception("ERROR: data transfer failure")

    current_action = "calculating backup size"
    _logger.debug(current_action)
    backup_size = 0
    for dirpath, _, filenames in os.walk(backup_dest):
        for f in filenames:
            fp = os.path.join(dirpath, f)
            backup_size += os.path.getsize(fp)
    return backup_size

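# Note on the size calculation above (standard library behavior, not a
# barman-specific detail): os.walk() does not descend into symlinked
# directories by default, so anything reachable only through a symlink
# (for example, tablespaces linked under pg_tblspc) does not contribute
# its contents to the computed 'backup_size'.
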
def _rsync_factory(self, item):
    """
    Build the RsyncPgData object required for copying the provided item

    :param _RsyncCopyItem item: information about a copy operation
    :rtype: RsyncPgData
    """
    # If the object already exists, use it
    if item in self.rsync_cache:
        return self.rsync_cache[item]
    # Prepare the command arguments
    args = self._reuse_args(item.reuse)
    # Merge the global exclude with the one in the item object
    if self.exclude and item.exclude:
        exclude = self.exclude + item.exclude
    else:
        exclude = self.exclude or item.exclude
    # TODO: remove debug output or use it for progress tracking
    # By adding a double '--itemize-changes' option, the rsync output
    # will contain the full list of files that have been touched, even
    # those that have not changed
    args.append('--itemize-changes')
    args.append('--itemize-changes')
    # Build the rsync object that will execute the copy
    rsync = RsyncPgData(
        path=self.path,
        ssh=self.ssh_command,
        ssh_options=self.ssh_options,
        args=args,
        bwlimit=item.bwlimit,
        network_compression=self.network_compression,
        exclude=exclude,
        exclude_and_protect=item.exclude_and_protect,
        include=item.include,
        retry_times=self.retry_times,
        retry_sleep=self.retry_sleep,
        retry_handler=partial(self._retry_handler, item)
    )
    self.rsync_cache[item] = rsync
    return rsync

def basebackup_copy(self, backup_info, dest, tablespaces=None,
                    remote_command=None, safe_horizon=None):
    """
    Perform the actual copy of the base backup for recovery purposes

    :param barman.infofile.BackupInfo backup_info: the backup to recover
    :param str dest: the destination directory
    :param dict[str,str]|None tablespaces: a tablespace
        name -> location map (for relocation)
    :param str|None remote_command: default None. The remote command to
        recover the base backup, in case of remote backup.
    :param datetime.datetime|None safe_horizon: anything after this time
        has to be checked with checksum
    """
    # Dictionary for paths to be excluded from rsync
    exclude_and_protect = []

    # Set a ':' prefix to remote destinations
    dest_prefix = ''
    if remote_command:
        dest_prefix = ':'

    # Copy tablespaces applying bwlimit when necessary
    if backup_info.tablespaces:
        tablespaces_bw_limit = self.config.tablespace_bandwidth_limit
        # Copy a tablespace at a time
        for tablespace in backup_info.tablespaces:
            # Apply bandwidth limit if requested
            bwlimit = self.config.bandwidth_limit
            if tablespaces_bw_limit and \
                    tablespace.name in tablespaces_bw_limit:
                bwlimit = tablespaces_bw_limit[tablespace.name]
            # By default a tablespace goes in the same location where
            # it was on the source server when the backup was taken
            location = tablespace.location
            # If a relocation has been requested for this tablespace
            # use the user provided target directory
            if tablespaces and tablespace.name in tablespaces:
                location = tablespaces[tablespace.name]
            # If the tablespace location is inside the data directory,
            # exclude and protect it from being deleted during
            # the data directory copy
            if location.startswith(dest):
                exclude_and_protect.append(location[len(dest):])
            # Exclude and protect the tablespace from being deleted
            # during the data directory copy
            exclude_and_protect.append("/pg_tblspc/%s" % tablespace.oid)
            # Copy the tablespace using smart copy
            tb_rsync = RsyncPgData(
                path=self.server.path,
                ssh=remote_command,
                bwlimit=bwlimit,
                network_compression=self.config.network_compression,
                check=True)
            try:
                tb_rsync.smart_copy(
                    '%s/' % backup_info.get_data_directory(
                        tablespace.oid),
                    dest_prefix + location,
                    safe_horizon)
            except CommandFailedException as e:
                msg = "data transfer failure on directory '%s'" % \
                    location
                raise DataTransferFailure.from_command_error(
                    'rsync', e, msg)

    # Copy the pgdata directory
    rsync = RsyncPgData(
        path=self.server.path,
        ssh=remote_command,
        bwlimit=self.config.bandwidth_limit,
        exclude_and_protect=exclude_and_protect,
        network_compression=self.config.network_compression)
    try:
        rsync.smart_copy('%s/' % backup_info.get_data_directory(),
                         dest_prefix + dest,
                         safe_horizon)
    except CommandFailedException as e:
        msg = "data transfer failure on directory '%s'" % dest
        raise DataTransferFailure.from_command_error('rsync', e, msg)

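# Worked example (hypothetical paths) of the exclude_and_protect logic in
# basebackup_copy() above: a tablespace relocated *inside* the destination
# data directory must be excluded from the PGDATA copy, and protected so
# that rsync's delete pass does not wipe the data copied moments before.
dest = '/srv/restore/pgdata'            # hypothetical destination
location = '/srv/restore/pgdata/ts1'    # hypothetical relocated tablespace
assert location.startswith(dest)
print(location[len(dest):])             # -> '/ts1', the rsync filter path
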
def backup_copy(self, backup_info):
    """
    Perform the actual copy of the backup using Rsync.

    First, it copies one tablespace at a time, then the PGDATA directory,
    and finally configuration files (if outside PGDATA).
    Bandwidth limitation, according to configuration, is applied in
    the process.
    This method is the core of base backup copy using Rsync+Ssh.

    :param barman.infofile.BackupInfo backup_info: backup information
    """
    # List of paths to be ignored by Rsync
    exclude_and_protect = []

    # Retrieve the previous backup metadata, then set safe_horizon
    previous_backup = self.backup_manager.get_previous_backup(
        backup_info.backup_id)
    if previous_backup:
        # safe_horizon is a tz-aware timestamp because BackupInfo class
        # ensures it
        safe_horizon = previous_backup.begin_time
    else:
        # If no previous backup is present, safe_horizon is set to None
        safe_horizon = None

    # Copy tablespaces applying bwlimit when necessary
    if backup_info.tablespaces:
        tablespaces_bw_limit = self.config.tablespace_bandwidth_limit
        # Copy a tablespace at a time
        for tablespace in backup_info.tablespaces:
            self.current_action = "copying tablespace '%s'" % \
                tablespace.name
            # Apply bandwidth limit if requested
            bwlimit = self.config.bandwidth_limit
            if tablespaces_bw_limit and \
                    tablespace.name in tablespaces_bw_limit:
                bwlimit = tablespaces_bw_limit[tablespace.name]
            if bwlimit:
                self.current_action += (" with bwlimit '%d'" % bwlimit)
            _logger.debug(self.current_action)
            # If the tablespace location is inside the data directory,
            # exclude and protect it from being copied twice during
            # the data directory copy
            if tablespace.location.startswith(backup_info.pgdata):
                exclude_and_protect.append(
                    tablespace.location[len(backup_info.pgdata):])
            # Make sure the destination directory exists in order for
            # smart copy to detect that no file is present there
            tablespace_dest = backup_info.get_data_directory(
                tablespace.oid)
            mkpath(tablespace_dest)
            # Exclude and protect the tablespace from being copied again
            # during the data directory copy
            exclude_and_protect.append("/pg_tblspc/%s" % tablespace.oid)
            # Copy the backup using smart_copy trying to reuse the
            # tablespace of the previous backup if incremental is active
            ref_dir = self._reuse_dir(previous_backup, tablespace.oid)
            tb_rsync = RsyncPgData(
                path=self.server.path,
                ssh=self.ssh_command,
                ssh_options=self.ssh_options,
                args=self._reuse_args(ref_dir),
                bwlimit=bwlimit,
                network_compression=self.config.network_compression,
                check=True)
            try:
                tb_rsync.smart_copy(
                    ':%s/' % tablespace.location,
                    tablespace_dest,
                    safe_horizon,
                    ref_dir)
            except CommandFailedException as e:
                msg = "data transfer failure on directory '%s'" % \
                    backup_info.get_data_directory(tablespace.oid)
                raise DataTransferFailure.from_rsync_error(e, msg)

    # Make sure the destination directory exists in order for smart copy
    # to detect that no file is present there
    backup_dest = backup_info.get_data_directory()
    mkpath(backup_dest)

    # Copy the PGDATA, trying to reuse the data dir
    # of the previous backup if incremental is active
    ref_dir = self._reuse_dir(previous_backup)
    rsync = RsyncPgData(
        path=self.server.path,
        ssh=self.ssh_command,
        ssh_options=self.ssh_options,
        args=self._reuse_args(ref_dir),
        bwlimit=self.config.bandwidth_limit,
        exclude_and_protect=exclude_and_protect,
        network_compression=self.config.network_compression)
    try:
        rsync.smart_copy(':%s/' % backup_info.pgdata, backup_dest,
                         safe_horizon, ref_dir)
    except CommandFailedException as e:
        msg = "data transfer failure on directory '%s'" % \
            backup_info.pgdata
        raise DataTransferFailure.from_rsync_error(e, msg)

    # At last copy pg_control
    try:
        rsync(':%s/global/pg_control' % (backup_info.pgdata,),
              '%s/global/pg_control' % (backup_dest,))
    except CommandFailedException as e:
        msg = "data transfer failure on file '%s/global/pg_control'" % \
            backup_info.pgdata
        raise DataTransferFailure.from_rsync_error(e, msg)

    # Copy configuration files (if not inside PGDATA)
    self.current_action = "copying configuration files"
    _logger.debug(self.current_action)
    for key in ('config_file', 'hba_file', 'ident_file'):
        cf = getattr(backup_info, key, None)
        if cf:
            assert isinstance(cf, str)
            # Consider only those that reside outside of the original
            # PGDATA directory
            if cf.startswith(backup_info.pgdata):
                self.current_action = \
                    "skipping %s as contained in %s directory" % (
                        key, backup_info.pgdata)
                _logger.debug(self.current_action)
                continue
            self.current_action = "copying %s as outside %s directory" % (
                key, backup_info.pgdata)
            _logger.info(self.current_action)
            try:
                rsync(':%s' % cf, backup_dest)
            except CommandFailedException as e:
                ret_code = e.args[0]['ret']
                msg = "data transfer failure on file '%s'" % cf
                if 'ident_file' == key and ret_code == 23:
                    # If the ident file is missing,
                    # it isn't an error condition for PostgreSQL.
                    # Barman is consistent with this behavior.
                    output.warning(msg, log=True)
                    continue
                else:
                    raise DataTransferFailure.from_rsync_error(e, msg)

    # Check for any include directives in PostgreSQL configuration.
    # Currently, include directives are not supported for files that
    # reside outside PGDATA. These files must be manually backed up.
    # Barman will emit a warning and list those files.
    if backup_info.included_files:
        filtered_files = [
            included_file
            for included_file in backup_info.included_files
            if not included_file.startswith(backup_info.pgdata)
        ]
        if len(filtered_files) > 0:
            output.warning(
                "The usage of include directives is not supported "
                "for files that reside outside PGDATA.\n"
                "Please manually backup the following files:\n"
                "\t%s\n",
                "\n\t".join(filtered_files)
            )

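# Design note on the copy order used above: '/global/pg_control' is
# transferred last, on its own, only after everything else has been
# copied successfully. The plausible rationale (an inference, not stated
# in the code): a backup whose copy was interrupted part-way will not
# carry a freshly copied control file, which makes the incomplete state
# easier to detect.
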
def backup_copy(self, backup_info):
    """
    Perform the actual copy of the backup using Rsync.

    First, it copies one tablespace at a time, then the PGDATA directory,
    and finally configuration files (if outside PGDATA).
    Bandwidth limitation, according to configuration, is applied in
    the process.
    This method is the core of base backup copy using Rsync+Ssh.

    :param barman.infofile.BackupInfo backup_info: backup information
    """
    # List of paths to be ignored by Rsync
    exclude_and_protect = []

    # Retrieve the previous backup metadata, then set safe_horizon
    previous_backup = self.backup_manager.get_previous_backup(
        backup_info.backup_id)
    if previous_backup:
        # safe_horizon is a tz-aware timestamp because BackupInfo class
        # ensures it
        safe_horizon = previous_backup.begin_time
    else:
        # If no previous backup is present, safe_horizon is set to None
        safe_horizon = None

    # Copy tablespaces applying bwlimit when necessary
    if backup_info.tablespaces:
        tablespaces_bw_limit = self.config.tablespace_bandwidth_limit
        # Copy a tablespace at a time
        for tablespace in backup_info.tablespaces:
            self.current_action = "copying tablespace '%s'" % \
                tablespace.name
            # Apply bandwidth limit if requested
            bwlimit = self.config.bandwidth_limit
            if tablespaces_bw_limit and \
                    tablespace.name in tablespaces_bw_limit:
                bwlimit = tablespaces_bw_limit[tablespace.name]
            if bwlimit:
                self.current_action += (" with bwlimit '%d'" % bwlimit)
            _logger.debug(self.current_action)
            # If the tablespace location is inside the data directory,
            # exclude and protect it from being copied twice during
            # the data directory copy
            if tablespace.location.startswith(backup_info.pgdata):
                exclude_and_protect.append(
                    tablespace.location[len(backup_info.pgdata):])
            # Make sure the destination directory exists in order for
            # smart copy to detect that no file is present there
            tablespace_dest = backup_info.get_data_directory(
                tablespace.oid)
            mkpath(tablespace_dest)
            # Exclude and protect the tablespace from being copied again
            # during the data directory copy
            exclude_and_protect.append("/pg_tblspc/%s" % tablespace.oid)
            # Copy the backup using smart_copy trying to reuse the
            # tablespace of the previous backup if incremental is active
            ref_dir = self._reuse_dir(previous_backup, tablespace.oid)
            tb_rsync = RsyncPgData(
                path=self.server.path,
                ssh=self.ssh_command,
                ssh_options=self.ssh_options,
                args=self._reuse_args(ref_dir),
                bwlimit=bwlimit,
                network_compression=self.config.network_compression,
                check=True)
            try:
                tb_rsync.smart_copy(
                    ':%s/' % tablespace.location,
                    tablespace_dest,
                    safe_horizon,
                    ref_dir)
            except CommandFailedException as e:
                msg = "data transfer failure on directory '%s'" % \
                    backup_info.get_data_directory(tablespace.oid)
                raise DataTransferFailure.from_rsync_error(e, msg)

def _generate_recovery_conf(self, recovery_info, backup_info, dest,
                            exclusive, remote_command, target_name,
                            target_time, target_tli, target_xid):
    """
    Generate a recovery.conf file for PITR containing
    all the required configurations

    :param dict recovery_info: Dictionary containing all the recovery
        parameters
    :param barman.infofile.BackupInfo backup_info: representation
        of a backup
    :param str dest: destination directory of the recovery
    :param boolean exclusive: exclusive backup or concurrent
    :param str remote_command: ssh command for remote connection
    :param str target_name: recovery target name for PITR
    :param str target_time: recovery target time for PITR
    :param str target_tli: recovery target timeline for PITR
    :param str target_xid: recovery target transaction id for PITR
    """
    if remote_command:
        recovery = open(
            os.path.join(recovery_info['tempdir'], 'recovery.conf'), 'w')
    else:
        recovery = open(os.path.join(dest, 'recovery.conf'), 'w')

    # If GET_WAL has been set, use the get-wal command to retrieve the
    # required wal files. Otherwise use the unix command "cp" to copy
    # them from the barman_xlog directory
    if recovery_info['get_wal']:
        # We need to create the right restore command.
        # If we are doing a remote recovery,
        # the barman-cli package is REQUIRED on the server that is
        # hosting the PostgreSQL server.
        # We use the machine FQDN and the barman_user
        # setting to call the barman-wal-restore correctly.
        # If local recovery, we use barman directly, assuming
        # the postgres process will be executed with the barman user.
        # It MUST be reviewed by the user in any case.
        if remote_command:
            fqdn = socket.getfqdn()
            print("# The 'barman-wal-restore' command "
                  "is provided in the 'barman-cli' package",
                  file=recovery)
            print("restore_command = 'barman-wal-restore -U %s "
                  "%s %s %%f %%p'" % (self.config.config.user,
                                      fqdn, self.config.name),
                  file=recovery)
        else:
            print("# The 'barman get-wal' command "
                  "must run as '%s' user" % self.config.config.user,
                  file=recovery)
            print("restore_command = 'sudo -u %s "
                  "barman get-wal %s %%f > %%p'" % (
                      self.config.config.user, self.config.name),
                  file=recovery)
        recovery_info['results']['get_wal'] = True
    else:
        print("restore_command = 'cp barman_xlog/%f %p'", file=recovery)
    if backup_info.version >= 80400 and \
            not recovery_info['get_wal']:
        print("recovery_end_command = 'rm -fr barman_xlog'",
              file=recovery)
    if target_time:
        print("recovery_target_time = '%s'" % target_time, file=recovery)
    if target_tli:
        print("recovery_target_timeline = %s" % target_tli,
              file=recovery)
    if target_xid:
        print("recovery_target_xid = '%s'" % target_xid, file=recovery)
    if target_name:
        print("recovery_target_name = '%s'" % target_name, file=recovery)
    if (target_xid or target_time) and exclusive:
        print("recovery_target_inclusive = '%s'" % (not exclusive),
              file=recovery)
    recovery.close()

    if remote_command:
        plain_rsync = RsyncPgData(
            path=self.server.path,
            ssh=remote_command,
            bwlimit=self.config.bandwidth_limit,
            network_compression=self.config.network_compression)
        try:
            plain_rsync.from_file_list(['recovery.conf'],
                                       recovery_info['tempdir'],
                                       ':%s' % dest)
        except CommandFailedException as e:
            output.error('remote copy of recovery.conf failed: %s', e)
            output.close_and_exit()

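# Example of the recovery.conf produced by the branch above when
# get_wal is disabled, the backup is from PostgreSQL >= 8.4, and only
# target_time is set (the timestamp is hypothetical):
#
#   restore_command = 'cp barman_xlog/%f %p'
#   recovery_end_command = 'rm -fr barman_xlog'
#   recovery_target_time = '2016-04-01 12:00:00'
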
def _xlog_copy(self, required_xlog_files, wal_dest, remote_command):
    """
    Restore WAL segments

    :param required_xlog_files: list of all required WAL files
    :param wal_dest: the destination directory for xlog recover
    :param remote_command: default None. The remote command to recover
        the xlog, in case of remote backup.
    """
    # List of required WAL files partitioned by containing directory
    xlogs = collections.defaultdict(list)
    # Add '/' suffix to ensure it is a directory
    wal_dest = '%s/' % wal_dest
    # Map of every compressor used with any WAL file in the archive,
    # to be used during this recovery
    compressors = {}
    compression_manager = self.backup_manager.compression_manager
    # Fill xlogs and compressors maps from required_xlog_files
    for wal_info in required_xlog_files:
        hashdir = xlog.hash_dir(wal_info.name)
        xlogs[hashdir].append(wal_info)
        # If a compressor is required, make sure it exists in the cache
        if wal_info.compression is not None and \
                wal_info.compression not in compressors:
            compressors[wal_info.compression] = \
                compression_manager.get_compressor(
                    compression=wal_info.compression)

    rsync = RsyncPgData(
        path=self.server.path,
        ssh=remote_command,
        bwlimit=self.config.bandwidth_limit,
        network_compression=self.config.network_compression)
    # If compression is used and this is a remote recovery, we need a
    # temporary directory where to spool uncompressed files, otherwise
    # we either decompress every WAL file in the local destination, or
    # we ship the uncompressed file remotely
    if compressors:
        if remote_command:
            # Decompress to a temporary spool directory
            wal_decompression_dest = tempfile.mkdtemp(
                prefix='barman_xlog-')
        else:
            # Decompress directly to the destination directory
            wal_decompression_dest = wal_dest
        # Make sure wal_decompression_dest exists
        mkpath(wal_decompression_dest)
    else:
        # If no compression
        wal_decompression_dest = None
    if remote_command:
        # If remote recovery tell rsync to copy them remotely,
        # adding a ':' prefix to mark the destination as remote
        wal_dest = ':%s' % wal_dest

    total_wals = sum(map(len, xlogs.values()))
    partial_count = 0
    for prefix in sorted(xlogs):
        batch_len = len(xlogs[prefix])
        partial_count += batch_len
        source_dir = os.path.join(self.config.wals_directory, prefix)
        _logger.info("Starting copy of %s WAL files %s/%s from %s to %s",
                     batch_len,
                     partial_count,
                     total_wals,
                     xlogs[prefix][0],
                     xlogs[prefix][-1])
        # If at least one compressed file has been found, activate
        # compression check and decompression for each WAL file
        if compressors:
            for segment in xlogs[prefix]:
                dst_file = os.path.join(wal_decompression_dest,
                                        segment.name)
                if segment.compression is not None:
                    compressors[segment.compression].decompress(
                        os.path.join(source_dir, segment.name),
                        dst_file)
                else:
                    shutil.copy2(os.path.join(source_dir, segment.name),
                                 dst_file)
            if remote_command:
                try:
                    # Transfer the WAL files
                    rsync.from_file_list(
                        list(segment.name for segment in xlogs[prefix]),
                        wal_decompression_dest,
                        wal_dest)
                except CommandFailedException as e:
                    msg = ("data transfer failure while copying WAL "
                           "files to directory '%s'") % (wal_dest[1:],)
                    raise DataTransferFailure.from_command_error(
                        'rsync', e, msg)
                # Cleanup files after the transfer
                for segment in xlogs[prefix]:
                    file_name = os.path.join(wal_decompression_dest,
                                             segment.name)
                    try:
                        os.unlink(file_name)
                    except OSError as e:
                        output.warning(
                            "Error removing temporary file '%s': %s",
                            file_name, e)
        else:
            try:
                rsync.from_file_list(
                    list(segment.name for segment in xlogs[prefix]),
                    "%s/" % os.path.join(self.config.wals_directory,
                                         prefix),
                    wal_dest)
            except CommandFailedException as e:
                msg = ("data transfer failure while copying WAL files "
                       "to directory '%s'") % (wal_dest[1:],)
                raise DataTransferFailure.from_command_error(
                    'rsync', e, msg)

    _logger.info("Finished copying %s WAL files.", total_wals)

    # Remove local decompression target directory if different from the
    # destination directory (it happens when compression is in use
    # during a remote recovery)
    if wal_decompression_dest and wal_decompression_dest != wal_dest:
        shutil.rmtree(wal_decompression_dest)

def _setup(self, backup_info, remote_command, dest):
    """
    Prepare the recovery_info dictionary for the recovery, as well as
    the temporary working directory

    :param barman.infofile.BackupInfo backup_info: representation of a
        backup
    :param str remote_command: ssh command for remote connection
    :param str dest: destination directory of the recovery
    :return dict: recovery_info dictionary, holding the basic values for
        a recovery
    """
    # Calculate the name of the WAL directory
    if backup_info.version < 100000:
        wal_dest = os.path.join(dest, 'pg_xlog')
    else:
        wal_dest = os.path.join(dest, 'pg_wal')

    recovery_info = {
        'cmd': None,
        'recovery_dest': 'local',
        'rsync': None,
        'configuration_files': [],
        'destination_path': dest,
        'temporary_configuration_files': [],
        'tempdir': tempfile.mkdtemp(prefix='barman_recovery-'),
        'is_pitr': False,
        'wal_dest': wal_dest,
        'get_wal': RecoveryOptions.GET_WAL in self.config.recovery_options,
    }
    # A map that will keep track of the results of the recovery.
    # Used for output generation
    results = {
        'changes': [],
        'warnings': [],
        'delete_barman_xlog': False,
        'missing_files': [],
        'get_wal': False,
    }
    recovery_info['results'] = results

    # Set up a list of configuration files
    recovery_info['configuration_files'].append('postgresql.conf')
    if backup_info.version >= 90400:
        recovery_info['configuration_files'].append(
            'postgresql.auto.conf')

    # Handle remote recovery options
    if remote_command:
        recovery_info['recovery_dest'] = 'remote'
        try:
            recovery_info['rsync'] = RsyncPgData(
                path=self.server.path,
                ssh=remote_command,
                bwlimit=self.config.bandwidth_limit,
                network_compression=self.config.network_compression)
        except CommandFailedException:
            self._teardown(recovery_info)
            raise
        try:
            # Create a UnixRemoteCommand obj if it is a remote recovery
            recovery_info['cmd'] = UnixRemoteCommand(
                remote_command, path=self.server.path)
        except FsOperationFailed:
            self._teardown(recovery_info)
            output.error(
                "Unable to connect to the target host using the command "
                "'%s'", remote_command)
            output.close_and_exit()
    else:
        # If it is a local recovery, create a UnixLocalCommand
        recovery_info['cmd'] = UnixLocalCommand()

    return recovery_info

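# Usage note (inferred from the code above): _setup() creates a temporary
# working directory via tempfile.mkdtemp(), so every caller must make
# sure _teardown(recovery_info) runs on both success and failure paths;
# otherwise 'barman_recovery-*' directories leak in the system tempdir.
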
def recover(self, backup, dest, tablespaces, target_tli, target_time,
            target_xid, exclusive, remote_command):
    '''
    Performs a recovery of a backup

    :param backup: the backup to recover
    :param dest: the destination directory
    :param tablespaces: a dictionary of tablespaces
    :param target_tli: the target timeline
    :param target_time: the target time
    :param target_xid: the target xid
    :param exclusive: whether the recovery is exclusive or not
    :param remote_command: default None. The remote command to recover
                           the base backup, in case of remote backup.
    '''
    for line in self.cron(False):
        yield line

    recovery_dest = 'local'
    if remote_command:
        recovery_dest = 'remote'
        rsync = RsyncPgData(ssh=remote_command)
    msg = "Starting %s restore for server %s using backup %s" % (
        recovery_dest, self.config.name, backup.backup_id)
    yield msg
    _logger.info(msg)

    msg = "Destination directory: %s" % dest
    yield msg
    _logger.info(msg)
    if backup.tablespaces:
        if remote_command:
            # TODO: remote dir preparation
            msg = "Skipping remote directory preparation, " \
                "you must have done it by yourself."
            yield msg
            _logger.warning(msg)
        else:
            tblspc_dir = os.path.join(dest, 'pg_tblspc')
            if not os.path.exists(tblspc_dir):
                os.makedirs(tblspc_dir)
            for name, oid, location in backup.tablespaces:
                try:
                    if name in tablespaces:
                        location = tablespaces[name]
                    tblspc_file = os.path.join(tblspc_dir, str(oid))
                    if os.path.exists(tblspc_file):
                        os.unlink(tblspc_file)
                    if os.path.exists(location) and \
                            not os.path.isdir(location):
                        os.unlink(location)
                    if not os.path.exists(location):
                        os.makedirs(location)
                    # test permissions
                    barman_write_check_file = os.path.join(
                        location, '.barman_write_check')
                    open(barman_write_check_file, 'a').close()
                    os.unlink(barman_write_check_file)
                    os.symlink(location, tblspc_file)
                except Exception:
                    msg = "ERROR: unable to prepare '%s' tablespace " \
                        "(destination '%s')" % (name, location)
                    _logger.critical(msg)
                    raise SystemExit(msg)
                yield "\t%s, %s, %s" % (oid, name, location)

    target_epoch = None
    if target_time:
        try:
            target_datetime = dateutil.parser.parse(target_time)
        except Exception:
            msg = "ERROR: unable to parse the target time " \
                "parameter %r" % target_time
            _logger.critical(msg)
            raise SystemExit(msg)
        target_epoch = time.mktime(target_datetime.timetuple()) + (
            target_datetime.microsecond / 1000000.)
    if target_time or target_xid or (
            target_tli and target_tli != backup.timeline):
        targets = {}
        if target_time:
            targets['time'] = str(target_datetime)
        if target_xid:
            targets['xid'] = str(target_xid)
        if target_tli and target_tli != backup.timeline:
            targets['timeline'] = str(target_tli)
        yield "Doing PITR. Recovery target %s" % (
            ", ".join(["%s: %r" % (k, v) for k, v in targets.items()]))

    # Copy the base backup
    msg = "Copying the base backup."
    yield msg
    _logger.info(msg)
    self.recover_basebackup_copy(backup, dest, remote_command)
    _logger.info("Base backup copied.")

    # Prepare WAL segments local directory
    msg = "Copying required WAL segments."
    _logger.info(msg)
    yield msg
    if target_time or target_xid or (
            target_tli and target_tli != backup.timeline):
        wal_dest = os.path.join(dest, 'barman_xlog')
    else:
        wal_dest = os.path.join(dest, 'pg_xlog')
    # Retrieve the list of required WAL segments
    # according to recovery options
    xlogs = {}
    required_xlog_files = tuple(self.server.get_required_xlog_files(
        backup, target_tli, target_epoch, target_xid))
    for filename in required_xlog_files:
        hashdir = xlog.hash_dir(filename)
        if hashdir not in xlogs:
            xlogs[hashdir] = []
        xlogs[hashdir].append(filename)
    # Check decompression options
    decompressor = self.compression_manager.get_decompressor()

    # Restore WAL segments
    self.recover_xlog_copy(decompressor, xlogs, wal_dest, remote_command)
    _logger.info("WAL segments copied.")

    # Generate recovery.conf file (only if needed by PITR)
    if target_time or target_xid or (
            target_tli and target_tli != backup.timeline):
        msg = "Generating recovery.conf"
        yield msg
        _logger.info(msg)
        if remote_command:
            tempdir = tempfile.mkdtemp(prefix='barman_recovery-')
            recovery = open(os.path.join(tempdir, 'recovery.conf'), 'w')
        else:
            recovery = open(os.path.join(dest, 'recovery.conf'), 'w')
        print("restore_command = 'cp barman_xlog/%f %p'", file=recovery)
        print("recovery_end_command = 'rm -fr barman_xlog'",
              file=recovery)
        if target_time:
            print("recovery_target_time = '%s'" % target_time,
                  file=recovery)
        if target_tli:
            print("recovery_target_timeline = %s" % target_tli,
                  file=recovery)
        if target_xid:
            print("recovery_target_xid = '%s'" % target_xid,
                  file=recovery)
        if exclusive:
            print("recovery_target_inclusive = '%s'" % (not exclusive),
                  file=recovery)
        recovery.close()
        if remote_command:
            recovery = rsync.from_file_list(['recovery.conf'], tempdir,
                                            ':%s' % dest)
            shutil.rmtree(tempdir)
        _logger.info('recovery.conf generated')
    else:
        # avoid shipping of just recovered pg_xlog files
        if remote_command:
            status_dir = tempfile.mkdtemp(prefix='barman_xlog_status-')
        else:
            status_dir = os.path.join(wal_dest, 'archive_status')
            os.makedirs(status_dir)  # no need to check, it must not exist
        for filename in required_xlog_files:
            with open(os.path.join(status_dir, "%s.done" % filename),
                      'a') as f:
                f.write('')
        if remote_command:
            retval = rsync('%s/' % status_dir,
                           ':%s' % os.path.join(wal_dest,
                                                'archive_status'))
            if retval != 0:
                msg = "WARNING: unable to populate " \
                    "pg_xlog/archive_status directory"
                yield msg
                _logger.warning(msg)
            shutil.rmtree(status_dir)

    # Disable dangerous setting in the target data dir
    if remote_command:
        tempdir = tempfile.mkdtemp(prefix='barman_recovery-')
        pg_config = os.path.join(tempdir, 'postgresql.conf')
        shutil.copy2(os.path.join(backup.get_basebackup_directory(),
                                  'pgdata', 'postgresql.conf'),
                     pg_config)
    else:
        pg_config = os.path.join(dest, 'postgresql.conf')
    if self.pg_config_mangle(pg_config,
                             {'archive_command': 'false'},
                             "%s.origin" % pg_config):
        msg = "The archive_command was set to 'false' " \
            "to prevent data losses."
        yield msg
        _logger.info(msg)

    # Find dangerous options in the configuration file (locations)
    clashes = self.pg_config_detect_possible_issues(pg_config)

    if remote_command:
        recovery = rsync.from_file_list(
            ['postgresql.conf', 'postgresql.conf.origin'], tempdir,
            ':%s' % dest)
        shutil.rmtree(tempdir)

    yield ""
    yield "Your PostgreSQL server has been successfully prepared for recovery!"
    yield ""
    yield "Please review network and archive related settings in the PostgreSQL"
    yield "configuration file before starting the just recovered instance."
    yield ""
    if clashes:
        yield "WARNING: Before starting up the recovered PostgreSQL server,"
        yield "please also review the settings of the following configuration"
        yield "options as they might interfere with your current recovery attempt:"
        yield ""
        for name, value in sorted(clashes.items()):
            yield "\t%s = %s" % (name, value)
    yield ""
    _logger.info("Recovery completed successfully.")

def backup_copy(self, backup_info):
    '''
    Perform the copy of the backup.
    This function returns the size of the backup (in bytes)

    :param backup_info: the backup information structure
    '''
    # validate the bandwidth rules against the tablespace list
    tablespaces_bwlimit = {}
    if self.config.tablespace_bandwidth_limit and backup_info.tablespaces:
        valid_tablespaces = dict([
            (tablespace_data[0], tablespace_data[1])
            for tablespace_data in backup_info.tablespaces])
        for tablespace, bwlimit in \
                self.config.tablespace_bandwidth_limit.items():
            if tablespace in valid_tablespaces:
                tablespace_dir = "pg_tblspc/%s" % (
                    valid_tablespaces[tablespace],)
                tablespaces_bwlimit[tablespace_dir] = bwlimit

    backup_dest = os.path.join(backup_info.get_basebackup_directory(),
                               'pgdata')
    rsync = RsyncPgData(
        ssh=self.server.ssh_command,
        ssh_options=self.server.ssh_options,
        bwlimit=self.config.bandwidth_limit,
        exclude_and_protect=tablespaces_bwlimit.keys())
    retval = rsync(':%s/' % backup_info.pgdata, backup_dest)
    if retval not in (0, 24):
        msg = "ERROR: data transfer failure"
        _logger.exception(msg)
        raise Exception(msg)

    # deal with tablespaces with a different bwlimit
    if len(tablespaces_bwlimit) > 0:
        for tablespace_dir, bwlimit in tablespaces_bwlimit.items():
            self.current_action = \
                "copying tablespace '%s' with bwlimit %d" % (
                    tablespace_dir, bwlimit)
            _logger.debug(self.current_action)
            tb_rsync = RsyncPgData(
                ssh=self.server.ssh_command,
                ssh_options=self.server.ssh_options,
                bwlimit=bwlimit)
            retval = tb_rsync(
                ':%s/' % os.path.join(backup_info.pgdata, tablespace_dir),
                os.path.join(backup_dest, tablespace_dir))
            if retval not in (0, 24):
                msg = "ERROR: data transfer failure on directory '%s'" % (
                    tablespace_dir,)
                _logger.exception(msg)
                raise Exception(msg)

    # Copy configuration files (if not inside PGDATA)
    self.current_action = "copying configuration files"
    _logger.debug(self.current_action)
    cf = self.server.get_pg_configuration_files()
    if cf:
        for key in sorted(cf.keys()):
            # Consider only those that reside outside of the original
            # PGDATA directory
            if cf[key]:
                if cf[key].find(backup_info.pgdata) == 0:
                    self.current_action = \
                        "skipping %s as contained in %s directory" % (
                            key, backup_info.pgdata)
                    _logger.debug(self.current_action)
                    continue
                else:
                    self.current_action = \
                        "copying %s as outside %s directory" % (
                            key, backup_info.pgdata)
                    _logger.info(self.current_action)
                    retval = rsync(':%s' % cf[key], backup_dest)
                    if retval not in (0, 24):
                        raise Exception("ERROR: data transfer failure")

    self.current_action = "calculating backup size"
    _logger.debug(self.current_action)
    backup_size = 0
    for dirpath, _, filenames in os.walk(backup_dest):
        for f in filenames:
            fp = os.path.join(dirpath, f)
            backup_size += os.path.getsize(fp)
    return backup_size

def recover(self, backup, dest, tablespaces, target_tli, target_time,
            target_xid, exclusive, remote_command):
    '''
    Performs a recovery of a backup

    :param backup: the backup to recover
    :param dest: the destination directory
    :param tablespaces: a dictionary of tablespaces
    :param target_tli: the target timeline
    :param target_time: the target time
    :param target_xid: the target xid
    :param exclusive: whether the recovery is exclusive or not
    :param remote_command: default None. The remote command to recover
                           the base backup, in case of remote backup.
    '''
    for line in self.cron(False):
        yield line

    recovery_dest = 'local'
    if remote_command:
        recovery_dest = 'remote'
        rsync = RsyncPgData(ssh=remote_command)
    msg = "Starting %s restore for server %s using backup %s" % (
        recovery_dest, self.config.name, backup.backup_id)
    yield msg
    _logger.info(msg)

    msg = "Destination directory: %s" % dest
    yield msg
    _logger.info(msg)
    if backup.tablespaces:
        if remote_command:
            # TODO: remote dir preparation
            msg = "Skipping remote directory preparation, " \
                "you must have done it by yourself."
            yield msg
            _logger.warning(msg)
        else:
            tblspc_dir = os.path.join(dest, 'pg_tblspc')
            if not os.path.exists(tblspc_dir):
                os.makedirs(tblspc_dir)
            for name, oid, location in backup.tablespaces:
                try:
                    if name in tablespaces:
                        location = tablespaces[name]
                    tblspc_file = os.path.join(tblspc_dir, str(oid))
                    if os.path.exists(tblspc_file):
                        os.unlink(tblspc_file)
                    if os.path.exists(location) and \
                            not os.path.isdir(location):
                        os.unlink(location)
                    if not os.path.exists(location):
                        os.makedirs(location)
                    # test permissions
                    barman_write_check_file = os.path.join(
                        location, '.barman_write_check')
                    open(barman_write_check_file, 'a').close()
                    os.unlink(barman_write_check_file)
                    os.symlink(location, tblspc_file)
                except Exception:
                    msg = "ERROR: unable to prepare '%s' tablespace " \
                        "(destination '%s')" % (name, location)
                    _logger.critical(msg)
                    raise SystemExit(msg)
                yield "\t%s, %s, %s" % (oid, name, location)

    target_epoch = None
    if target_time:
        try:
            target_datetime = dateutil.parser.parse(target_time)
        except Exception:
            msg = "ERROR: unable to parse the target time " \
                "parameter %r" % target_time
            _logger.critical(msg)
            raise SystemExit(msg)
        target_epoch = time.mktime(target_datetime.timetuple()) + (
            target_datetime.microsecond / 1000000.)
    if target_time or target_xid or (
            target_tli and target_tli != backup.timeline):
        targets = {}
        if target_time:
            targets['time'] = str(target_datetime)
        if target_xid:
            targets['xid'] = str(target_xid)
        if target_tli and target_tli != backup.timeline:
            targets['timeline'] = str(target_tli)
        yield "Doing PITR. Recovery target %s" % (
            ", ".join(["%s: %r" % (k, v) for k, v in targets.items()]))

    # Copy the base backup
    msg = "Copying the base backup."
    yield msg
    _logger.info(msg)
    self.recover_basebackup_copy(backup, dest, remote_command)
    _logger.info("Base backup copied.")

    # Prepare WAL segments local directory
    msg = "Copying required WAL segments."
    _logger.info(msg)
    yield msg
    if target_time or target_xid or (
            target_tli and target_tli != backup.timeline):
        wal_dest = os.path.join(dest, 'barman_xlog')
    else:
        wal_dest = os.path.join(dest, 'pg_xlog')
    # Retrieve the list of required WAL segments
    # according to recovery options
    xlogs = {}
    required_xlog_files = tuple(self.server.get_required_xlog_files(
        backup, target_tli, target_epoch, target_xid))
    for filename in required_xlog_files:
        hashdir = xlog.hash_dir(filename)
        if hashdir not in xlogs:
            xlogs[hashdir] = []
        xlogs[hashdir].append(filename)
    # Check decompression options
    decompressor = self.compression_manager.get_decompressor()

    # Restore WAL segments
    self.recover_xlog_copy(decompressor, xlogs, wal_dest, remote_command)
    _logger.info("WAL segments copied.")

    # Generate recovery.conf file (only if needed by PITR)
    if target_time or target_xid or (
            target_tli and target_tli != backup.timeline):
        msg = "Generating recovery.conf"
        yield msg
        _logger.info(msg)
        if remote_command:
            tempdir = tempfile.mkdtemp(prefix='barman_recovery-')
            recovery = open(os.path.join(tempdir, 'recovery.conf'), 'w')
        else:
            recovery = open(os.path.join(dest, 'recovery.conf'), 'w')
        print("restore_command = 'cp barman_xlog/%f %p'", file=recovery)
        print("recovery_end_command = 'rm -fr barman_xlog'",
              file=recovery)
        if target_time:
            print("recovery_target_time = '%s'" % target_time,
                  file=recovery)
        if target_tli:
            print("recovery_target_timeline = %s" % target_tli,
                  file=recovery)
        if target_xid:
            print("recovery_target_xid = '%s'" % target_xid,
                  file=recovery)
        if exclusive:
            print("recovery_target_inclusive = '%s'" % (not exclusive),
                  file=recovery)
        recovery.close()
        if remote_command:
            recovery = rsync.from_file_list(['recovery.conf'], tempdir,
                                            ':%s' % dest)
            shutil.rmtree(tempdir)
        _logger.info('recovery.conf generated')
    else:
        # avoid shipping of just recovered pg_xlog files
        if remote_command:
            status_dir = tempfile.mkdtemp(prefix='barman_xlog_status-')
        else:
            status_dir = os.path.join(wal_dest, 'archive_status')
            os.makedirs(status_dir)  # no need to check, it must not exist
        for filename in required_xlog_files:
            with open(os.path.join(status_dir, "%s.done" % filename),
                      'a') as f:
                f.write('')
        if remote_command:
            retval = rsync(
                '%s/' % status_dir,
                ':%s' % os.path.join(wal_dest, 'archive_status'))
            if retval != 0:
                msg = "WARNING: unable to populate " \
                    "pg_xlog/archive_status directory"
                yield msg
                _logger.warning(msg)
            shutil.rmtree(status_dir)

    # Disable dangerous setting in the target data dir
    if remote_command:
        tempdir = tempfile.mkdtemp(prefix='barman_recovery-')
        pg_config = os.path.join(tempdir, 'postgresql.conf')
        shutil.copy2(
            os.path.join(backup.get_basebackup_directory(),
                         'pgdata', 'postgresql.conf'),
            pg_config)
    else:
        pg_config = os.path.join(dest, 'postgresql.conf')
    if self.pg_config_mangle(pg_config,
                             {'archive_command': 'false'},
                             "%s.origin" % pg_config):
        msg = "The archive_command was set to 'false' " \
            "to prevent data losses."
        yield msg
        _logger.info(msg)

    # Find dangerous options in the configuration file (locations).
    # This must happen before the temporary directory holding pg_config
    # is shipped and removed below.
    clashes = self.pg_config_detect_possible_issues(pg_config)

    if remote_command:
        recovery = rsync.from_file_list(
            ['postgresql.conf', 'postgresql.conf.origin'], tempdir,
            ':%s' % dest)
        shutil.rmtree(tempdir)

    yield ""
    yield "Your PostgreSQL server has been successfully prepared for recovery!"
    yield ""
    yield "Please review network and archive related settings in the PostgreSQL"
    yield "configuration file before starting the just recovered instance."
    yield ""
    if clashes:
        yield "WARNING: Before starting up the recovered PostgreSQL server,"
        yield "please also review the settings of the following configuration"
        yield "options as they might interfere with your current recovery attempt:"
        yield ""
        for name, value in sorted(clashes.items()):
            yield "\t%s = %s" % (name, value)
    yield ""
    _logger.info("Recovery completed successfully.")

def xlog_copy(self, required_xlog_files, wal_dest, remote_command):
    """
    Restore WAL segments

    :param required_xlog_files: list of all required WAL files
    :param wal_dest: the destination directory for xlog recover
    :param remote_command: default None. The remote command to recover
        the xlog, in case of remote backup.
    """
    # Retrieve the list of required WAL segments
    # according to recovery options
    xlogs = {}
    for wal_info in required_xlog_files:
        hashdir = xlog.hash_dir(wal_info.name)
        if hashdir not in xlogs:
            xlogs[hashdir] = []
        xlogs[hashdir].append(wal_info.name)
    # Check decompression options
    compressor = self.backup_manager.compression_manager.get_compressor()

    rsync = RsyncPgData(
        ssh=remote_command,
        bwlimit=self.config.bandwidth_limit,
        network_compression=self.config.network_compression)
    if remote_command:
        # If remote recovery tell rsync to copy them remotely:
        # add ':' prefix to mark it as remote and
        # add '/' suffix to ensure it is a directory
        wal_dest = ':%s/' % wal_dest
    else:
        # We will not use rsync: the destination directory must exist
        mkpath(wal_dest)
    if compressor and remote_command:
        xlog_spool = tempfile.mkdtemp(prefix='barman_xlog-')

    total_wals = sum(map(len, xlogs.values()))
    partial_count = 0
    for prefix in sorted(xlogs):
        batch_len = len(xlogs[prefix])
        partial_count += batch_len
        source_dir = os.path.join(self.config.wals_directory, prefix)
        _logger.info(
            "Starting copy of %s WAL files %s/%s from %s to %s",
            batch_len,
            partial_count,
            total_wals,
            xlogs[prefix][0],
            xlogs[prefix][-1])
        if compressor:
            if remote_command:
                for segment in xlogs[prefix]:
                    compressor.decompress(
                        os.path.join(source_dir, segment),
                        os.path.join(xlog_spool, segment))
                try:
                    rsync.from_file_list(xlogs[prefix], xlog_spool,
                                         wal_dest)
                except CommandFailedException as e:
                    msg = "data transfer failure while copying WAL " \
                        "files to directory '%s'" % (wal_dest[1:],)
                    raise DataTransferFailure.from_rsync_error(e, msg)
                # Cleanup files after the transfer
                for segment in xlogs[prefix]:
                    file_name = os.path.join(xlog_spool, segment)
                    try:
                        os.unlink(file_name)
                    except OSError as e:
                        output.warning(
                            "Error removing temporary file '%s': %s",
                            file_name, e)
            else:
                # decompress directly to the right place
                for segment in xlogs[prefix]:
                    compressor.decompress(
                        os.path.join(source_dir, segment),
                        os.path.join(wal_dest, segment))
        else:
            try:
                rsync.from_file_list(
                    xlogs[prefix],
                    "%s/" % os.path.join(self.config.wals_directory,
                                         prefix),
                    wal_dest)
            except CommandFailedException as e:
                msg = "data transfer failure while copying WAL files " \
                    "to directory '%s'" % (wal_dest[1:],)
                raise DataTransferFailure.from_rsync_error(e, msg)

                bwlimit=bwlimit,
                network_compression=self.config.network_compression,
                check=True)
            try:
                tb_rsync.smart_copy(
                    '%s/' % backup_info.get_data_directory(
                        tablespace.oid),
                    dest_prefix + location,
                    safe_horizon)
            except CommandFailedException as e:
                msg = "data transfer failure on directory '%s'" % \
                    location
                raise DataTransferFailure.from_rsync_error(e, msg)

    # Copy the pgdata directory
    rsync = RsyncPgData(
        ssh=remote_command,
        bwlimit=self.config.bandwidth_limit,
        exclude_and_protect=exclude_and_protect,
        network_compression=self.config.network_compression)
    try:
        rsync.smart_copy(
            '%s/' % backup_info.get_data_directory(),
            dest_prefix + dest,
            safe_horizon)
    except CommandFailedException as e:
        msg = "data transfer failure on directory '%s'" % dest
        raise DataTransferFailure.from_rsync_error(e, msg)

    # TODO: Manage different location for configuration files
    # TODO: that were not within the data directory

def xlog_copy(self, required_xlog_files, wal_dest, remote_command):

def basebackup_copy(self, backup_info, dest, tablespaces=None,
                    remote_command=None, safe_horizon=None):
    """
    Perform the actual copy of the base backup for recovery purposes

    :param barman.infofile.BackupInfo backup_info: the backup to recover
    :param str dest: the destination directory
    :param dict[str,str]|None tablespaces: a tablespace
        name -> location map (for relocation)
    :param str|None remote_command: default None. The remote command to
        recover the base backup, in case of remote backup.
    :param datetime.datetime|None safe_horizon: anything after this time
        has to be checked with checksum
    """
    # Dictionary for paths to be excluded from rsync
    exclude_and_protect = []

    # Set a ':' prefix to remote destinations
    dest_prefix = ''
    if remote_command:
        dest_prefix = ':'

    # Copy tablespaces applying bwlimit when necessary
    if backup_info.tablespaces:
        tablespaces_bw_limit = self.config.tablespace_bandwidth_limit
        # Copy a tablespace at a time
        for tablespace in backup_info.tablespaces:
            # Apply bandwidth limit if requested
            bwlimit = self.config.bandwidth_limit
            if tablespaces_bw_limit and \
                    tablespace.name in tablespaces_bw_limit:
                bwlimit = tablespaces_bw_limit[tablespace.name]
            # By default a tablespace goes in the same location where
            # it was on the source server when the backup was taken
            location = tablespace.location
            # If a relocation has been requested for this tablespace
            # use the user provided target directory
            if tablespaces and tablespace.name in tablespaces:
                location = tablespaces[tablespace.name]
            # If the tablespace location is inside the data directory,
            # exclude and protect it from being deleted during
            # the data directory copy
            if location.startswith(dest):
                exclude_and_protect.append(location[len(dest):])
            # Exclude and protect the tablespace from being deleted
            # during the data directory copy
            exclude_and_protect.append("/pg_tblspc/%s" % tablespace.oid)
            # Copy the tablespace using smart copy
            tb_rsync = RsyncPgData(
                path=self.server.path,
                ssh=remote_command,
                bwlimit=bwlimit,
                network_compression=self.config.network_compression,
                check=True)
            try:
                tb_rsync.smart_copy(
                    '%s/' % backup_info.get_data_directory(
                        tablespace.oid),
                    dest_prefix + location,
                    safe_horizon)
            except CommandFailedException as e:
                msg = "data transfer failure on directory '%s'" % \
                    location
                raise DataTransferFailure.from_rsync_error(e, msg)

    # Copy the pgdata directory
    rsync = RsyncPgData(
        path=self.server.path,
        ssh=remote_command,
        bwlimit=self.config.bandwidth_limit,
        exclude_and_protect=exclude_and_protect,
        network_compression=self.config.network_compression)
    try:
        rsync.smart_copy(
            '%s/' % backup_info.get_data_directory(),
            dest_prefix + dest,
            safe_horizon)
    except CommandFailedException as e:
        msg = "data transfer failure on directory '%s'" % dest
        raise DataTransferFailure.from_rsync_error(e, msg)

def xlog_copy(self, required_xlog_files, wal_dest, remote_command):
    """
    Restore WAL segments

    :param required_xlog_files: list of all required WAL files
    :param wal_dest: the destination directory for xlog recover
    :param remote_command: default None. The remote command to recover
        the xlog, in case of remote backup.
    """
    # List of required WAL files partitioned by containing directory
    xlogs = collections.defaultdict(list)
    # Add '/' suffix to ensure it is a directory
    wal_dest = '%s/' % wal_dest
    # Map of every compressor used with any WAL file in the archive,
    # to be used during this recovery
    compressors = {}
    compression_manager = self.backup_manager.compression_manager
    # Fill xlogs and compressors maps from required_xlog_files
    for wal_info in required_xlog_files:
        hashdir = xlog.hash_dir(wal_info.name)
        xlogs[hashdir].append(wal_info)
        # If a compressor is required, make sure it exists in the cache
        if wal_info.compression is not None and \
                wal_info.compression not in compressors:
            compressors[wal_info.compression] = \
                compression_manager.get_compressor(
                    compression=wal_info.compression)

    rsync = RsyncPgData(
        path=self.server.path,
        ssh=remote_command,
        bwlimit=self.config.bandwidth_limit,
        network_compression=self.config.network_compression)
    # If compression is used and this is a remote recovery, we need a
    # temporary directory where to spool uncompressed files, otherwise
    # we either decompress every WAL file in the local destination, or
    # we ship the uncompressed file remotely
    if compressors:
        if remote_command:
            # Decompress to a temporary spool directory
            wal_decompression_dest = tempfile.mkdtemp(
                prefix='barman_xlog-')
        else:
            # Decompress directly to the destination directory
            wal_decompression_dest = wal_dest
        # Make sure wal_decompression_dest exists
        mkpath(wal_decompression_dest)
    else:
        # If no compression
        wal_decompression_dest = None
    if remote_command:
        # If remote recovery tell rsync to copy them remotely,
        # adding a ':' prefix to mark the destination as remote
        wal_dest = ':%s' % wal_dest

    total_wals = sum(map(len, xlogs.values()))
    partial_count = 0
    for prefix in sorted(xlogs):
        batch_len = len(xlogs[prefix])
        partial_count += batch_len
        source_dir = os.path.join(self.config.wals_directory, prefix)
        _logger.info(
            "Starting copy of %s WAL files %s/%s from %s to %s",
            batch_len,
            partial_count,
            total_wals,
            xlogs[prefix][0],
            xlogs[prefix][-1])
        # If at least one compressed file has been found, activate
        # compression check and decompression for each WAL file
        if compressors:
            for segment in xlogs[prefix]:
                dst_file = os.path.join(wal_decompression_dest,
                                        segment.name)
                if segment.compression is not None:
                    compressors[segment.compression].decompress(
                        os.path.join(source_dir, segment.name),
                        dst_file)
                else:
                    shutil.copy2(os.path.join(source_dir, segment.name),
                                 dst_file)
            if remote_command:
                try:
                    # Transfer the WAL files
                    rsync.from_file_list(
                        list(segment.name for segment in xlogs[prefix]),
                        wal_decompression_dest,
                        wal_dest)
                except CommandFailedException as e:
                    msg = "data transfer failure while copying WAL " \
                        "files to directory '%s'" % (wal_dest[1:],)
                    raise DataTransferFailure.from_rsync_error(e, msg)
                # Cleanup files after the transfer
                for segment in xlogs[prefix]:
                    file_name = os.path.join(wal_decompression_dest,
                                             segment.name)
                    try:
                        os.unlink(file_name)
                    except OSError as e:
                        output.warning(
                            "Error removing temporary file '%s': %s",
                            file_name, e)
        else:
            try:
                rsync.from_file_list(
                    list(segment.name for segment in xlogs[prefix]),
                    "%s/" % os.path.join(self.config.wals_directory,
                                         prefix),
                    wal_dest)
            except CommandFailedException as e:
                msg = "data transfer failure while copying WAL files " \
                    "to directory '%s'" % (wal_dest[1:],)
                raise DataTransferFailure.from_rsync_error(e, msg)

    _logger.info("Finished copying %s WAL files.", total_wals)

    # Remove local decompression target directory if different from the
    # destination directory (it happens when compression is in use
    # during a remote recovery)
    if wal_decompression_dest and wal_decompression_dest != wal_dest:
        shutil.rmtree(wal_decompression_dest)

msg = "data transfer failure on directory '%s'" % \ backup_info.get_data_directory(tablespace.oid) raise DataTransferFailure.from_rsync_error(e, msg) # Make sure the destination directory exists in order for smart copy # to detect that no file is present there backup_dest = backup_info.get_data_directory() mkpath(backup_dest) # Copy the pgdata, trying to reuse the data dir # of the previous backup if incremental is active ref_dir = self.reuse_dir(previous_backup) rsync = RsyncPgData( ssh=self.ssh_command, ssh_options=self.ssh_options, args=self.reuse_args(ref_dir), bwlimit=self.config.bandwidth_limit, exclude_and_protect=exclude_and_protect, network_compression=self.config.network_compression) try: rsync.smart_copy(':%s/' % backup_info.pgdata, backup_dest, safe_horizon, ref_dir) except CommandFailedException, e: msg = "data transfer failure on directory '%s'" % \ backup_info.pgdata raise DataTransferFailure.from_rsync_error(e, msg) # at last copy pg_control try: rsync(':%s/global/pg_control' % (backup_info.pgdata,),