def result_list_backup(self, backup_info,
                       backup_size, wal_size,
                       retention_status):
    """
    Output a single backup in the list-backup command

    :param BackupInfo backup_info: backup we are displaying
    :param backup_size: size of base backup (with the required WAL files)
    :param wal_size: size of WAL files belonging to this backup
        (without the required WAL files)
    :param retention_status: retention policy status
    """
    # If minimal is set only output the backup id
    if self.minimal:
        self.info(backup_info.backup_id)
        return

    out_list = [
        "%s %s - " % (backup_info.server_name, backup_info.backup_id)]
    if backup_info.status == BackupInfo.DONE:
        end_time = backup_info.end_time.ctime()
        out_list.append('%s - Size: %s - WAL Size: %s' %
                        (end_time,
                         pretty_size(backup_size),
                         pretty_size(wal_size)))
        if backup_info.tablespaces:
            tablespaces = [("%s:%s" % (tablespace.name,
                                       tablespace.location))
                           for tablespace in backup_info.tablespaces]
            out_list.append(' (tablespaces: %s)' %
                            ', '.join(tablespaces))
        if retention_status:
            out_list.append(' - %s' % retention_status)
    else:
        out_list.append(backup_info.status)
    self.info(''.join(out_list))
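# A rough sketch of what pretty_size() (imported from barman.utils) returns
# for byte counts like the ones passed above; the exact rounding may vary
# between Barman versions, so treat the values as illustrative:
#
# >>> pretty_size(12345)
# '12.1 KiB'
# >>> pretty_size(12345678901)
# '11.5 GiB'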
def backup_fsync_and_set_sizes(self, backup_info):
    """
    Fsync all files in a backup and set the actual size on disk
    of a backup.

    Also evaluate the deduplication ratio and the deduplicated
    size if applicable.

    :param barman.infofile.BackupInfo backup_info: the backup to update
    """
    # Calculate the base backup size
    self.executor.current_action = "calculating backup size"
    _logger.debug(self.executor.current_action)
    backup_size = 0
    deduplicated_size = 0
    backup_dest = backup_info.get_basebackup_directory()
    for dir_path, _, file_names in os.walk(backup_dest):
        # execute fsync() on the containing directory
        fsync_dir(dir_path)
        # execute fsync() on all the contained files
        for filename in file_names:
            file_path = os.path.join(dir_path, filename)
            file_fd = os.open(file_path, os.O_RDONLY)
            file_stat = os.fstat(file_fd)
            backup_size += file_stat.st_size
            # Excludes hard links from real backup size
            if file_stat.st_nlink == 1:
                deduplicated_size += file_stat.st_size
            os.fsync(file_fd)
            os.close(file_fd)
    # Save size into BackupInfo object
    backup_info.set_attribute('size', backup_size)
    backup_info.set_attribute('deduplicated_size', deduplicated_size)
    if backup_info.size > 0:
        deduplication_ratio = 1 - (float(
            backup_info.deduplicated_size) / backup_info.size)
    else:
        deduplication_ratio = 0

    if self.config.reuse_backup == 'link':
        output.info(
            "Backup size: %s. Actual size on disk: %s"
            " (-%s deduplication ratio)." % (
                pretty_size(backup_info.size),
                pretty_size(backup_info.deduplicated_size),
                '{percent:.2%}'.format(percent=deduplication_ratio)
            ))
    else:
        output.info("Backup size: %s" % pretty_size(backup_info.size))
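# Illustrative sketch (not Barman code) of the deduplication arithmetic used
# above. Files with st_nlink == 1 are new data; files hard-linked into a
# previous backup by reuse_backup=link are excluded from deduplicated_size.
# The sizes below are invented for the example.
backup_size = 10 * 1024 ** 3        # 10 GiB walked on disk
deduplicated_size = 2 * 1024 ** 3   # only 2 GiB are not hard links
deduplication_ratio = 1 - float(deduplicated_size) / backup_size
print('{percent:.2%}'.format(percent=deduplication_ratio))  # prints 80.00%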
def test_result_list_backup(self, capsys):
    # mock the backup info
    bi = build_test_backup_info()
    backup_size = 12345
    wal_size = 54321
    retention_status = 'test status'

    writer = output.ConsoleOutputWriter()

    # test minimal
    writer.init_list_backup(bi.server_name, True)
    writer.result_list_backup(bi, backup_size, wal_size, retention_status)
    writer.close()
    (out, err) = capsys.readouterr()
    assert writer.minimal
    assert bi.backup_id in out
    assert err == ''

    # test status=DONE output
    writer.init_list_backup(bi.server_name, False)
    writer.result_list_backup(bi, backup_size, wal_size, retention_status)
    writer.close()
    (out, err) = capsys.readouterr()
    assert not writer.minimal
    assert bi.server_name in out
    assert bi.backup_id in out
    assert str(bi.end_time.ctime()) in out
    for name, _, location in bi.tablespaces:
        assert '%s:%s' % (name, location) in out
    assert 'Size: ' + pretty_size(backup_size) in out
    assert 'WAL Size: ' + pretty_size(wal_size) in out
    assert err == ''

    # test status = FAILED output
    bi = build_test_backup_info(status=BackupInfo.FAILED)
    writer.init_list_backup(bi.server_name, False)
    writer.result_list_backup(bi, backup_size, wal_size, retention_status)
    writer.close()
    (out, err) = capsys.readouterr()
    assert not writer.minimal
    assert bi.server_name in out
    assert bi.backup_id in out
    assert bi.status in out
def test_result_show_backup(self, capsys):
    # mock the backup ext info
    ext_info = mock_backup_ext_info()

    writer = output.ConsoleOutputWriter()

    # test minimal
    writer.result_show_backup(ext_info)
    writer.close()
    (out, err) = capsys.readouterr()
    assert ext_info['server_name'] in out
    assert ext_info['backup_id'] in out
    assert ext_info['status'] in out
    assert str(ext_info['end_time']) in out
    for name, _, location in ext_info['tablespaces']:
        assert '%s: %s' % (name, location) in out
    assert (pretty_size(ext_info['size'] + ext_info['wal_size'])) in out
    assert (pretty_size(ext_info['wal_until_next_size'])) in out
    # TODO: this test can be expanded
    assert err == ''
def test_result_show_backup(self, capsys):
    # mock the backup ext info
    wal_per_second = 0.01
    ext_info = mock_backup_ext_info(wals_per_second=wal_per_second)

    writer = output.ConsoleOutputWriter()

    # test minimal
    writer.result_show_backup(ext_info)
    writer.close()
    (out, err) = capsys.readouterr()
    assert ext_info["server_name"] in out
    assert ext_info["backup_id"] in out
    assert ext_info["status"] in out
    assert str(ext_info["end_time"]) in out
    for name, _, location in ext_info["tablespaces"]:
        assert "%s: %s" % (name, location) in out
    assert (pretty_size(ext_info["size"] + ext_info["wal_size"])) in out
    assert (pretty_size(ext_info["wal_until_next_size"])) in out
    assert "WAL rate             : %0.2f/hour" % (wal_per_second * 3600) in out
    # TODO: this test can be expanded
    assert err == ""
def result_list_backup(self, backup_info,
                       backup_size, wal_size,
                       retention_status):
    """
    Output a single backup in the list-backup command

    :param BackupInfo backup_info: backup we are displaying
    :param backup_size: size of base backup (with the required WAL files)
    :param wal_size: size of WAL files belonging to this backup
        (without the required WAL files)
    :param retention_status: retention policy status
    """
    # If minimal is set only output the backup id
    if self.minimal:
        self.info(backup_info.backup_id)
        return

    out_list = [
        "%s %s - " % (backup_info.server_name, backup_info.backup_id)
    ]
    if backup_info.status in BackupInfo.STATUS_COPY_DONE:
        end_time = backup_info.end_time.ctime()
        out_list.append(
            '%s - Size: %s - WAL Size: %s' %
            (end_time,
             pretty_size(backup_size),
             pretty_size(wal_size)))
        if backup_info.tablespaces:
            tablespaces = [
                ("%s:%s" % (tablespace.name, tablespace.location))
                for tablespace in backup_info.tablespaces
            ]
            out_list.append(' (tablespaces: %s)' %
                            ', '.join(tablespaces))
        if backup_info.status == BackupInfo.WAITING_FOR_WALS:
            out_list.append(' - %s' % BackupInfo.WAITING_FOR_WALS)
        if retention_status and retention_status != BackupInfo.NONE:
            out_list.append(' - %s' % retention_status)
    else:
        out_list.append(backup_info.status)
    self.info(''.join(out_list))
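# Minimal usage sketch (assumed setup, not Barman code) driving the writer
# above by hand. The `backups` iterable and the size/status values are
# hypothetical; in Barman they come from the server's backup catalogue.
from barman import output

writer = output.ConsoleOutputWriter()
writer.init_list_backup('main', minimal=False)
for backup_info in backups:  # hypothetical iterable of BackupInfo objects
    writer.result_list_backup(backup_info,
                              backup_size=12345,
                              wal_size=54321,
                              retention_status='VALID')
writer.close()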
def result_check(self, server_name, check, status, hint=None):
    """
    Record the result of a server check and output it as INFO

    :param str server_name: the server being checked
    :param str check: the check name
    :param bool status: True if succeeded
    :param str,None hint: hint to print if not None
    """
    self._record_check(server_name, check, status, hint)
    if hint is None:
        self.info("\t%s: %s" %
                  (check, "OK" if status else "FAILED"))
    elif type(hint) is int:
        self.info("\t%s: %s (%s)" %
                  (check, "OK" if status else "FAILED",
                   pretty_size(hint)))
    else:
        self.info("\t%s: %s (%s)" %
                  (check, "OK" if status else "FAILED", hint))
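# Usage sketch: an int hint is rendered through pretty_size(), any other
# hint is printed verbatim. Server name, check names and values below are
# invented for illustration.
writer.result_check('main', 'compression settings', True)
# expected output: "\tcompression settings: OK"
writer.result_check('main', 'backup minimum size', True, hint=1048576)
# expected output: "\tbackup minimum size: OK (1.0 MiB)"
writer.result_check('main', 'PostgreSQL', False, hint='connection refused')
# expected output: "\tPostgreSQL: FAILED (connection refused)"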
def result_replication_status(self, server_name, target,
                              xlog_location, standby_info):
    """
    Record a result line of a server replication-status command
    and output it as INFO

    :param str server_name: the replication server
    :param str target: all|hot-standby|wal-streamer
    :param str xlog_location: server's xlog location
    :param StatReplication standby_info: status info of a standby
    """
    if target == 'hot-standby':
        title = 'hot standby servers'
    elif target == 'wal-streamer':
        title = 'WAL streamers'
    else:
        title = 'streaming clients'

    if self.minimal:
        # Minimal output
        if xlog_location:
            # xlog location from the master
            self.info("%s for master '%s' (xlog @ %s):",
                      title.capitalize(), server_name, xlog_location)
        else:
            # We are connected to a standby
            self.info("%s for slave '%s':",
                      title.capitalize(), server_name)
    else:
        # Full output
        self.info("Status of %s for server '%s':",
                  title, server_name)
        # xlog location from the master
        if xlog_location:
            self.info("  Current xlog location on master: %s",
                      xlog_location)

    if standby_info is not None and not len(standby_info):
        self.info("  No %s attached", title)
        return

    # Minimal output
    if self.minimal:
        n = 1
        for standby in standby_info:
            if not standby.replay_location:
                # WAL streamer
                self.info("  %s. W) %s@%s S:%s W:%s P:%s AN:%s",
                          n,
                          standby.usename,
                          standby.client_addr or 'socket',
                          standby.sent_location,
                          standby.write_location,
                          standby.sync_priority,
                          standby.application_name)
            else:
                # Standby
                self.info("  %s. %s) %s@%s S:%s F:%s R:%s P:%s AN:%s",
                          n,
                          standby.sync_state[0].upper(),
                          standby.usename,
                          standby.client_addr or 'socket',
                          standby.sent_location,
                          standby.flush_location,
                          standby.replay_location,
                          standby.sync_priority,
                          standby.application_name)
            n += 1
    else:
        n = 1
        self.info("  Number of %s: %s",
                  title, len(standby_info))
        for standby in standby_info:
            self.info("")

            # Calculate differences in bytes
            sent_diff = diff_lsn(standby.sent_location,
                                 standby.current_location)
            write_diff = diff_lsn(standby.write_location,
                                  standby.current_location)
            flush_diff = diff_lsn(standby.flush_location,
                                  standby.current_location)
            replay_diff = diff_lsn(standby.replay_location,
                                   standby.current_location)

            # Determine the sync stage of the client
            sync_stage = None
            if not standby.replay_location:
                client_type = 'WAL streamer'
                max_level = 3
            else:
                client_type = 'standby'
                max_level = 5
                # Only standby can replay WAL info
                if replay_diff == 0:
                    sync_stage = '5/5 Hot standby (max)'
                elif flush_diff == 0:
                    sync_stage = '4/5 2-safe'  # remote flush

            # If not yet done, set the sync stage
            if not sync_stage:
                if write_diff == 0:
                    sync_stage = '3/%s Remote write' % max_level
                elif sent_diff == 0:
                    sync_stage = '2/%s WAL Sent (min)' % max_level
                else:
                    sync_stage = '1/%s 1-safe' % max_level

            # Synchronous standby
            if standby.sync_priority > 0:
                self.info("  %s. #%s %s %s",
                          n,
                          standby.sync_priority,
                          standby.sync_state.capitalize(),
                          client_type)
            # Asynchronous standby
            else:
                self.info("  %s. %s %s",
                          n,
                          standby.sync_state.capitalize(),
                          client_type)
            self.info("     Application name: %s",
                      standby.application_name)
            self.info("     Sync stage      : %s",
                      sync_stage)
            if standby.client_addr:
                self.info("     Communication   : TCP/IP")
                self.info("     IP Address      : %s "
                          "/ Port: %s / Host: %s",
                          standby.client_addr,
                          standby.client_port,
                          standby.client_hostname or '-')
            else:
                self.info("     Communication   : Unix domain socket")
            self.info("     User name       : %s", standby.usename)
            self.info("     Current state   : %s (%s)",
                      standby.state,
                      standby.sync_state)
            self.info("     WAL sender PID  : %s", standby.pid)
            self.info("     Started at      : %s", standby.backend_start)
            if standby.backend_xmin:
                self.info("     Standby's xmin  : %s",
                          standby.backend_xmin or '-')
            if standby.sent_location:
                self.info("     Sent location   : %s (diff: %s)",
                          standby.sent_location,
                          pretty_size(sent_diff))
            if standby.write_location:
                self.info("     Write location  : %s (diff: %s)",
                          standby.write_location,
                          pretty_size(write_diff))
            if standby.flush_location:
                self.info("     Flush location  : %s (diff: %s)",
                          standby.flush_location,
                          pretty_size(flush_diff))
            if standby.replay_location:
                self.info("     Replay location : %s (diff: %s)",
                          standby.replay_location,
                          pretty_size(replay_diff))
            n += 1
def result_show_backup(self, backup_ext_info):
    """
    Output all available information about a backup in show-backup
    command

    The argument has to be the result of a Server.get_backup_ext_info()
    call

    :param dict backup_ext_info: a dictionary containing the info to
        display
    """
    data = dict(backup_ext_info)
    self.info("Backup %s:", data['backup_id'])
    self.info("  Server Name            : %s", data['server_name'])
    self.info("  Status                 : %s", data['status'])
    if data['status'] == BackupInfo.DONE:
        self.info("  PostgreSQL Version     : %s", data['version'])
        self.info("  PGDATA directory       : %s", data['pgdata'])
        if data['tablespaces']:
            self.info("  Tablespaces:")
            for item in data['tablespaces']:
                self.info("    %s: %s (oid: %s)",
                          item.name, item.location, item.oid)
        self.info("")
        self.info("  Base backup information:")
        self.info("    Disk usage           : %s (%s with WALs)",
                  pretty_size(data['size']),
                  pretty_size(data['size'] + data['wal_size']))
        if data['deduplicated_size'] is not None and data['size'] > 0:
            deduplication_ratio = 1 - (float(data['deduplicated_size']) /
                                       data['size'])
            self.info("    Incremental size     : %s (-%s)",
                      pretty_size(data['deduplicated_size']),
                      '{percent:.2%}'.format(percent=deduplication_ratio))
        self.info("    Timeline             : %s", data['timeline'])
        self.info("    Begin WAL            : %s", data['begin_wal'])
        self.info("    End WAL              : %s", data['end_wal'])
        self.info("    WAL number           : %s", data['wal_num'])
        # Output WAL compression ratio for basebackup WAL files
        if data['wal_compression_ratio'] > 0:
            self.info("    WAL compression ratio: %s",
                      '{percent:.2%}'.format(
                          percent=data['wal_compression_ratio']))
        self.info("    Begin time           : %s", data['begin_time'])
        self.info("    End time             : %s", data['end_time'])
        self.info("    Begin Offset         : %s", data['begin_offset'])
        self.info("    End Offset           : %s", data['end_offset'])
        self.info("    Begin XLOG           : %s", data['begin_xlog'])
        self.info("    End XLOG             : %s", data['end_xlog'])
        self.info("")
        self.info("  WAL information:")
        self.info("    No of files          : %s",
                  data['wal_until_next_num'])
        self.info("    Disk usage           : %s",
                  pretty_size(data['wal_until_next_size']))
        # Output WAL rate
        if data['wals_per_second'] > 0:
            self.info("    WAL rate             : %0.2f/hour",
                      data['wals_per_second'] * 3600)
        # Output WAL compression ratio for archived WAL files
        if data['wal_until_next_compression_ratio'] > 0:
            self.info(
                "    Compression ratio    : %s",
                '{percent:.2%}'.format(
                    percent=data['wal_until_next_compression_ratio']))
        self.info("    Last available       : %s", data['wal_last'])
        if data['children_timelines']:
            timelines = data['children_timelines']
            self.info(
                "    Reachable timelines  : %s",
                ", ".join([str(history.tli) for history in timelines]))
        self.info("")
        self.info("  Catalog information:")
        self.info("    Retention Policy     : %s",
                  data['retention_policy_status'] or 'not enforced')
        self.info("    Previous Backup      : %s",
                  data.setdefault('previous_backup_id', 'not available') or
                  '- (this is the oldest base backup)')
        self.info("    Next Backup          : %s",
                  data.setdefault('next_backup_id', 'not available') or
                  '- (this is the latest base backup)')
    else:
        if data['error']:
            self.info("  Error:            : %s", data['error'])
    if data['children_timelines']:
        self.info("")
        self.info("WARNING: WAL information is inaccurate due to "
                  "multiple timelines interacting with this backup")
def result_show_backup(self, backup_ext_info):
    """
    Output all available information about a backup in show-backup
    command

    The argument has to be the result of a Server.get_backup_ext_info()
    call

    :param dict backup_ext_info: a dictionary containing the info to
        display
    """
    data = dict(backup_ext_info)
    self.info("Backup %s:", data["backup_id"])
    self.info("  Server Name            : %s", data["server_name"])
    self.info("  Status                 : %s", data["status"])
    if data["status"] == BackupInfo.DONE:
        self.info("  PostgreSQL Version     : %s", data["version"])
        self.info("  PGDATA directory       : %s", data["pgdata"])
        if data["tablespaces"]:
            self.info("  Tablespaces:")
            for item in data["tablespaces"]:
                self.info(
                    "    %s: %s (oid: %s)", item.name, item.location, item.oid
                )
        self.info("")
        self.info("  Base backup information:")
        self.info(
            "    Disk usage           : %s (%s with WALs)",
            pretty_size(data["size"]),
            pretty_size(data["size"] + data["wal_size"]),
        )
        if data["deduplicated_size"] is not None and data["size"] > 0:
            deduplication_ratio = 1 - (
                float(data["deduplicated_size"]) / data["size"]
            )
            self.info(
                "    Incremental size     : %s (-%s)",
                pretty_size(data["deduplicated_size"]),
                "{percent:.2%}".format(percent=deduplication_ratio),
            )
        self.info("    Timeline             : %s", data["timeline"])
        self.info("    Begin WAL            : %s", data["begin_wal"])
        self.info("    End WAL              : %s", data["end_wal"])
        self.info("    WAL number           : %s", data["wal_num"])
        # Output WAL compression ratio for basebackup WAL files
        if data["wal_compression_ratio"] > 0:
            self.info(
                "    WAL compression ratio: %s",
                "{percent:.2%}".format(percent=data["wal_compression_ratio"]),
            )
        self.info("    Begin time           : %s", data["begin_time"])
        self.info("    End time             : %s", data["end_time"])
        self.info("    Begin Offset         : %s", data["begin_offset"])
        self.info("    End Offset           : %s", data["end_offset"])
        self.info("    Begin XLOG           : %s", data["begin_xlog"])
        self.info("    End XLOG             : %s", data["end_xlog"])
        self.info("")
        self.info("  WAL information:")
        self.info("    No of files          : %s", data["wal_until_next_num"])
        self.info(
            "    Disk usage           : %s",
            pretty_size(data["wal_until_next_size"]),
        )
        # Output WAL rate
        if data["wals_per_second"] > 0:
            self.info(
                "    WAL rate             : %0.2f/hour",
                data["wals_per_second"] * 3600,
            )
        # Output WAL compression ratio for archived WAL files
        if data["wal_until_next_compression_ratio"] > 0:
            self.info(
                "    Compression ratio    : %s",
                "{percent:.2%}".format(
                    percent=data["wal_until_next_compression_ratio"]
                ),
            )
        self.info("    Last available       : %s", data["wal_last"])
        self.info("")
        self.info("  Catalog information:")
        self.info(
            "    Retention Policy     : %s",
            data["retention_policy_status"] or "not enforced",
        )
        self.info(
            "    Previous Backup      : %s",
            data.setdefault("previous_backup_id", "not available")
            or "- (this is the oldest base backup)",
        )
        self.info(
            "    Next Backup          : %s",
            data.setdefault("next_backup_id", "not available")
            or "- (this is the latest base backup)",
        )
    else:
        if data["error"]:
            self.info("  Error:            : %s", data["error"])
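# For reference, the layout this writer produces for a DONE backup looks
# roughly like the following; every value below is invented, only the
# labels and indentation come from the format strings above:
#
# Backup 20210323T085154:
#   Server Name            : main
#   Status                 : DONE
#   PostgreSQL Version     : 130002
#   PGDATA directory       : /var/lib/postgresql/13/main
#
#   Base backup information:
#     Disk usage           : 29.5 MiB (44.6 MiB with WALs)
#     Timeline             : 1
#     ...
#
#   WAL information:
#     No of files          : 3
#     Disk usage           : 48.0 MiB
#     WAL rate             : 36.00/hour
#     Last available       : 000000010000000000000003
#
#   Catalog information:
#     Retention Policy     : not enforced
#     Previous Backup      : - (this is the oldest base backup)
#     Next Backup          : - (this is the latest base backup)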
def close(self):
    """
    Display the result of a check run as expected by Nagios.

    Also set the exit code as 2 (CRITICAL) in case of errors
    """
    global error_occurred, error_exit_code

    # List of all servers that have been checked
    servers = []
    # List of servers reporting issues
    issues = []
    # Nagios performance data
    perf_detail = []
    for item in self.result_check_list:
        # Keep track of all the checked servers
        if item["server_name"] not in servers:
            servers.append(item["server_name"])
        # Keep track of the servers with issues
        if not item["status"] and item["server_name"] not in issues:
            issues.append(item["server_name"])
        # Build the performance data list
        if item["check"] == "backup minimum size":
            perf_detail.append("%s=%dB" % (item["server_name"], item["hint"]))
        if item["check"] == "wal size":
            perf_detail.append(
                "%s_wals=%dB" % (item["server_name"], item["hint"])
            )

    # Global error (detected at configuration level)
    if len(issues) == 0 and error_occurred:
        print("BARMAN CRITICAL - Global configuration errors")
        error_exit_code = 2
        return

    if len(issues) > 0:
        fail_summary = []
        details = []
        for server in issues:
            # Join all the issues for a server. Output format is in the
            # form:
            # "<server_name> FAILED: <failed_check1>, <failed_check2> ... "
            # All strings will be concatenated into the $SERVICEOUTPUT$
            # macro of the Nagios output
            server_fail = "%s FAILED: %s" % (
                server,
                ", ".join(
                    [
                        item["check"]
                        for item in self.result_check_list
                        if item["server_name"] == server and not item["status"]
                    ]
                ),
            )
            fail_summary.append(server_fail)
            # Prepare an array with the detailed output for
            # the $LONGSERVICEOUTPUT$ macro of the Nagios output
            # line format:
            # <servername>.<failed_check1>: FAILED
            # <servername>.<failed_check2>: FAILED (Hint if present)
            # <servername2>.<failed_check1>: FAILED
            # .....
            for issue in self.result_check_list:
                if issue["server_name"] == server and not issue["status"]:
                    fail_detail = "%s.%s: FAILED" % (server, issue["check"])
                    if issue["hint"] is not None:
                        if type(issue["hint"]) is int:
                            fail_detail += " (%s)" % pretty_size(issue["hint"])
                        else:
                            fail_detail += " (%s)" % issue["hint"]
                    details.append(fail_detail)
        # Append the summary of failures to the first line of the output
        # using * as delimiter
        if len(servers) == 1:
            print(
                "BARMAN CRITICAL - server %s has issues * %s|%s"
                % (servers[0], " * ".join(fail_summary), " ".join(perf_detail))
            )
        else:
            print(
                "BARMAN CRITICAL - %d server out of %d have issues * "
                "%s|%s"
                % (
                    len(issues),
                    len(servers),
                    " * ".join(fail_summary),
                    " ".join(perf_detail),
                )
            )

        # add the detailed list to the output
        for issue in details:
            print(issue)
        error_exit_code = 2
    else:
        # No issues, all good!
        # Display the output message for a single server check
        if len(servers) == 1:
            print(
                "BARMAN OK - Ready to serve the Espresso backup "
                "for %s|%s" % (servers[0], " ".join(perf_detail))
            )
        else:
            # Display the output message for several servers, using
            # '*' as delimiter
            print(
                "BARMAN OK - Ready to serve the Espresso backup "
                "for %d server(s) * %s|%s"
                % (
                    len(servers),
                    " * ".join([server for server in servers]),
                    " ".join(perf_detail),
                )
            )
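# Illustrative sketch of the Nagios plugin protocol this method targets
# (example strings composed from the format above, not captured output).
# The first line becomes $SERVICEOUTPUT$, '|' introduces performance data,
# and any following lines feed $LONGSERVICEOUTPUT$:
#
# BARMAN OK - Ready to serve the Espresso backup for main|main=2048B main_wals=1024B
#
# BARMAN CRITICAL - server main has issues * main FAILED: backup maximum age|main=2048B
# main.backup maximum age: FAILED (BARMAN_BACKUP_AGE hint text)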