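# Typical invocation (hypothetical host and pool names), kept here as a quick
# reference for the argument parser defined below:
#
#   zfs-autobackup --ssh-source root@nas1 --clear-mountpoint \
#       --clear-refreservation offsite1 backuppool/nas1
#
# This selects every dataset on nas1 that has the property
# autobackup:offsite1=true, snapshots it, and replicates it under
# backuppool/nas1 on the local machine.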
import argparse
import sys
import time

# imports assumed to come from the package's sibling modules:
from .Thinner import Thinner
from .ThinnerRule import ThinnerRule
from .ZfsDataset import ZfsDataset
from .ZfsNode import ZfsNode
from .LogConsole import LogConsole


class ZfsAutobackup:
    """main class"""

    VERSION = "3.1-beta3"
    HEADER = "zfs-autobackup v{} - Copyright 2020 E.H.Eefting ([email protected])".format(VERSION)

    def __init__(self, argv, print_arguments=True):

        # helps with investigating failed regression tests:
        if print_arguments:
            print("ARGUMENTS: " + " ".join(argv))

        parser = argparse.ArgumentParser(
            description=self.HEADER,
            epilog='Full manual at: https://github.com/psy0rz/zfs_autobackup')
        parser.add_argument('--ssh-config', metavar='CONFIG-FILE', default=None,
                            help='Custom ssh client config')
        parser.add_argument('--ssh-source', metavar='USER@HOST', default=None,
                            help='Source host to get backup from.')
        parser.add_argument('--ssh-target', metavar='USER@HOST', default=None,
                            help='Target host to push backup to.')
        parser.add_argument('--keep-source', metavar='SCHEDULE', type=str, default="10,1d1w,1w1m,1m1y",
                            help='Thinning schedule for old source snapshots. Default: %(default)s')
        parser.add_argument('--keep-target', metavar='SCHEDULE', type=str, default="10,1d1w,1w1m,1m1y",
                            help='Thinning schedule for old target snapshots. Default: %(default)s')

        parser.add_argument('backup_name', metavar='backup-name',
                            help='Name of the backup (you should set the zfs property "autobackup:backup-name" to '
                                 'true on filesystems you want to backup)')
        parser.add_argument('target_path', metavar='target-path', default=None, nargs='?',
                            help='Target ZFS filesystem (optional: if not specified, zfs-autobackup will only operate '
                                 'as a snapshot-tool on the source)')

        parser.add_argument('--other-snapshots', action='store_true',
                            help='Send over other snapshots as well, not just the ones created by this tool.')
        parser.add_argument('--no-snapshot', action='store_true',
                            help='Don\'t create new snapshots (useful for finishing uncompleted backups, or cleanups)')
        parser.add_argument('--no-send', action='store_true',
                            help='Don\'t send snapshots (useful for cleanups, or if you want a separate send-cronjob)')
        parser.add_argument('--no-thinning', action='store_true',
                            help="Do not destroy any snapshots.")
        parser.add_argument('--no-holds', action='store_true',
                            help='Don\'t hold snapshots. (Faster. Allows you to destroy common snapshots.)')
        parser.add_argument('--min-change', metavar='BYTES', type=int, default=1,
                            help='Number of bytes written after which we consider a dataset changed '
                                 '(default %(default)s)')
        parser.add_argument('--allow-empty', action='store_true',
                            help='If nothing has changed, still create empty snapshots. (same as --min-change=0)')
        parser.add_argument('--ignore-replicated', action='store_true',
                            help='Ignore datasets that seem to be replicated some other way. (No changes since '
                                 'latest snapshot. Useful for proxmox HA replication)')
        parser.add_argument('--resume', action='store_true', help=argparse.SUPPRESS)
        parser.add_argument('--strip-path', metavar='N', default=0, type=int,
                            help='Number of directories to strip from the target path (use 1 when cloning zones '
                                 'between 2 SmartOS machines)')
        # parser.add_argument('--buffer', default="", help='Use mbuffer with specified size to speed up the zfs
        # transfer. (e.g. --buffer 1G) Will also show nice progress output.')

        parser.add_argument('--clear-refreservation', action='store_true',
                            help='Filter "refreservation" property. (recommended, saves space. same as '
                                 '--filter-properties refreservation)')
        parser.add_argument('--clear-mountpoint', action='store_true',
                            help='Set property canmount=noauto for new datasets. (recommended, prevents mount '
                                 'conflicts. same as --set-properties canmount=noauto)')
        parser.add_argument('--filter-properties', metavar='PROPERTY,...', type=str,
                            help='List of properties to "filter" when receiving filesystems. (you can still restore '
                                 'them with zfs inherit -S)')
        parser.add_argument('--set-properties', metavar='PROPERTY=VALUE,...', type=str,
                            help='List of properties to override when receiving filesystems. (you can still restore '
                                 'them with zfs inherit -S)')
        parser.add_argument('--rollback', action='store_true',
                            help='Rollback changes to the latest target snapshot before starting. (normally you can '
                                 'prevent changes by setting the readonly property on the target_path to on)')
        parser.add_argument('--destroy-incompatible', action='store_true',
                            help='Destroy incompatible snapshots on target. Use with care! (implies --rollback)')
        parser.add_argument('--destroy-missing', metavar="SCHEDULE", type=str, default=None,
                            help='Destroy datasets on target that are missing on the source. Specify the time since '
                                 'the last snapshot, e.g: --destroy-missing 30d')
        parser.add_argument('--ignore-transfer-errors', action='store_true',
                            help='Ignore transfer errors (still checks if received filesystem exists. useful for '
                                 'acltype errors)')
        parser.add_argument('--raw', action='store_true',
                            help='For encrypted datasets, send data exactly as it exists on disk.')

        parser.add_argument('--test', action='store_true',
                            help='Don\'t change anything, just show what would be done (still does all read-only '
                                 'operations)')
        parser.add_argument('--verbose', action='store_true', help='verbose output')
        parser.add_argument('--debug', action='store_true',
                            help='Show zfs commands that are executed, stops after an exception.')
        parser.add_argument('--debug-output', action='store_true',
                            help='Show zfs commands and their output/exit codes. (noisy)')
        parser.add_argument('--progress', action='store_true',
                            help='Show zfs progress output. Enabled automatically on ttys. '
                                 '(use --no-progress to disable)')
        parser.add_argument('--no-progress', action='store_true',
                            help=argparse.SUPPRESS)  # needed to work around a zfs recv -v bug

        parser.add_argument('--send-pipe', metavar="COMMAND", default=[], action='append',
                            help='pipe zfs send output through COMMAND')
        parser.add_argument('--recv-pipe', metavar="COMMAND", default=[], action='append',
                            help='pipe zfs recv input through COMMAND')

        # note: args is the only global variable we use, since it's a global readonly setting anyway
        args = parser.parse_args(argv)

        self.args = args

        # auto enable progress?
        if sys.stderr.isatty() and not args.no_progress:
            args.progress = True

        if args.debug_output:
            args.debug = True

        if self.args.test:
            self.args.verbose = True

        if args.allow_empty:
            args.min_change = 0

        if args.destroy_incompatible:
            args.rollback = True

        self.log = LogConsole(show_debug=self.args.debug, show_verbose=self.args.verbose,
                              color=sys.stdout.isatty())

        if args.resume:
            self.verbose("NOTE: The --resume option isn't needed anymore (it's autodetected now)")

        if args.target_path is not None and args.target_path[0] == "/":
            self.log.error("Target should not start with a /")
            sys.exit(255)

    def verbose(self, txt):
        self.log.verbose(txt)

    def error(self, txt):
        self.log.error(txt)

    def debug(self, txt):
        self.log.debug(txt)

    def set_title(self, title):
        self.log.verbose("")
        self.log.verbose("#### " + title)
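
    # How the SCHEDULE strings of --keep-source/--keep-target are interpreted
    # (as the project manual describes the thinner): a bare number keeps that
    # many of the newest snapshots, and each "<period><ttl>" rule keeps one
    # snapshot per period for the given ttl. The default "10,1d1w,1w1m,1m1y"
    # therefore keeps the last 10 snapshots, plus a daily one for a week, a
    # weekly one for a month and a monthly one for a year.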

    # NOTE: this method also uses self.args. Args that need extra processing are passed as function parameters:
    def thin_missing_targets(self, target_dataset, used_target_datasets):
        """thin target datasets that are missing on the source."""

        self.debug("Thinning obsolete datasets")

        for dataset in target_dataset.recursive_datasets:
            try:
                if dataset not in used_target_datasets:
                    dataset.debug("Missing on source, thinning")
                    dataset.thin()
            except Exception as e:
                dataset.error("Error during thinning of missing datasets ({})".format(str(e)))

    # NOTE: this method also uses self.args. Args that need extra processing are passed as function parameters:
    def destroy_missing_targets(self, target_dataset, used_target_datasets):
        """destroy target datasets that are missing on the source and that meet the requirements"""

        self.debug("Destroying obsolete datasets")

        for dataset in target_dataset.recursive_datasets:
            try:
                if dataset not in used_target_datasets:

                    # can't do anything without our own snapshots
                    if not dataset.our_snapshots:
                        if dataset.datasets:
                            # it's not a leaf, just ignore
                            dataset.debug("Destroy missing: ignoring")
                        else:
                            dataset.verbose("Destroy missing: has no snapshots made by us. "
                                            "(please destroy manually)")
                    else:
                        # past the deadline?
                        deadline_ttl = ThinnerRule("0s" + self.args.destroy_missing).ttl
                        now = int(time.time())
                        if dataset.our_snapshots[-1].timestamp + deadline_ttl > now:
                            dataset.verbose("Destroy missing: Waiting for deadline.")
                        else:
                            dataset.debug("Destroy missing: Removing our snapshots.")

                            # remove all our snapshots, except the last, to save space in case we fail later on
                            for snapshot in dataset.our_snapshots[:-1]:
                                snapshot.destroy(fail_exception=True)

                            # does it have other snapshots?
                            has_others = False
                            for snapshot in dataset.snapshots:
                                if not snapshot.is_ours():
                                    has_others = True
                                    break

                            if has_others:
                                dataset.verbose("Destroy missing: Still in use by other snapshots")
                            else:
                                if dataset.datasets:
                                    dataset.verbose("Destroy missing: Still has children here.")
                                else:
                                    dataset.verbose("Destroy missing.")
                                    dataset.our_snapshots[-1].destroy(fail_exception=True)
                                    dataset.destroy(fail_exception=True)

            except Exception as e:
                dataset.error("Error during --destroy-missing: {}".format(str(e)))

    # NOTE: this method also uses self.args. Args that need extra processing are passed as function parameters:
    def sync_datasets(self, source_node, source_datasets, target_node):
        """Sync datasets, or thin-only on both sides

        :type target_node: ZfsNode
        :type source_datasets: list of ZfsDataset
        :type source_node: ZfsNode
        """

        fail_count = 0
        target_datasets = []
        for source_dataset in source_datasets:

            try:
                # determine corresponding target_dataset
                target_name = self.args.target_path + "/" + source_dataset.lstrip_path(self.args.strip_path)
                target_dataset = ZfsDataset(target_node, target_name)
                target_datasets.append(target_dataset)

                # ensure parents exist
                # TODO: this isn't perfect yet; in some cases it can create parents when it shouldn't.
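                # Worked example (hypothetical names): with target_path
                # "backup/host1" and --strip-path 1, source dataset
                # "rpool/data/vm" maps to target_name "backup/host1/data/vm",
                # so the parent "backup/host1/data" may need to be created
                # before the receive below can succeed.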
                if not self.args.no_send \
                        and target_dataset.parent not in target_datasets \
                        and not target_dataset.parent.exists:
                    target_dataset.parent.create_filesystem(parents=True)

                # determine common zpool features (cached, so it's no problem that we call it often)
                source_features = source_node.get_zfs_pool(source_dataset.split_path()[0]).features
                target_features = target_node.get_zfs_pool(target_dataset.split_path()[0]).features
                # only features present on both pools can be used during send/recv
                common_features = [feature for feature in source_features if feature in target_features]

                # sync the snapshots of this dataset
                source_dataset.sync_snapshots(target_dataset,
                                              show_progress=self.args.progress,
                                              features=common_features,
                                              filter_properties=self.filter_properties_list(),
                                              set_properties=self.set_properties_list(),
                                              ignore_recv_exit_code=self.args.ignore_transfer_errors,
                                              holds=not self.args.no_holds,
                                              rollback=self.args.rollback,
                                              raw=self.args.raw,
                                              also_other_snapshots=self.args.other_snapshots,
                                              no_send=self.args.no_send,
                                              destroy_incompatible=self.args.destroy_incompatible,
                                              output_pipes=self.args.send_pipe,
                                              input_pipes=self.args.recv_pipe)
            except Exception as e:
                fail_count = fail_count + 1
                source_dataset.error("FAILED: " + str(e))
                if self.args.debug:
                    raise

        target_path_dataset = ZfsDataset(target_node, self.args.target_path)
        self.thin_missing_targets(target_dataset=target_path_dataset, used_target_datasets=target_datasets)

        if self.args.destroy_missing is not None:
            self.destroy_missing_targets(target_dataset=target_path_dataset, used_target_datasets=target_datasets)

        return fail_count

    def thin_source(self, source_datasets):

        if not self.args.no_thinning:
            self.set_title("Thinning source")

            for source_dataset in source_datasets:
                source_dataset.thin(skip_holds=True)

    def filter_replicated(self, datasets):
        if not self.args.ignore_replicated:
            return datasets
        else:
            self.set_title("Filtering already replicated filesystems")
            ret = []
            for dataset in datasets:
                if dataset.is_changed(self.args.min_change):
                    ret.append(dataset)
                else:
                    dataset.verbose("Ignoring, already replicated")

            return ret

    def filter_properties_list(self):

        if self.args.filter_properties:
            filter_properties = self.args.filter_properties.split(",")
        else:
            filter_properties = []

        if self.args.clear_refreservation:
            filter_properties.append("refreservation")

        return filter_properties

    def set_properties_list(self):

        if self.args.set_properties:
            set_properties = self.args.set_properties.split(",")
        else:
            set_properties = []

        if self.args.clear_mountpoint:
            set_properties.append("canmount=noauto")

        return set_properties

    def run(self):

        try:
            self.verbose(self.HEADER)

            if self.args.test:
                self.verbose("TEST MODE - SIMULATING WITHOUT MAKING ANY CHANGES")

            self.set_title("Source settings")

            description = "[Source]"
            if self.args.no_thinning:
                source_thinner = None
            else:
                source_thinner = Thinner(self.args.keep_source)
            source_node = ZfsNode(self.args.backup_name, self, ssh_config=self.args.ssh_config,
                                  ssh_to=self.args.ssh_source, readonly=self.args.test,
                                  debug_output=self.args.debug_output, description=description,
                                  thinner=source_thinner)
            source_node.verbose(
                "Selects all datasets that have property 'autobackup:{}=true' (or children of datasets that have "
                "'autobackup:{}=child')".format(self.args.backup_name, self.args.backup_name))

            self.set_title("Selecting")
            selected_source_datasets = source_node.selected_datasets
            if not selected_source_datasets:
                self.error(
                    "No source filesystems selected, please do a 'zfs set autobackup:{0}=true' on the source "
                    "datasets you want to select.".format(self.args.backup_name))
                return 255

            # filter out already replicated stuff?
            source_datasets = self.filter_replicated(selected_source_datasets)

            if not self.args.no_snapshot:
                self.set_title("Snapshotting")
                source_node.consistent_snapshot(source_datasets, source_node.new_snapshotname(),
                                                min_changed_bytes=self.args.min_change)

            # if a target is specified we sync the datasets, otherwise we just thin the source (snapshot-only mode)
            if self.args.target_path:

                # create target_node
                self.set_title("Target settings")
                if self.args.no_thinning:
                    target_thinner = None
                else:
                    target_thinner = Thinner(self.args.keep_target)
                target_node = ZfsNode(self.args.backup_name, self, ssh_config=self.args.ssh_config,
                                      ssh_to=self.args.ssh_target, readonly=self.args.test,
                                      debug_output=self.args.debug_output, description="[Target]",
                                      thinner=target_thinner)
                target_node.verbose("Receive datasets under: {}".format(self.args.target_path))

                self.set_title("Synchronising")

                # check if the target path exists, to prevent vague errors
                target_dataset = ZfsDataset(target_node, self.args.target_path)
                if not target_dataset.exists:
                    raise Exception("Target path '{}' does not exist. Please create this dataset first.".format(
                        target_dataset))

                # do the actual sync
                # NOTE: even with no_send, no_thinning and no_snapshot this does something useful, because it
                # checks the common snapshots and shows incompatible snapshots
                fail_count = self.sync_datasets(source_node=source_node, source_datasets=source_datasets,
                                                target_node=target_node)

            # no target specified, run in snapshot-only mode
            else:
                self.thin_source(source_datasets)
                fail_count = 0

            if not fail_count:
                if self.args.test:
                    self.set_title("All tests successful.")
                else:
                    self.set_title("All operations completed successfully")
                    if not self.args.target_path:
                        self.verbose("(No target_path specified, only operated as snapshot tool.)")
            else:
                if fail_count != 255:
                    self.error("{} failures!".format(fail_count))

            if self.args.test:
                self.verbose("")
                self.verbose("TEST MODE - DID NOT MAKE ANY CHANGES!")

            return fail_count

        except Exception as e:
            self.error("Exception: " + str(e))
            if self.args.debug:
                raise
            return 255
        except KeyboardInterrupt:
            self.error("Aborted")
            return 255
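
# A minimal stand-alone entry point, assuming this module is run directly; the
# packaged tool installs its own console-script wrapper, so treat this as a
# sketch of how the class is driven rather than the project's real CLI wiring:
if __name__ == "__main__":
    failed_datasets = ZfsAutobackup(sys.argv[1:]).run()  # run() returns the failure count
    sys.exit(min(failed_datasets, 255))  # clamp to the valid exit-code range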