def _run_periodic_snapshot_tasks(self, now, tasks):
    """Create a snapshot for every task in *tasks*, then destroy the empty ones.

    Each task's snapshot name is ``now.strftime(task.naming_schema)``. Tasks
    are processed in sorted order, and a snapshot that was already created
    successfully during this call is not created a second time.

    :param now: object providing ``strftime``; used to render snapshot names.
    :param tasks: iterable of tasks exposing ``naming_schema``, ``dataset``,
        ``recursive`` and ``exclude``.
    """

    def processing_order(pair):
        task, name = pair
        return (
            # Lexicographically less snapshots should go first
            name,
            # Recursive snapshot with same name as non-recursive should go first
            0 if task.recursive else 1,
            # Recursive snapshots without exclude should go first
            1 if task.exclude else 0,
        )

    tasks_with_snapshot_names = [
        (task, now.strftime(task.naming_schema)) for task in tasks
    ]
    tasks_with_snapshot_names.sort(key=processing_order)

    already_created = set()
    for task, snapshot_name in tasks_with_snapshot_names:
        snapshot = Snapshot(task.dataset, snapshot_name)
        if snapshot in already_created:
            continue

        try:
            create_snapshot(self.local_shell, snapshot, task.recursive, task.exclude)
        except CreateSnapshotError as e:
            # A failed snapshot is only reported; remaining tasks still run
            logger.warning("Error creating %r: %r", snapshot, e)
        else:
            logger.info("Created %r", snapshot)
            already_created.add(snapshot)

    empty_snapshots = get_empty_snapshots_for_deletion(
        self.local_shell, tasks_with_snapshot_names)
    if empty_snapshots:
        logger.info("Destroying empty snapshots: %r", empty_snapshots)
        destroy_snapshots(self.local_shell, empty_snapshots)
def _run_remote_retention(self, now: datetime):
    """Destroy remote snapshots that no push replication task still owns.

    Local snapshots of the push tasks' source datasets are listed once. Then,
    for every (transport, tasks) group, the target datasets are listed over
    that transport's shell, the snapshots to remove are calculated and they
    are destroyed through the same shell.

    :param now: timestamp handed to every ``ExecutedReplicationTaskSnapshotOwner``.
    """
    push_replication_tasks = [
        task
        for task in select_by_class(ReplicationTask, self.tasks)
        if self._is_push_replication_task(task)
    ]

    source_queries = replication_tasks_source_datasets_queries(push_replication_tasks)
    local_snapshots = multilist_snapshots(self.local_shell, source_queries)
    local_snapshots_grouped = group_snapshots_by_datasets(local_snapshots)

    grouped_tasks = self._transport_for_replication_tasks(push_replication_tasks)
    for transport, replication_tasks in grouped_tasks:
        shell = self._get_shell(transport)

        target_queries = [
            (task.target_dataset, task.recursive) for task in replication_tasks
        ]
        remote_snapshots = multilist_snapshots(shell, target_queries)
        remote_snapshots_grouped = group_snapshots_by_datasets(remote_snapshots)

        owners = []
        for task in replication_tasks:
            owners.append(
                ExecutedReplicationTaskSnapshotOwner(
                    now, task, local_snapshots_grouped, remote_snapshots_grouped))

        snapshots_to_destroy = calculate_snapshots_to_remove(owners, remote_snapshots)
        logger.info("Retention on transport %r destroying snapshots: %r",
                    transport, snapshots_to_destroy)
        destroy_snapshots(shell, snapshots_to_destroy)
def test_zfs_hold(hold):
    """Check that ``destroy_snapshots`` leaves held snapshots in place.

    :param hold: indexes into the ``snapshots`` sequence (defined outside this
        function) of the snapshots that receive a ``keep`` hold before the
        destroy is attempted.
    """
    try:
        # Start from a clean state; ``subprocess.call`` ignores failures here
        for cleanup_command in ("zfs destroy -r data/src", "zfs destroy -r data/dst"):
            subprocess.call(cleanup_command, shell=True)
        subprocess.check_call("zfs create data/dst", shell=True)

        for snapshot in snapshots:
            subprocess.check_call(
                f"zfs snapshot {snapshot.dataset}@{snapshot.name}", shell=True)

        for snapshot in (snapshots[index] for index in hold):
            subprocess.check_call(
                f"zfs hold keep {snapshot.dataset}@{snapshot.name}", shell=True)

        shell = LocalShell()
        destroy_snapshots(shell, snapshots)

        surviving = list_snapshots(shell, "data/dst", False)
        assert surviving == [snapshots[index] for index in hold]
    finally:
        # Release the hold on every snapshot (errors ignored) so that later
        # cleanup is not blocked by leftover holds
        for snapshot in snapshots:
            subprocess.call(
                f"zfs release keep {snapshot.dataset}@{snapshot.name}", shell=True)
def run_replication_steps(step_templates: [ReplicationStepTemplate], observer=None):
    """Replicate the pending snapshots of every step template, in order.

    For each template the snapshots to send are computed from the source and
    destination snapshot lists. When there is no incremental base although
    the destination already has snapshots, those are destroyed if the task
    allows replication from scratch; otherwise the run is aborted.

    :param step_templates: iterable of replication step templates.
    :param observer: passed through unchanged to ``replicate_snapshots``.
    :raises NoIncrementalBaseReplicationError: no incremental base exists, the
        destination has snapshots and ``allow_from_scratch`` is false.
    """
    for step_template in step_templates:
        task = step_template.replication_task
        source_names = step_template.src_context.datasets[step_template.src_dataset]
        # A destination dataset that is not listed simply has no snapshots
        target_names = step_template.dst_context.datasets.get(step_template.dst_dataset, [])

        incremental_base, snapshots = get_snapshots_to_send(source_names, target_names, task)

        if incremental_base is None and target_names:
            if not task.allow_from_scratch:
                raise NoIncrementalBaseReplicationError(
                    f"No incremental base on dataset {step_template.src_dataset!r} and replication from scratch "
                    f"is not allowed"
                )

            logger.warning(
                "No incremental base for replication task %r on dataset %r, destroying all destination "
                "snapshots",
                task.id, step_template.src_dataset,
            )
            stale = [Snapshot(step_template.dst_dataset, name) for name in target_names]
            destroy_snapshots(step_template.dst_context.shell, stale)

        if snapshots:
            replicate_snapshots(step_template, incremental_base, snapshots, observer)
        else:
            logger.info("No snapshots to send for replication task %r on dataset %r",
                        task.id, step_template.src_dataset)
def test__destroy_snapshots__works():
    """Three snapshots on two datasets must produce one ``zfs destroy`` per dataset."""
    shell = Mock()
    doomed = [
        Snapshot("data", "snap-1"),
        Snapshot("data/work", "snap-1"),
        Snapshot("data", "snap-2"),
    ]

    destroy_snapshots(shell, doomed)

    expected_calls = [
        call(["zfs", "destroy", "data@snap-1%snap-2"]),
        call(["zfs", "destroy", "data/work@snap-1"]),
    ]
    assert shell.exec.call_count == 2
    # The order in which the datasets are processed is not part of the contract
    shell.exec.assert_has_calls(expected_calls, any_order=True)
def test__destroy_snapshots__arg_max():
    """With ``ARG_MAX`` patched to 20, three snapshots must be split over two calls."""
    shell = Mock()
    doomed = [
        Snapshot("data", "snap-1"),
        Snapshot("data", "snap-2"),
        Snapshot("data", "snap-3"),
    ]

    with patch("zettarepl.snapshot.destroy.ARG_MAX", 20):
        destroy_snapshots(shell, doomed)

    expected_calls = [
        call(["zfs", "destroy", "data@snap-1,snap-2"]),
        call(["zfs", "destroy", "data@snap-3"]),
    ]
    assert shell.exec.call_count == 2
    shell.exec.assert_has_calls(expected_calls, any_order=True)
def _run_remote_retention(self, now: datetime):
    """Destroy remote snapshots that no push replication task still owns.

    Local snapshots of the push tasks' source datasets are listed once. Each
    (transport, tasks) group is then handled independently: a failure while
    listing or destroying snapshots on one transport is logged as a warning
    and the next transport is processed.

    :param now: timestamp handed to every ``ExecutedReplicationTaskSnapshotOwner``.
    """
    push_replication_tasks = [
        task
        for task in select_by_class(ReplicationTask, self.tasks)
        if self._is_push_replication_task(task)
    ]

    source_queries = replication_tasks_source_datasets_queries(push_replication_tasks)
    local_snapshots_grouped = group_snapshots_by_datasets(
        multilist_snapshots(self.local_shell, source_queries))

    grouped_tasks = self._transport_for_replication_tasks(push_replication_tasks)
    for transport, replication_tasks in grouped_tasks:
        shell = self._get_retention_shell(transport)
        target_queries = [
            (task.target_dataset, task.recursive) for task in replication_tasks
        ]

        try:
            # Prevent hanging remote from breaking all the replications
            with ShellTimeoutContext(3600):
                remote_snapshots = multilist_snapshots(shell, target_queries)
        except Exception as e:
            logger.warning("Remote retention failed on %r: error listing snapshots: %r",
                           transport, e)
            continue

        remote_snapshots_grouped = group_snapshots_by_datasets(remote_snapshots)
        owners = []
        for task in replication_tasks:
            owners.append(
                ExecutedReplicationTaskSnapshotOwner(
                    now, task, local_snapshots_grouped, remote_snapshots_grouped))

        snapshots_to_destroy = calculate_snapshots_to_remove(owners, remote_snapshots)
        logger.info("Retention on %r destroying snapshots: %r", transport, snapshots_to_destroy)

        try:
            # Prevent hanging remote from breaking all the replications
            with ShellTimeoutContext(3600):
                destroy_snapshots(shell, snapshots_to_destroy)
        except Exception as e:
            logger.warning("Remote retention failed on %r: error destroying snapshots: %r",
                           transport, e)
def test_zfs_clone():
    """Check that a snapshot with a clone survives ``destroy_snapshots``.

    ``snapshots[1]`` (from the ``snapshots`` sequence defined outside this
    function) is cloned to ``data/src`` and is expected to be the only
    snapshot left on ``data/dst`` afterwards.
    """
    # Start from a clean state; ``subprocess.call`` ignores failures here
    for cleanup_command in ("zfs destroy -r data/src", "zfs destroy -r data/dst"):
        subprocess.call(cleanup_command, shell=True)
    subprocess.check_call("zfs create data/dst", shell=True)

    for snapshot in snapshots:
        subprocess.check_call(
            f"zfs snapshot {snapshot.dataset}@{snapshot.name}", shell=True)

    subprocess.check_call(
        f"zfs clone {snapshots[1].dataset}@{snapshots[1].name} data/src", shell=True)

    shell = LocalShell()
    destroy_snapshots(shell, snapshots)

    surviving = list_snapshots(shell, "data/dst", False)
    assert surviving == [snapshots[1]]
def run_replication_steps(step_templates: [ReplicationStepTemplate], observer=None):
    """Replicate the pending snapshots of every step template, in order.

    A source dataset that has no snapshots at all is remembered as an ignored
    root, and every later step template whose source dataset is a child of
    such a root is skipped. When there is no incremental base although the
    destination already has snapshots, those are destroyed if the task allows
    replication from scratch; otherwise the run is aborted.

    :param step_templates: iterable of replication step templates.
    :param observer: passed through unchanged to ``replicate_snapshots``.
    :raises NoIncrementalBaseReplicationError: no incremental base exists, the
        destination has snapshots and ``allow_from_scratch`` is false.
    """
    ignored_roots = set()

    for step_template in step_templates:
        # Every matching ancestor is logged, hence no early exit from the loop
        skip = False
        for ancestor in ignored_roots:
            if not is_child(step_template.src_dataset, ancestor):
                continue
            logger.debug("Not replicating dataset %r because it's ancestor %r did not have any snapshots",
                         step_template.src_dataset, ancestor)
            skip = True
        if skip:
            continue

        source_names = step_template.src_context.datasets[step_template.src_dataset]
        # A destination dataset that is not listed simply has no snapshots
        target_names = step_template.dst_context.datasets.get(step_template.dst_dataset, [])

        incremental_base, snapshots = get_snapshots_to_send(
            source_names, target_names, step_template.replication_task)

        if incremental_base is None and target_names:
            if not step_template.replication_task.allow_from_scratch:
                raise NoIncrementalBaseReplicationError(
                    f"No incremental base on dataset {step_template.src_dataset!r} and replication from scratch "
                    f"is not allowed"
                )

            logger.warning("No incremental base for replication task %r on dataset %r, destroying all destination "
                           "snapshots", step_template.replication_task.id, step_template.src_dataset)
            stale = [Snapshot(step_template.dst_dataset, name) for name in target_names]
            destroy_snapshots(step_template.dst_context.shell, stale)

        if snapshots:
            replicate_snapshots(step_template, incremental_base, snapshots, observer)
            continue

        logger.info("No snapshots to send for replication task %r on dataset %r",
                    step_template.replication_task.id, step_template.src_dataset)
        if not source_names:
            ignored_roots.add(step_template.src_dataset)
def _run_local_retention(self, now: datetime):
    """Destroy local snapshots that none of the configured tasks still owns.

    Snapshot owners are collected from three sources: periodic snapshot
    tasks, replication tasks with ``hold_pending_snapshots`` set, and pull
    replication tasks (whose remote source datasets have to be listed). If
    listing the remote snapshots fails for any transport, a warning is logged
    and the method returns without destroying anything.

    :param now: timestamp handed to the snapshot owners.
    """
    periodic_snapshot_tasks = select_by_class(PeriodicSnapshotTask, self.tasks)
    all_replication_tasks = select_by_class(ReplicationTask, self.tasks)

    push_replication_tasks_that_can_hold = [
        task for task in all_replication_tasks if task.hold_pending_snapshots
    ]
    pull_replications_tasks = [
        task for task in all_replication_tasks if self._is_pull_replication_task(task)
    ]

    # Everything that has to be listed locally: periodic datasets, sources of
    # the holding tasks and targets of the pull tasks
    local_snapshots_queries = [
        *[(task.dataset, task.recursive) for task in periodic_snapshot_tasks],
        *replication_tasks_source_datasets_queries(push_replication_tasks_that_can_hold),
        *[(task.target_dataset, task.recursive) for task in pull_replications_tasks],
    ]
    local_snapshots = multilist_snapshots(self.local_shell, local_snapshots_queries)
    local_snapshots_grouped = group_snapshots_by_datasets(local_snapshots)

    owners = [
        PeriodicSnapshotTaskSnapshotOwner(now, task) for task in periodic_snapshot_tasks
    ]

    # These are always only PUSH replication tasks
    holding_groups = self._transport_for_replication_tasks(push_replication_tasks_that_can_hold)
    for transport, transport_tasks in holding_groups:
        shell = self._get_retention_shell(transport)
        owners.extend(
            pending_push_replication_task_snapshot_owners(
                local_snapshots_grouped, shell, transport_tasks))

    pull_groups = self._transport_for_replication_tasks(pull_replications_tasks)
    for transport, transport_tasks in pull_groups:
        shell = self._get_retention_shell(transport)
        remote_snapshots_queries = replication_tasks_source_datasets_queries(transport_tasks)

        try:
            remote_snapshots = multilist_snapshots(shell, remote_snapshots_queries)
        except Exception as e:
            logger.warning("Local retention failed: error listing snapshots on %r: %r",
                           transport, e)
            # The owner list would be incomplete, so nothing is destroyed
            return

        remote_snapshots_grouped = group_snapshots_by_datasets(remote_snapshots)
        for task in transport_tasks:
            owners.append(
                executed_pull_replication_task_snapshot_owner(
                    now, task, remote_snapshots_grouped, local_snapshots_grouped))

    snapshots_to_destroy = calculate_snapshots_to_remove(owners, local_snapshots)
    logger.info("Retention destroying local snapshots: %r", snapshots_to_destroy)
    destroy_snapshots(self.local_shell, snapshots_to_destroy)
def _run_periodic_snapshot_tasks(self, now, tasks):
    """Create a snapshot for every task in *tasks*, then destroy the empty ones.

    Snapshot names come from ``get_snapshot_name(now, task.naming_schema)``.
    A task whose generated name cannot be parsed back with its naming schema
    is skipped and reported to the observer as an error. The remaining tasks
    are processed in sorted order, and the observer is notified about the
    start, success or failure of each of them.

    :param now: timestamp the snapshot names are generated from.
    :param tasks: iterable of tasks exposing ``id``, ``naming_schema``,
        ``dataset``, ``recursive`` and ``exclude``.
    """

    def processing_order(scheduled_task):
        return (
            # Common sorting order
            parsed_snapshot_sort_key(scheduled_task.parsed_snapshot_name),
            # Recursive snapshot with same name as non-recursive should go first
            0 if scheduled_task.task.recursive else 1,
            # Recursive snapshots without exclude should go first
            1 if scheduled_task.task.exclude else 0,
        )

    scheduled_tasks = []
    for task in tasks:
        snapshot_name = get_snapshot_name(now, task.naming_schema)
        try:
            parsed_snapshot_name = parse_snapshot_name(snapshot_name, task.naming_schema)
        except ValueError as e:
            logger.warning(
                "Unable to parse snapshot name %r with naming schema %r: %s. Skipping task %r",
                snapshot_name, task.naming_schema, str(e), task,
            )
            error_text = "Unable to parse snapshot name %r: %s" % (snapshot_name, str(e))
            notify(self.observer, PeriodicSnapshotTaskError(task.id, error_text))
        else:
            scheduled_tasks.append(
                ScheduledPeriodicSnapshotTask(task, snapshot_name, parsed_snapshot_name))

    scheduled_tasks.sort(key=processing_order)
    tasks_with_snapshot_names = [
        (scheduled.task, scheduled.snapshot_name) for scheduled in scheduled_tasks
    ]

    already_created = set()
    for task, snapshot_name in tasks_with_snapshot_names:
        snapshot = Snapshot(task.dataset, snapshot_name)

        if snapshot in already_created:
            # An earlier task already produced this snapshot; report success
            # without creating it again
            notify(self.observer, PeriodicSnapshotTaskSuccess(task.id))
            continue

        options = notify(self.observer, PeriodicSnapshotTaskStart(task.id))
        try:
            create_snapshot(self.local_shell, snapshot, task.recursive, task.exclude,
                            options.properties)
        except CreateSnapshotError as e:
            logger.warning("Error creating %r: %r", snapshot, e)
            notify(self.observer, PeriodicSnapshotTaskError(task.id, str(e)))
        else:
            logger.info("Created %r", snapshot)
            already_created.add(snapshot)
            notify(self.observer, PeriodicSnapshotTaskSuccess(task.id))

    empty_snapshots = get_empty_snapshots_for_deletion(
        self.local_shell, tasks_with_snapshot_names)
    if empty_snapshots:
        logger.info("Destroying empty snapshots: %r", empty_snapshots)
        destroy_snapshots(self.local_shell, empty_snapshots)
def run_replication_steps(step_templates: [ReplicationStepTemplate], observer=None):
    """Validate, plan and then execute replication for the given step templates.

    The work is done in three passes:

    1. Every step template whose task has ``readonly == ReadOnlyBehavior.REQUIRE``
       is checked against ``dst_context.datasets_readonly``; a dataset recorded
       there with a false value aborts the run before anything is changed.
    2. A plan is built. Step templates whose source dataset is a child of a
       dataset that had no snapshots are skipped. When there is no incremental
       base, destination snapshots are destroyed if ``allow_from_scratch`` is
       set (otherwise an error is raised); when the destination has no
       snapshots and replication from scratch is not allowed, the first step
       template's target is checked with ``ensure_has_no_data``.
    3. Every planned step is replicated and followed by ``handle_readonly``.

    :param step_templates: sequence of replication step templates; it is
        iterated twice, and the first element is treated as the immediate
        target dataset of the task.
    :param observer: passed through unchanged to ``replicate_snapshots``.
    :raises ReplicationError: the task requires ``readonly=on`` but the target
        dataset is recorded as not read-only.
    :raises NoIncrementalBaseReplicationError: no incremental base exists, the
        destination has snapshots and ``allow_from_scratch`` is false.
    """
    for step_template in step_templates:
        if (
            step_template.replication_task.readonly == ReadOnlyBehavior.REQUIRE
            # A dataset missing from the mapping defaults to True and passes
            and not step_template.dst_context.datasets_readonly.get(step_template.dst_dataset, True)
        ):
            raise ReplicationError(
                f"Target dataset {step_template.dst_dataset!r} exists and does not have readonly=on property, "
                "but replication task is set up to require this property. Refusing to replicate."
            )

    plan = []
    ignored_roots = set()
    for i, step_template in enumerate(step_templates):
        task = step_template.replication_task
        src_dataset = step_template.src_dataset
        dst_dataset = step_template.dst_dataset
        is_immediate_target_dataset = i == 0

        # Every matching ancestor is logged, hence no early exit from the loop
        ignore = False
        for ignored_root in ignored_roots:
            if is_child(src_dataset, ignored_root):
                logger.debug("Not replicating dataset %r because it's ancestor %r did not have any snapshots",
                             src_dataset, ignored_root)
                ignore = True
        if ignore:
            continue

        src_snapshots = step_template.src_context.datasets[src_dataset]
        # A destination dataset that is not listed simply has no snapshots
        dst_snapshots = step_template.dst_context.datasets.get(dst_dataset, [])

        incremental_base, snapshots = get_snapshots_to_send(src_snapshots, dst_snapshots, task)
        if incremental_base is None:
            if dst_snapshots:
                if not task.allow_from_scratch:
                    raise NoIncrementalBaseReplicationError(
                        f"No incremental base on dataset {src_dataset!r} and replication from scratch "
                        f"is not allowed"
                    )

                logger.warning(
                    "No incremental base for replication task %r on dataset %r, destroying all destination "
                    "snapshots",
                    task.id, src_dataset,
                )
                destroy_snapshots(
                    step_template.dst_context.shell,
                    [Snapshot(dst_dataset, name) for name in dst_snapshots]
                )
            elif not task.allow_from_scratch and is_immediate_target_dataset:
                # We are only interested in checking target datasets, not their children
                allowed_empty_children = []
                if task.recursive:
                    # Targets of the non-excluded source children ...
                    allowed_dst_child_datasets = {
                        get_target_dataset(task, dataset)
                        for dataset in set(step_template.src_context.datasets) - set(task.exclude)
                        if dataset != src_dataset and is_child(dataset, src_dataset)
                    }
                    # ... that already exist below the target dataset
                    existing_dst_child_datasets = {
                        dataset
                        for dataset in step_template.dst_context.datasets
                        if dataset != dst_dataset and is_child(dataset, dst_dataset)
                    }
                    allowed_empty_children = list(allowed_dst_child_datasets & existing_dst_child_datasets)

                ensure_has_no_data(step_template.dst_context.shell, dst_dataset, allowed_empty_children)

        if not snapshots:
            logger.info("No snapshots to send for replication task %r on dataset %r", task.id, src_dataset)
            if not src_snapshots:
                # Children of a dataset without snapshots are skipped later on
                ignored_roots.add(src_dataset)
            continue

        encryption = None
        if is_immediate_target_dataset and dst_dataset not in step_template.dst_context.datasets:
            # Target dataset does not exist, there is a chance that intermediate datasets also do not exist
            parent = os.path.dirname(dst_dataset)
            if "/" in parent:
                create_dataset(step_template.dst_context.shell, parent)

            # Encryption is only passed on for a target dataset that does not exist yet
            encryption = task.encryption

        step_template.src_context.context.snapshots_total_by_replication_step_template[step_template] += len(snapshots)
        plan.append((step_template, incremental_base, snapshots, encryption))

    # Execution starts only after the whole plan was built without errors
    for step_template, incremental_base, snapshots, encryption in plan:
        replicate_snapshots(step_template, incremental_base, snapshots, encryption, observer)
        handle_readonly(step_template)