def _validate_exclude(cls, data, resolved_periodic_snapshot_tasks):
    """Verify the task's `exclude` list covers everything its periodic snapshot tasks exclude.

    Raises `ValueError` for the first exclude entry of a matching periodic
    snapshot task that applies to one of our source datasets but is missing
    from ``data["exclude"]``.
    """
    for src in data["source-dataset"]:
        for task in resolved_periodic_snapshot_tasks:
            # Only snapshot tasks whose dataset contains this source are relevant.
            if not is_child(src, task.dataset):
                continue
            for excluded in task.exclude:
                if is_child(excluded, src) and excluded not in data["exclude"]:
                    raise ValueError(
                        "Replication tasks should exclude everything their periodic snapshot tasks exclude "
                        f"(task does not exclude {excluded!r} from periodic snapshot task "
                        f"{task.id!r})")
def executed_pull_replication_task_snapshot_owner(
    now: datetime, replication_task: ReplicationTask,
    remote_snapshots: {str: [str]}, local_snapshots: {str: [str]},
):
    """Build an ``ExecutedReplicationTaskSnapshotOwner`` for a pull task.

    Local snapshots are narrowed to datasets under the task's target dataset;
    remote snapshots are passed through unchanged.
    """
    relevant_local = {}
    for dataset, snapshots in local_snapshots.items():
        if is_child(dataset, replication_task.target_dataset):
            relevant_local[dataset] = snapshots

    return ExecutedReplicationTaskSnapshotOwner(now, replication_task, remote_snapshots, relevant_local)
def get_task_snapshots(datasets: [str], task: PeriodicSnapshotTask, snapshot_name: str):
    """Return the `Snapshot` objects this periodic snapshot task would create.

    Non-recursive tasks snapshot only their own dataset; recursive tasks
    snapshot every given dataset under the task's dataset that is not excluded.
    """
    if not task.recursive:
        return [Snapshot(task.dataset, snapshot_name)]

    result = []
    for dataset in datasets:
        if is_child(dataset, task.dataset) and not should_exclude(dataset, task.exclude):
            result.append(Snapshot(dataset, snapshot_name))
    return result
def simplify_snapshot_list_queries(queries: [(str, bool)]) -> [(str, bool)]:
    """Drop (dataset, recursive) queries already covered by another query in the list.

    Queries are processed sorted by dataset name, recursive-first, so a
    recursive query absorbs both its non-recursive duplicate and any query
    for a child dataset.
    """
    kept = []

    def already_covered(dataset, recursive):
        # A recursive query can only be covered by another recursive query;
        # a non-recursive one is covered by an equal entry or a recursive ancestor.
        for ds, r in kept:
            if recursive and not r:
                continue
            if is_child(dataset, ds) if r else dataset == ds:
                return True
        return False

    ordered = sorted(queries, key=lambda q: (q[0], 0 if q[1] else 1))
    for dataset, recursive in ordered:
        if not already_covered(dataset, recursive):
            kept.append((dataset, recursive))
    return kept
def run_replication_steps(step_templates: [ReplicationStepTemplate], observer=None):
    """Execute each replication step template in order, skipping subtrees with no snapshots.

    For every step: determines the incremental base and snapshots to send;
    destroys destination snapshots (or errors) when no incremental base
    exists; records snapshot-less source roots so their children are skipped.
    """
    # Source datasets found to have no snapshots; their descendants are skipped too.
    ignored_roots = set()
    for step_template in step_templates:
        ignore = False
        for ignored_root in ignored_roots:
            if is_child(step_template.src_dataset, ignored_root):
                logger.debug("Not replicating dataset %r because it's ancestor %r did not have any snapshots",
                             step_template.src_dataset, ignored_root)
                ignore = True
        if ignore:
            continue

        src_snapshots = step_template.src_context.datasets[step_template.src_dataset]
        # Destination dataset may not exist yet, hence the .get() with default.
        dst_snapshots = step_template.dst_context.datasets.get(step_template.dst_dataset, [])

        incremental_base, snapshots = get_snapshots_to_send(src_snapshots, dst_snapshots,
                                                            step_template.replication_task)
        if incremental_base is None and dst_snapshots:
            # Destination has snapshots but none match the source: either wipe
            # the destination snapshots (replication from scratch) or fail.
            if step_template.replication_task.allow_from_scratch:
                logger.warning("No incremental base for replication task %r on dataset %r, destroying all destination "
                               "snapshots", step_template.replication_task.id, step_template.src_dataset)
                destroy_snapshots(
                    step_template.dst_context.shell,
                    [Snapshot(step_template.dst_dataset, name) for name in dst_snapshots]
                )
            else:
                raise NoIncrementalBaseReplicationError(
                    f"No incremental base on dataset {step_template.src_dataset!r} and replication from scratch "
                    f"is not allowed"
                )

        if not snapshots:
            logger.info("No snapshots to send for replication task %r on dataset %r",
                        step_template.replication_task.id, step_template.src_dataset)
            if not src_snapshots:
                # Nothing at all on the source: skip this whole subtree later.
                ignored_roots.add(step_template.src_dataset)
            continue

        replicate_snapshots(step_template, incremental_base, snapshots, observer)
def _replication_tasks_can_run_in_parallel(self, t1: ReplicationTask, t2: ReplicationTask):
    """Return True when the two replication tasks cannot interfere with each other's datasets."""
    def targets_disjoint_from_sources(target, sources):
        # Neither the target nor any source may contain the other.
        return all(not is_child(target, source) and not is_child(source, target)
                   for source in sources)

    if t1.direction == t2.direction:
        # Same direction: different hosts never collide; on the same host the
        # target datasets must not be nested in one another.
        if not are_same_host(t1.transport, t2.transport):
            return True
        return not (is_child(t1.target_dataset, t2.target_dataset) or
                    is_child(t2.target_dataset, t1.target_dataset))

    if t1.direction == ReplicationDirection.PULL and t2.direction == ReplicationDirection.PUSH:
        t1, t2 = t2, t1
    # Now t1 is PUSH, t2 is PULL
    return (
        # Do not write to local dataset from which we are pushing
        targets_disjoint_from_sources(t2.target_dataset, t1.source_datasets) and
        # Do not write to remote dataset from which we are pulling
        targets_disjoint_from_sources(t1.target_dataset, t2.source_datasets)
    )
def from_data(cls, id, data: dict, periodic_snapshot_tasks: [PeriodicSnapshotTask]):
    """Construct a replication task from a validated `data` dict.

    Validates `data` against the replication schema, normalizes scalar
    fields to lists, fills defaults, resolves periodic snapshot task ids,
    enforces direction-specific and `replicate`-specific constraints, and
    returns the constructed task instance.

    NOTE(review): the `id` parameter shadows the builtin; kept for
    interface compatibility.
    """
    replication_task_validator.validate(data)

    # These fields accept either a single string or a list; normalize to list.
    for k in [
        "source-dataset", "naming-schema", "also-include-naming-schema"
    ]:
        if k in data and isinstance(data[k], str):
            data[k] = [data[k]]

    data.setdefault("exclude", [])
    data.setdefault("properties", True)
    data.setdefault("replicate", False)
    data.setdefault("periodic-snapshot-tasks", [])
    data.setdefault("only-matching-schedule", False)
    data.setdefault("allow-from-scratch", False)
    data.setdefault("hold-pending-snapshots", False)
    data.setdefault("compression", None)
    data.setdefault("speed-limit", None)
    data.setdefault("dedup", False)
    data.setdefault("large-block", False)
    data.setdefault("embed", False)
    data.setdefault("compressed", False)
    data.setdefault("retries", 5)
    data.setdefault("logging-level", "notset")

    # Resolve referenced periodic snapshot task ids to task objects.
    resolved_periodic_snapshot_tasks = []
    for periodic_snapshot_task_id in data["periodic-snapshot-tasks"]:
        for periodic_snapshot_task in periodic_snapshot_tasks:
            if periodic_snapshot_task.id == periodic_snapshot_task_id:
                resolved_periodic_snapshot_tasks.append(
                    periodic_snapshot_task)
                break
        else:
            raise ValueError(
                f"Periodic snapshot task {periodic_snapshot_task_id!r} does not exist"
            )

    # A recursive replication task must exclude everything its periodic
    # snapshot tasks exclude, otherwise un-snapshotted children would be sent.
    if data["recursive"]:
        for source_dataset in data["source-dataset"]:
            for periodic_snapshot_task in resolved_periodic_snapshot_tasks:
                if is_child(source_dataset, periodic_snapshot_task.dataset):
                    for exclude in periodic_snapshot_task.exclude:
                        if exclude not in data["exclude"]:
                            raise ValueError(
                                "Replication tasks should exclude everything their periodic snapshot tasks exclude "
                                f"(task does not exclude {exclude!r} from periodic snapshot task "
                                f"{periodic_snapshot_task.id!r})")

    # Full-filesystem replication ("replicate") is incompatible with
    # non-recursive mode, dataset excludes and property exclusion.
    if data["replicate"]:
        if not data["recursive"]:
            raise ValueError(
                "Replication tasks that replicate entire filesystem should be recursive"
            )

        if data["exclude"]:
            raise ValueError(
                "Replication tasks that replicate entire filesystem can't exclude datasets"
            )

        if not data["properties"]:
            raise ValueError(
                "Replication tasks that replicate entire filesystem can't exclude properties"
            )

    data["direction"] = ReplicationDirection(data["direction"])
    if data["direction"] == ReplicationDirection.PUSH:
        # Push tasks derive naming from snapshot tasks or the
        # "also-include-naming-schema" field; "naming-schema" is pull-only.
        if "naming-schema" in data:
            raise ValueError(
                "Push replication task can't have naming-schema")

        data.setdefault("also-include-naming-schema", [])

        if not resolved_periodic_snapshot_tasks and not data["also-include-naming-schema"]:
            raise ValueError(
                "You must at least provide either periodic-snapshot-tasks or also-include-naming-schema "
                "for push replication task")

    elif data["direction"] == ReplicationDirection.PULL:
        if "naming-schema" not in data:
            raise ValueError(
                "You must provide naming-schema for pull replication task")

        if "also-include-naming-schema" in data:
            raise ValueError(
                "Pull replication task can't have also-include-naming-schema"
            )

        # Internally pull tasks reuse the "also-include-naming-schema" slot.
        data.setdefault("also-include-naming-schema", data.pop("naming-schema"))

    schedule, restrict_schedule = cls._parse_schedules(data)

    if data["direction"] == ReplicationDirection.PULL:
        if data["hold-pending-snapshots"]:
            raise ValueError(
                "Pull replication tasks can't hold pending snapshots because they don't do source "
                "retention")

    retention_policy = TargetSnapshotRetentionPolicy.from_data(data)

    compression = replication_compressions[data["compression"]] if data["compression"] else None

    return cls(id, data["direction"], create_transport(data["transport"]),
               data["source-dataset"], data["target-dataset"],
               data["recursive"], data["exclude"], data["properties"],
               data["replicate"], resolved_periodic_snapshot_tasks,
               data["also-include-naming-schema"], data["auto"], schedule,
               restrict_schedule, data["only-matching-schedule"],
               data["allow-from-scratch"], data["hold-pending-snapshots"],
               retention_policy, compression, data["speed-limit"],
               data["dedup"], data["large-block"], data["embed"],
               data["compressed"], data["retries"],
               logging._nameToLevel[data["logging-level"].upper()])
def run_replication_steps(step_templates: [ReplicationStepTemplate], observer=None):
    """Plan and execute replication for each step template.

    First enforces the readonly=on requirement on all target datasets, then
    builds a plan (incremental base, snapshots to send, encryption) per step,
    and finally replicates each planned step and applies readonly handling.

    Fix: error message said "does hot have" instead of "does not have".
    """
    for step_template in step_templates:
        if step_template.replication_task.readonly == ReadOnlyBehavior.REQUIRE:
            # Unknown datasets default to True so only an explicit readonly=off fails.
            if not step_template.dst_context.datasets_readonly.get(step_template.dst_dataset, True):
                raise ReplicationError(
                    f"Target dataset {step_template.dst_dataset!r} exists and does not have readonly=on property, "
                    "but replication task is set up to require this property. Refusing to replicate."
                )

    plan = []
    # Source datasets found to have no snapshots; their descendants are skipped too.
    ignored_roots = set()
    for i, step_template in enumerate(step_templates):
        # Only the first step targets the task's dataset itself; the rest are children.
        is_immediate_target_dataset = i == 0

        ignore = False
        for ignored_root in ignored_roots:
            if is_child(step_template.src_dataset, ignored_root):
                logger.debug("Not replicating dataset %r because it's ancestor %r did not have any snapshots",
                             step_template.src_dataset, ignored_root)
                ignore = True
        if ignore:
            continue

        src_snapshots = step_template.src_context.datasets[step_template.src_dataset]
        dst_snapshots = step_template.dst_context.datasets.get(step_template.dst_dataset, [])

        incremental_base, snapshots = get_snapshots_to_send(src_snapshots, dst_snapshots,
                                                            step_template.replication_task)
        if incremental_base is None:
            if dst_snapshots:
                # Destination has snapshots but none match the source: either
                # wipe them (replication from scratch) or fail.
                if step_template.replication_task.allow_from_scratch:
                    logger.warning(
                        "No incremental base for replication task %r on dataset %r, destroying all destination "
                        "snapshots", step_template.replication_task.id, step_template.src_dataset,
                    )
                    destroy_snapshots(
                        step_template.dst_context.shell,
                        [Snapshot(step_template.dst_dataset, name) for name in dst_snapshots]
                    )
                else:
                    raise NoIncrementalBaseReplicationError(
                        f"No incremental base on dataset {step_template.src_dataset!r} and replication from scratch "
                        f"is not allowed"
                    )
            else:
                if not step_template.replication_task.allow_from_scratch:
                    if is_immediate_target_dataset:
                        # We are only interested in checking target datasets, not their children

                        # Existing child datasets that would be (re)created by this
                        # recursive task are allowed to exist as long as they are empty.
                        allowed_empty_children = []
                        if step_template.replication_task.recursive:
                            allowed_dst_child_datasets = {
                                get_target_dataset(step_template.replication_task, dataset)
                                for dataset in (
                                    set(step_template.src_context.datasets) -
                                    set(step_template.replication_task.exclude)
                                )
                                if dataset != step_template.src_dataset and
                                   is_child(dataset, step_template.src_dataset)
                            }
                            existing_dst_child_datasets = {
                                dataset
                                for dataset in step_template.dst_context.datasets
                                if dataset != step_template.dst_dataset and
                                   is_child(dataset, step_template.dst_dataset)
                            }
                            allowed_empty_children = list(allowed_dst_child_datasets & existing_dst_child_datasets)

                        ensure_has_no_data(step_template.dst_context.shell, step_template.dst_dataset,
                                           allowed_empty_children)

        if not snapshots:
            logger.info("No snapshots to send for replication task %r on dataset %r",
                        step_template.replication_task.id, step_template.src_dataset)
            if not src_snapshots:
                # Nothing at all on the source: skip this whole subtree later.
                ignored_roots.add(step_template.src_dataset)
            continue

        if is_immediate_target_dataset and step_template.dst_dataset not in step_template.dst_context.datasets:
            # Target dataset does not exist, there is a chance that intermediate datasets also do not exist
            parent = os.path.dirname(step_template.dst_dataset)
            if "/" in parent:
                create_dataset(step_template.dst_context.shell, parent)

        # Encryption settings are only applied when creating the target dataset.
        encryption = None
        if is_immediate_target_dataset and step_template.dst_dataset not in step_template.dst_context.datasets:
            encryption = step_template.replication_task.encryption

        step_template.src_context.context.snapshots_total_by_replication_step_template[step_template] += len(snapshots)
        plan.append((step_template, incremental_base, snapshots, encryption))

    for step_template, incremental_base, snapshots, encryption in plan:
        replicate_snapshots(step_template, incremental_base, snapshots, encryption, observer)
        handle_readonly(step_template)
def test__is_child(child, parent, result):
    """Parametrized check that `is_child` reports the expected containment."""
    expected = result
    assert expected == is_child(child, parent)
def run_replication_steps(step_templates: [ReplicationStepTemplate], observer=None):
    """Plan and execute replication for each step template.

    Enforces the readonly=on requirement on target datasets (with a hint for
    volumes), builds a per-step plan (incremental base, snapshots, whether to
    include intermediate snapshots, encryption), destroying the destination
    dataset when replicating from scratch, then runs the plan.
    """
    for step_template in step_templates:
        if step_template.replication_task.readonly == ReadOnlyBehavior.REQUIRE:
            # Unknown datasets default to True so only an explicit readonly=off fails.
            if not step_template.dst_context.datasets_readonly.get(
                    step_template.dst_dataset, True):
                message = (
                    f"Target dataset {step_template.dst_dataset!r} exists and does not have readonly=on property, "
                    "but replication task is set up to require this property. Refusing to replicate."
                )
                # Best effort: add a fix-it hint when the target is a volume.
                try:
                    target_type = get_property(step_template.dst_context.shell,
                                               step_template.dst_dataset, "type")
                except Exception:
                    pass
                else:
                    if target_type == "volume":
                        message += (
                            f" Please run \"zfs set readonly=on {step_template.dst_dataset}\" on the target system "
                            "to fix this.")

                raise ReplicationError(message)

    plan = []
    # Source datasets found to have no snapshots; their descendants are skipped too.
    ignored_roots = set()
    for i, step_template in enumerate(step_templates):
        # Only the first step targets the task's dataset itself; the rest are children.
        is_immediate_target_dataset = i == 0

        ignore = False
        for ignored_root in ignored_roots:
            if is_child(step_template.src_dataset, ignored_root):
                logger.debug(
                    "Not replicating dataset %r because it's ancestor %r did not have any snapshots",
                    step_template.src_dataset, ignored_root)
                ignore = True
        if ignore:
            continue

        src_snapshots = step_template.src_context.datasets[
            step_template.src_dataset]
        dst_snapshots = step_template.dst_context.datasets.get(
            step_template.dst_dataset, [])

        incremental_base, snapshots, include_intermediate = get_snapshots_to_send(
            src_snapshots,
            dst_snapshots,
            step_template.replication_task,
            step_template.src_context.shell,
            step_template.src_dataset,
        )
        if incremental_base is None and snapshots:
            if dst_snapshots:
                # Destination exists but shares no base snapshot: either destroy
                # the whole destination dataset (from scratch) or fail.
                if step_template.replication_task.allow_from_scratch:
                    logger.warning(
                        "No incremental base for replication task %r on dataset %r, destroying destination dataset",
                        step_template.replication_task.id,
                        step_template.src_dataset,
                    )
                    step_template.dst_context.shell.exec(
                        ["zfs", "destroy", "-r", step_template.dst_dataset])
                    # Purge the destroyed dataset and its children from every
                    # cached destination-state dictionary so later steps see
                    # the destination as absent.
                    for dictionary in (
                        step_template.dst_context.datasets,
                        step_template.dst_context.datasets_encrypted,
                        step_template.dst_context.datasets_readonly,
                        step_template.dst_context.datasets_receive_resume_tokens,
                    ):
                        if dictionary is None:
                            continue

                        for k in list(dictionary.keys()):
                            if k == step_template.dst_dataset or k.startswith(
                                    f"{step_template.dst_dataset}/"):
                                dictionary.pop(k)
                else:
                    raise NoIncrementalBaseReplicationError(
                        f"No incremental base on dataset {step_template.src_dataset!r} and replication from scratch "
                        f"is not allowed")
            else:
                if not step_template.replication_task.allow_from_scratch:
                    if is_immediate_target_dataset:
                        # We are only interested in checking target datasets, not their children

                        # Existing child datasets that would be (re)created by this
                        # recursive task are allowed to exist as long as they are empty.
                        allowed_empty_children = []
                        if step_template.replication_task.recursive:
                            allowed_dst_child_datasets = {
                                get_target_dataset(
                                    step_template.replication_task, dataset)
                                for dataset in (set(step_template.src_context.datasets) -
                                                set(step_template.replication_task.exclude))
                                if dataset != step_template.src_dataset and
                                   is_child(dataset, step_template.src_dataset)
                            }
                            existing_dst_child_datasets = {
                                dataset
                                for dataset in step_template.dst_context.datasets
                                if dataset != step_template.dst_dataset and
                                   is_child(dataset, step_template.dst_dataset)
                            }
                            allowed_empty_children = list(
                                allowed_dst_child_datasets & existing_dst_child_datasets)

                        ensure_has_no_data(step_template.dst_context.shell,
                                           step_template.dst_dataset,
                                           allowed_empty_children)

        if not snapshots:
            logger.info(
                "No snapshots to send for replication task %r on dataset %r",
                step_template.replication_task.id,
                step_template.src_dataset)
            # The task's own dataset having no matching snapshots is an error;
            # children merely having none is normal.
            if is_immediate_target_dataset and incremental_base is None:
                raise ReplicationError(
                    f"Dataset {step_template.src_dataset!r} does not have any matching snapshots to replicate"
                )
            if not src_snapshots:
                # Nothing at all on the source: skip this whole subtree later.
                ignored_roots.add(step_template.src_dataset)
            continue

        if is_immediate_target_dataset and step_template.dst_dataset not in step_template.dst_context.datasets:
            # Target dataset does not exist, there is a chance that intermediate datasets also do not exist
            parent = os.path.dirname(step_template.dst_dataset)
            if "/" in parent:
                create_dataset(step_template.dst_context.shell, parent)

        # Encryption settings are only applied when creating the target dataset.
        encryption = None
        if is_immediate_target_dataset and step_template.dst_dataset not in step_template.dst_context.datasets:
            encryption = step_template.replication_task.encryption

        step_template.src_context.context.snapshots_total_by_replication_step_template[
            step_template] += len(snapshots)
        plan.append((step_template, incremental_base, snapshots, include_intermediate, encryption))

    for step_template, incremental_base, snapshots, include_intermediate, encryption in plan:
        replicate_snapshots(step_template, incremental_base, snapshots, include_intermediate,
                            encryption, observer)
        handle_readonly(step_template)