Пример #1
0
 def _validate_exclude(cls, data, resolved_periodic_snapshot_tasks):
     """Ensure the replication task excludes everything its periodic snapshot tasks exclude.

     Raises ValueError naming the first dataset excluded by a matching
     periodic snapshot task but missing from the task's own exclude list.
     """
     for src in data["source-dataset"]:
         for task in resolved_periodic_snapshot_tasks:
             # Only snapshot tasks whose dataset covers this source matter.
             if not is_child(src, task.dataset):
                 continue
             for excluded in task.exclude:
                 relevant = is_child(excluded, src)
                 already_listed = excluded in data["exclude"]
                 if relevant and not already_listed:
                     raise ValueError(
                         "Replication tasks should exclude everything their periodic snapshot tasks exclude "
                         f"(task does not exclude {excluded!r} from periodic snapshot task "
                         f"{task.id!r})")
Пример #2
0
def executed_pull_replication_task_snapshot_owner(
        now: datetime, replication_task: ReplicationTask,
        remote_snapshots: {str: [str]}, local_snapshots: {str: [str]}):
    """Build an ExecutedReplicationTaskSnapshotOwner for a pull task.

    Only local snapshots whose dataset lies under the task's target dataset
    are handed to the owner; remote snapshots are passed through unchanged.
    """
    target = replication_task.target_dataset
    relevant_local = {}
    for dataset, snapshots in local_snapshots.items():
        if is_child(dataset, target):
            relevant_local[dataset] = snapshots
    return ExecutedReplicationTaskSnapshotOwner(now, replication_task,
                                                remote_snapshots,
                                                relevant_local)
Пример #3
0
def get_task_snapshots(datasets: [str], task: PeriodicSnapshotTask, snapshot_name: str):
    """Return the Snapshot objects this periodic snapshot task would create.

    For a non-recursive task this is just the task's own dataset; for a
    recursive task it is every given dataset under the task's dataset that
    is not excluded.
    """
    if not task.recursive:
        return [Snapshot(task.dataset, snapshot_name)]

    result = []
    for dataset in datasets:
        if is_child(dataset, task.dataset) and not should_exclude(dataset, task.exclude):
            result.append(Snapshot(dataset, snapshot_name))
    return result
Пример #4
0
def simplify_snapshot_list_queries(queries: [(str, bool)]) -> [(str, bool)]:
    """Drop (dataset, recursive) queries already covered by another query.

    A recursive query covers all children of its dataset; a non-recursive
    query covers only an identical dataset. Sorting puts the recursive query
    for a dataset before the non-recursive one so the broader query wins.
    """
    result = []
    for dataset, recursive in sorted(queries, key=lambda q: (q[0], not q[1])):
        if recursive:
            # A recursive query can only be absorbed by another recursive one.
            candidates = [q for q in result if q[1]]
        else:
            candidates = result

        covered = False
        for ds, r in candidates:
            if is_child(dataset, ds) if r else dataset == ds:
                covered = True
                break

        if not covered:
            result.append((dataset, recursive))

    return result
Пример #5
0
def run_replication_steps(step_templates: [ReplicationStepTemplate], observer=None):
    """Execute replication for each step template, in order.

    For every step: skip it if an ancestor dataset was already found to have
    no snapshots; compute the incremental base and snapshots to send; if there
    is no incremental base but the destination has snapshots, either destroy
    the destination snapshots (when allow-from-scratch is set) or raise; then
    replicate the selected snapshots.

    :param step_templates: replication step templates, ordered parent-first
    :param observer: optional progress observer passed to replicate_snapshots
    :raises NoIncrementalBaseReplicationError: no incremental base exists and
        replication from scratch is not allowed
    """
    ignored_roots = set()
    for step_template in step_templates:
        ignore = False
        for ignored_root in ignored_roots:
            if is_child(step_template.src_dataset, ignored_root):
                # Fixed typo in log message: "it's ancestor" -> "its ancestor".
                logger.debug("Not replicating dataset %r because its ancestor %r did not have any snapshots",
                             step_template.src_dataset, ignored_root)
                ignore = True
        if ignore:
            continue

        src_snapshots = step_template.src_context.datasets[step_template.src_dataset]
        dst_snapshots = step_template.dst_context.datasets.get(step_template.dst_dataset, [])

        incremental_base, snapshots = get_snapshots_to_send(src_snapshots, dst_snapshots,
                                                            step_template.replication_task)
        if incremental_base is None and dst_snapshots:
            if step_template.replication_task.allow_from_scratch:
                logger.warning("No incremental base for replication task %r on dataset %r, destroying all destination "
                               "snapshots", step_template.replication_task.id, step_template.src_dataset)
                destroy_snapshots(
                    step_template.dst_context.shell,
                    [Snapshot(step_template.dst_dataset, name) for name in dst_snapshots]
                )
            else:
                raise NoIncrementalBaseReplicationError(
                    f"No incremental base on dataset {step_template.src_dataset!r} and replication from scratch "
                    f"is not allowed"
                )

        if not snapshots:
            logger.info("No snapshots to send for replication task %r on dataset %r", step_template.replication_task.id,
                        step_template.src_dataset)
            if not src_snapshots:
                # Children of a snapshot-less dataset are skipped as well.
                ignored_roots.add(step_template.src_dataset)
            continue

        replicate_snapshots(step_template, incremental_base, snapshots, observer)
Пример #6
0
    def _replication_tasks_can_run_in_parallel(self, t1: ReplicationTask,
                                               t2: ReplicationTask):
        """Return whether two replication tasks may safely run concurrently."""
        if t1.direction == t2.direction:
            # Tasks going to different hosts never contend for datasets.
            if not are_same_host(t1.transport, t2.transport):
                return True

            # Same host and direction: the target datasets must be unrelated.
            targets_overlap = (is_child(t1.target_dataset, t2.target_dataset) or
                               is_child(t2.target_dataset, t1.target_dataset))
            return not targets_overlap

        # Opposite directions: normalize so that t1 is PUSH and t2 is PULL.
        if t1.direction == ReplicationDirection.PULL and t2.direction == ReplicationDirection.PUSH:
            t1, t2 = t2, t1

        def unrelated_to_all(target, sources):
            # True when target is neither an ancestor nor a descendant of
            # (nor equal to) any source dataset.
            return all(not is_child(target, src) and not is_child(src, target)
                       for src in sources)

        # Do not write to the local dataset we are pushing from, and do not
        # write to the remote dataset we are pulling from.
        return (unrelated_to_all(t2.target_dataset, t1.source_datasets) and
                unrelated_to_all(t1.target_dataset, t2.source_datasets))
Пример #7
0
    def from_data(cls, id, data: dict,
                  periodic_snapshot_tasks: [PeriodicSnapshotTask]):
        """Validate raw configuration and construct a replication task.

        :param id: identifier assigned to the new task
        :param data: raw configuration dict; mutated in place (defaults are
            filled in and scalar string values are wrapped into lists)
        :param periodic_snapshot_tasks: known periodic snapshot tasks used to
            resolve the ids listed in data["periodic-snapshot-tasks"]
        :raises ValueError: if the configuration is semantically invalid
        """
        replication_task_validator.validate(data)

        # These keys accept either a single string or a list of strings;
        # normalize to a list.
        for k in [
                "source-dataset", "naming-schema", "also-include-naming-schema"
        ]:
            if k in data and isinstance(data[k], str):
                data[k] = [data[k]]

        data.setdefault("exclude", [])
        data.setdefault("properties", True)
        data.setdefault("replicate", False)
        data.setdefault("periodic-snapshot-tasks", [])
        data.setdefault("only-matching-schedule", False)
        data.setdefault("allow-from-scratch", False)
        data.setdefault("hold-pending-snapshots", False)
        data.setdefault("compression", None)
        data.setdefault("speed-limit", None)
        data.setdefault("dedup", False)
        data.setdefault("large-block", False)
        data.setdefault("embed", False)
        data.setdefault("compressed", False)
        data.setdefault("retries", 5)
        data.setdefault("logging-level", "notset")

        # Resolve periodic snapshot task ids into task objects.
        resolved_periodic_snapshot_tasks = []
        for periodic_snapshot_task_id in data["periodic-snapshot-tasks"]:
            for periodic_snapshot_task in periodic_snapshot_tasks:
                if periodic_snapshot_task.id == periodic_snapshot_task_id:
                    resolved_periodic_snapshot_tasks.append(
                        periodic_snapshot_task)
                    break
            else:
                # for/else: no task with this id was found.
                raise ValueError(
                    f"Periodic snapshot task {periodic_snapshot_task_id!r} does not exist"
                )

        # A recursive replication task must exclude at least everything its
        # periodic snapshot tasks exclude.
        if data["recursive"]:
            for source_dataset in data["source-dataset"]:
                for periodic_snapshot_task in resolved_periodic_snapshot_tasks:
                    if is_child(source_dataset,
                                periodic_snapshot_task.dataset):
                        for exclude in periodic_snapshot_task.exclude:
                            if exclude not in data["exclude"]:
                                raise ValueError(
                                    "Replication tasks should exclude everything their periodic snapshot tasks exclude "
                                    f"(task does not exclude {exclude!r} from periodic snapshot task "
                                    f"{periodic_snapshot_task.id!r})")

        # Full-filesystem replication implies recursive, no dataset excludes
        # and full property replication.
        if data["replicate"]:
            if not data["recursive"]:
                raise ValueError(
                    "Replication tasks that replicate entire filesystem should be recursive"
                )
            if data["exclude"]:
                raise ValueError(
                    "Replication tasks that replicate entire filesystem can't exclude datasets"
                )
            if not data["properties"]:
                raise ValueError(
                    "Replication tasks that replicate entire filesystem can't exclude properties"
                )

        data["direction"] = ReplicationDirection(data["direction"])

        if data["direction"] == ReplicationDirection.PUSH:
            if "naming-schema" in data:
                raise ValueError(
                    "Push replication task can't have naming-schema")

            data.setdefault("also-include-naming-schema", [])

            # A push task needs at least one source of snapshot names.
            if not resolved_periodic_snapshot_tasks and not data[
                    "also-include-naming-schema"]:
                raise ValueError(
                    "You must at least provide either periodic-snapshot-tasks or also-include-naming-schema "
                    "for push replication task")

        elif data["direction"] == ReplicationDirection.PULL:
            if "naming-schema" not in data:
                raise ValueError(
                    "You must provide naming-schema for pull replication task")

            if "also-include-naming-schema" in data:
                raise ValueError(
                    "Pull replication task can't have also-include-naming-schema"
                )

            # Internally a pull task's naming-schema is carried as
            # also-include-naming-schema.
            data.setdefault("also-include-naming-schema",
                            data.pop("naming-schema"))

        schedule, restrict_schedule = cls._parse_schedules(data)

        if data["direction"] == ReplicationDirection.PULL:
            if data["hold-pending-snapshots"]:
                raise ValueError(
                    "Pull replication tasks can't hold pending snapshots because they don't do source "
                    "retention")

        retention_policy = TargetSnapshotRetentionPolicy.from_data(data)

        compression = replication_compressions[
            data["compression"]] if data["compression"] else None

        return cls(id, data["direction"], create_transport(data["transport"]),
                   data["source-dataset"], data["target-dataset"],
                   data["recursive"], data["exclude"], data["properties"],
                   data["replicate"], resolved_periodic_snapshot_tasks,
                   data["also-include-naming-schema"], data["auto"], schedule,
                   restrict_schedule, data["only-matching-schedule"],
                   data["allow-from-scratch"], data["hold-pending-snapshots"],
                   retention_policy, compression, data["speed-limit"],
                   data["dedup"], data["large-block"], data["embed"],
                   data["compressed"], data["retries"],
                   logging._nameToLevel[data["logging-level"].upper()])
Пример #8
0
def run_replication_steps(step_templates: [ReplicationStepTemplate], observer=None):
    """Plan and execute replication for each step template.

    First enforces the readonly requirement on destination datasets, then
    builds a plan (skipping datasets whose ancestor had no snapshots and
    handling the missing-incremental-base cases), and finally replicates each
    planned step and applies readonly handling.

    :param step_templates: replication step templates, ordered so that the
        immediate target dataset comes first
    :param observer: optional progress observer passed to replicate_snapshots
    :raises ReplicationError: the target dataset violates the readonly
        requirement
    :raises NoIncrementalBaseReplicationError: no incremental base exists and
        replication from scratch is not allowed
    """
    for step_template in step_templates:
        if step_template.replication_task.readonly == ReadOnlyBehavior.REQUIRE:
            if not step_template.dst_context.datasets_readonly.get(step_template.dst_dataset, True):
                # Fixed typo in error message: "does hot have" -> "does not have".
                raise ReplicationError(
                    f"Target dataset {step_template.dst_dataset!r} exists and does not have readonly=on property, "
                    "but replication task is set up to require this property. Refusing to replicate."
                )

    plan = []
    ignored_roots = set()
    for i, step_template in enumerate(step_templates):
        # The first step template replicates the target dataset itself; the
        # rest are its children.
        is_immediate_target_dataset = i == 0

        ignore = False
        for ignored_root in ignored_roots:
            if is_child(step_template.src_dataset, ignored_root):
                # Fixed typo in log message: "it's ancestor" -> "its ancestor".
                logger.debug("Not replicating dataset %r because its ancestor %r did not have any snapshots",
                             step_template.src_dataset, ignored_root)
                ignore = True
        if ignore:
            continue

        src_snapshots = step_template.src_context.datasets[step_template.src_dataset]
        dst_snapshots = step_template.dst_context.datasets.get(step_template.dst_dataset, [])

        incremental_base, snapshots = get_snapshots_to_send(src_snapshots, dst_snapshots,
                                                            step_template.replication_task)
        if incremental_base is None:
            if dst_snapshots:
                if step_template.replication_task.allow_from_scratch:
                    logger.warning(
                        "No incremental base for replication task %r on dataset %r, destroying all destination "
                        "snapshots", step_template.replication_task.id, step_template.src_dataset,
                    )
                    destroy_snapshots(
                        step_template.dst_context.shell,
                        [Snapshot(step_template.dst_dataset, name) for name in dst_snapshots]
                    )
                else:
                    raise NoIncrementalBaseReplicationError(
                        f"No incremental base on dataset {step_template.src_dataset!r} and replication from scratch "
                        f"is not allowed"
                    )
            else:
                if not step_template.replication_task.allow_from_scratch:
                    if is_immediate_target_dataset:
                        # We are only interested in checking target datasets, not their children

                        # Destination children that correspond to source
                        # children we will replicate may exist but must be
                        # empty.
                        allowed_empty_children = []
                        if step_template.replication_task.recursive:
                            allowed_dst_child_datasets = {
                                get_target_dataset(step_template.replication_task, dataset)
                                for dataset in (
                                    set(step_template.src_context.datasets) -
                                    set(step_template.replication_task.exclude)
                                )
                                if dataset != step_template.src_dataset and is_child(dataset, step_template.src_dataset)
                            }
                            existing_dst_child_datasets = {
                                dataset
                                for dataset in step_template.dst_context.datasets
                                if dataset != step_template.dst_dataset and is_child(dataset, step_template.dst_dataset)
                            }
                            allowed_empty_children = list(allowed_dst_child_datasets & existing_dst_child_datasets)

                        ensure_has_no_data(step_template.dst_context.shell, step_template.dst_dataset,
                                           allowed_empty_children)

        if not snapshots:
            logger.info("No snapshots to send for replication task %r on dataset %r", step_template.replication_task.id,
                        step_template.src_dataset)
            if not src_snapshots:
                ignored_roots.add(step_template.src_dataset)
            continue

        if is_immediate_target_dataset and step_template.dst_dataset not in step_template.dst_context.datasets:
            # Target dataset does not exist, there is a chance that intermediate datasets also do not exist
            parent = os.path.dirname(step_template.dst_dataset)
            if "/" in parent:
                create_dataset(step_template.dst_context.shell, parent)

        # Encryption only applies when creating the immediate target dataset.
        encryption = None
        if is_immediate_target_dataset and step_template.dst_dataset not in step_template.dst_context.datasets:
            encryption = step_template.replication_task.encryption

        step_template.src_context.context.snapshots_total_by_replication_step_template[step_template] += len(snapshots)
        plan.append((step_template, incremental_base, snapshots, encryption))

    for step_template, incremental_base, snapshots, encryption in plan:
        replicate_snapshots(step_template, incremental_base, snapshots, encryption, observer)
        handle_readonly(step_template)
Пример #9
0
def test__is_child(child, parent, result):
    """Parametrized check that is_child(child, parent) matches the expected result."""
    expected = result
    actual = is_child(child, parent)
    assert actual == expected
Пример #10
0
def run_replication_steps(step_templates: [ReplicationStepTemplate],
                          observer=None):
    """Plan and execute replication for each step template.

    Enforces the readonly requirement on destination datasets (adding a
    "zfs set readonly=on" hint when the target is a volume), builds a plan
    handling the missing-incremental-base cases (including destroying the
    destination dataset when replication from scratch is allowed), then
    replicates each planned step and applies readonly handling.

    :param step_templates: replication step templates, ordered so that the
        immediate target dataset comes first
    :param observer: optional progress observer passed to replicate_snapshots
    :raises ReplicationError: readonly requirement violated, or the immediate
        target dataset has no matching snapshots and no incremental base
    :raises NoIncrementalBaseReplicationError: no incremental base exists and
        replication from scratch is not allowed
    """
    for step_template in step_templates:
        if step_template.replication_task.readonly == ReadOnlyBehavior.REQUIRE:
            if not step_template.dst_context.datasets_readonly.get(
                    step_template.dst_dataset, True):
                message = (
                    f"Target dataset {step_template.dst_dataset!r} exists and does not have readonly=on property, "
                    "but replication task is set up to require this property. Refusing to replicate."
                )
                # Best-effort hint: for volumes, suggest the exact command.
                try:
                    target_type = get_property(step_template.dst_context.shell,
                                               step_template.dst_dataset,
                                               "type")
                except Exception:
                    # Hint only; the error is raised regardless.
                    pass
                else:
                    if target_type == "volume":
                        message += (
                            f" Please run \"zfs set readonly=on {step_template.dst_dataset}\" on the target system "
                            "to fix this.")

                raise ReplicationError(message)

    plan = []
    ignored_roots = set()
    for i, step_template in enumerate(step_templates):
        # The first step template replicates the target dataset itself; the
        # rest are its children.
        is_immediate_target_dataset = i == 0

        ignore = False
        for ignored_root in ignored_roots:
            if is_child(step_template.src_dataset, ignored_root):
                # Fixed typo in log message: "it's ancestor" -> "its ancestor".
                logger.debug(
                    "Not replicating dataset %r because its ancestor %r did not have any snapshots",
                    step_template.src_dataset, ignored_root)
                ignore = True
        if ignore:
            continue

        src_snapshots = step_template.src_context.datasets[
            step_template.src_dataset]
        dst_snapshots = step_template.dst_context.datasets.get(
            step_template.dst_dataset, [])

        incremental_base, snapshots, include_intermediate = get_snapshots_to_send(
            src_snapshots,
            dst_snapshots,
            step_template.replication_task,
            step_template.src_context.shell,
            step_template.src_dataset,
        )
        if incremental_base is None and snapshots:
            if dst_snapshots:
                if step_template.replication_task.allow_from_scratch:
                    logger.warning(
                        "No incremental base for replication task %r on dataset %r, destroying destination dataset",
                        step_template.replication_task.id,
                        step_template.src_dataset,
                    )
                    step_template.dst_context.shell.exec(
                        ["zfs", "destroy", "-r", step_template.dst_dataset])
                    # Drop the destroyed dataset and its children from every
                    # cached destination-state dictionary.
                    for dictionary in (
                            step_template.dst_context.datasets,
                            step_template.dst_context.datasets_encrypted,
                            step_template.dst_context.datasets_readonly,
                            step_template.dst_context.
                            datasets_receive_resume_tokens,
                    ):
                        if dictionary is None:
                            continue

                        for k in list(dictionary.keys()):
                            if k == step_template.dst_dataset or k.startswith(
                                    f"{step_template.dst_dataset}/"):
                                dictionary.pop(k)
                else:
                    raise NoIncrementalBaseReplicationError(
                        f"No incremental base on dataset {step_template.src_dataset!r} and replication from scratch "
                        f"is not allowed")
            else:
                if not step_template.replication_task.allow_from_scratch:
                    if is_immediate_target_dataset:
                        # We are only interested in checking target datasets, not their children

                        # Destination children that correspond to source
                        # children we will replicate may exist but must be
                        # empty.
                        allowed_empty_children = []
                        if step_template.replication_task.recursive:
                            allowed_dst_child_datasets = {
                                get_target_dataset(
                                    step_template.replication_task, dataset)
                                for dataset in
                                (set(step_template.src_context.datasets) -
                                 set(step_template.replication_task.exclude))
                                if dataset != step_template.src_dataset and
                                is_child(dataset, step_template.src_dataset)
                            }
                            existing_dst_child_datasets = {
                                dataset
                                for dataset in
                                step_template.dst_context.datasets
                                if dataset != step_template.dst_dataset and
                                is_child(dataset, step_template.dst_dataset)
                            }
                            allowed_empty_children = list(
                                allowed_dst_child_datasets
                                & existing_dst_child_datasets)

                        ensure_has_no_data(step_template.dst_context.shell,
                                           step_template.dst_dataset,
                                           allowed_empty_children)

        if not snapshots:
            logger.info(
                "No snapshots to send for replication task %r on dataset %r",
                step_template.replication_task.id, step_template.src_dataset)
            if is_immediate_target_dataset and incremental_base is None:
                raise ReplicationError(
                    f"Dataset {step_template.src_dataset!r} does not have any matching snapshots to replicate"
                )
            if not src_snapshots:
                ignored_roots.add(step_template.src_dataset)
            continue

        if is_immediate_target_dataset and step_template.dst_dataset not in step_template.dst_context.datasets:
            # Target dataset does not exist, there is a chance that intermediate datasets also do not exist
            parent = os.path.dirname(step_template.dst_dataset)
            if "/" in parent:
                create_dataset(step_template.dst_context.shell, parent)

        # Encryption only applies when creating the immediate target dataset.
        encryption = None
        if is_immediate_target_dataset and step_template.dst_dataset not in step_template.dst_context.datasets:
            encryption = step_template.replication_task.encryption

        step_template.src_context.context.snapshots_total_by_replication_step_template[
            step_template] += len(snapshots)
        plan.append((step_template, incremental_base, snapshots,
                     include_intermediate, encryption))

    for step_template, incremental_base, snapshots, include_intermediate, encryption in plan:
        replicate_snapshots(step_template, incremental_base, snapshots,
                            include_intermediate, encryption, observer)
        handle_readonly(step_template)