def test__ssh_read_stdout(bufsize):
    data = dict(hostname="127.0.0.1", port=22, username="******")
    set_localhost_transport_options(data)
    transport = SshTransport.from_data(data)

    # Emit three bufsize-sized chunks with long pauses in between so stdout has to be
    # read back across multiple reads rather than in a single buffer.
    f1 = "0" * bufsize
    f2 = "1" * bufsize
    f3 = "2" * bufsize
    result = transport.shell(transport).exec(
        ["sh", "-c", f"echo {f1}; sleep 15; echo {f2}; sleep 15; echo {f3}"])
    assert result == f"{f1}\n{f2}\n{f3}\n"
def test_replication_data_progress():
    subprocess.call("zfs destroy -r data/src", shell=True)
    subprocess.call("zfs destroy -r data/dst", shell=True)

    subprocess.check_call("zfs create data/src", shell=True)
    subprocess.check_call("dd if=/dev/urandom of=/mnt/data/src/blob bs=1M count=1", shell=True)
    subprocess.check_call("zfs snapshot data/src@2018-10-01_01-00", shell=True)

    definition = yaml.safe_load(textwrap.dedent("""\
        timezone: "UTC"

        replication-tasks:
          src:
            direction: push
            transport:
              type: ssh
              hostname: 127.0.0.1
            source-dataset:
            - data/src
            target-dataset: data/dst
            recursive: true
            also-include-naming-schema:
            - "%Y-%m-%d_%H-%M"
            auto: false
            retention-policy: none
            retries: 1
    """))
    set_localhost_transport_options(definition["replication-tasks"]["src"]["transport"])
    # Throttle the transfer so that DatasetSizeObserver (interval patched to 5 s below)
    # reports progress while the send is still running.
    definition["replication-tasks"]["src"]["speed-limit"] = 10240 * 9

    with patch("zettarepl.replication.run.DatasetSizeObserver.INTERVAL", 5):
        definition = Definition.from_data(definition)
        zettarepl = create_zettarepl(definition)
        zettarepl._spawn_replication_tasks(select_by_class(ReplicationTask, definition.tasks))
        wait_replication_tasks_to_complete(zettarepl)

    calls = [
        call for call in zettarepl.observer.call_args_list
        if call[0][0].__class__ == ReplicationTaskDataProgress
    ]
    assert len(calls) == 2

    assert 1024 * 1024 * 0.8 <= calls[0][0][0].src_size <= 1024 * 1024 * 1.2
    assert 0 <= calls[0][0][0].dst_size <= 10240 * 1.2

    assert 1024 * 1024 * 0.8 <= calls[1][0][0].src_size <= 1024 * 1024 * 1.2
    assert 10240 * 6 * 0.8 <= calls[1][0][0].dst_size <= 10240 * 6 * 1.2
def test_push_replication(compression):
    subprocess.call("zfs destroy -r data/src", shell=True)
    subprocess.call("zfs destroy -r data/dst", shell=True)

    create_dataset("data/src")
    subprocess.check_call("zfs snapshot -r data/src@2018-10-01_01-00", shell=True)

    definition = yaml.safe_load(textwrap.dedent("""\
        timezone: "UTC"

        periodic-snapshot-tasks:
          src:
            dataset: data/src
            recursive: true
            lifetime: PT1H
            naming-schema: "%Y-%m-%d_%H-%M"
            schedule:
              minute: "0"

        replication-tasks:
          src:
            direction: push
            transport:
              type: ssh
              hostname: 127.0.0.1
            source-dataset: data/src
            target-dataset: data/dst
            recursive: true
            periodic-snapshot-tasks:
            - src
            auto: true
            retention-policy: none
            retries: 1
    """))
    set_localhost_transport_options(definition["replication-tasks"]["src"]["transport"])
    definition["replication-tasks"]["src"]["compression"] = compression

    run_replication_test(definition)
def test_parallel_replication():
    subprocess.call("zfs destroy -r data/src", shell=True)
    subprocess.call("zfs receive -A data/dst", shell=True)
    subprocess.call("zfs destroy -r data/dst", shell=True)

    subprocess.check_call("zfs create data/src", shell=True)

    subprocess.check_call("zfs create data/src/a", shell=True)
    subprocess.check_call("dd if=/dev/urandom of=/mnt/data/src/a/blob bs=1M count=1", shell=True)
    subprocess.check_call("zfs snapshot data/src/a@2018-10-01_01-00", shell=True)

    subprocess.check_call("zfs create data/src/b", shell=True)
    subprocess.check_call("dd if=/dev/urandom of=/mnt/data/src/b/blob bs=1M count=1", shell=True)
    subprocess.check_call("zfs snapshot data/src/b@2018-10-01_01-00", shell=True)

    subprocess.check_call("zfs create data/dst", shell=True)
    subprocess.check_call("zfs create data/dst/a", shell=True)
    subprocess.check_call("zfs create data/dst/b", shell=True)

    definition = yaml.safe_load(textwrap.dedent("""\
        timezone: "UTC"

        periodic-snapshot-tasks:
          src-a:
            dataset: data/src/a
            recursive: true
            lifetime: PT1H
            naming-schema: "%Y-%m-%d_%H-%M"
            schedule:
              minute: "0"
          src-b:
            dataset: data/src/b
            recursive: true
            lifetime: PT1H
            naming-schema: "%Y-%m-%d_%H-%M"
            schedule:
              minute: "0"

        replication-tasks:
          src-a:
            direction: push
            transport:
              type: ssh
              hostname: localhost
            source-dataset: data/src/a
            target-dataset: data/dst/a
            recursive: true
            periodic-snapshot-tasks:
            - src-a
            auto: true
            retention-policy: none
            speed-limit: 100000
          src-b:
            direction: push
            transport:
              type: ssh
              hostname: localhost
            source-dataset: data/src/b
            target-dataset: data/dst/b
            recursive: true
            periodic-snapshot-tasks:
            - src-b
            auto: true
            retention-policy: none
            speed-limit: 100000
    """))
    set_localhost_transport_options(definition["replication-tasks"]["src-a"]["transport"])
    set_localhost_transport_options(definition["replication-tasks"]["src-b"]["transport"])
    definition = Definition.from_data(definition)

    local_shell = LocalShell()
    zettarepl = create_zettarepl(definition)
    zettarepl._spawn_replication_tasks(select_by_class(ReplicationTask, definition.tasks))

    start = time.monotonic()
    wait_replication_tasks_to_complete(zettarepl)
    end = time.monotonic()

    # Each task moves 1 MiB throttled to 100000 bytes/s (~10 s); running in parallel,
    # both should finish in roughly the time of a single task.
    assert 10 <= end - start <= 15

    zettarepl._spawn_retention.assert_called_once()

    assert sum(
        1 for m in zettarepl.observer.call_args_list
        if isinstance(m[0][0], ReplicationTaskSuccess)
    ) == 2

    assert len(list_snapshots(local_shell, "data/dst/a", False)) == 1
    assert len(list_snapshots(local_shell, "data/dst/b", False)) == 1

    subprocess.call("zfs destroy -r data/dst", shell=True)
    subprocess.check_call("zfs create data/dst", shell=True)
    subprocess.check_call("zfs create data/dst/a", shell=True)
    subprocess.check_call("zfs create data/dst/b", shell=True)

    # Force sequential execution; the same two tasks should now take roughly twice as long.
    zettarepl._replication_tasks_can_run_in_parallel = Mock(return_value=False)
    zettarepl._spawn_replication_tasks(select_by_class(ReplicationTask, definition.tasks))

    start = time.monotonic()
    wait_replication_tasks_to_complete(zettarepl)
    end = time.monotonic()

    assert 20 <= end - start <= 25

    assert sum(
        1 for m in zettarepl.observer.call_args_list
        if isinstance(m[0][0], ReplicationTaskSuccess)
    ) == 4

    assert len(list_snapshots(local_shell, "data/dst/a", False)) == 1
    assert len(list_snapshots(local_shell, "data/dst/b", False)) == 1
def test_replication_retry(caplog, direction):
    subprocess.call("zfs destroy -r data/src", shell=True)
    subprocess.call("zfs receive -A data/dst", shell=True)
    subprocess.call("zfs destroy -r data/dst", shell=True)

    subprocess.check_call("zfs create data/src", shell=True)
    subprocess.check_call("dd if=/dev/urandom of=/mnt/data/src/blob bs=1M count=1", shell=True)
    subprocess.check_call("zfs snapshot data/src@2018-10-01_01-00", shell=True)

    definition = yaml.safe_load(textwrap.dedent("""\
        timezone: "UTC"

        periodic-snapshot-tasks:
          src:
            dataset: data/src
            recursive: true
            lifetime: PT1H
            naming-schema: "%Y-%m-%d_%H-%M"
            schedule:
              minute: "0"

        replication-tasks:
          src:
            transport:
              type: ssh
              hostname: 127.0.0.1
            source-dataset: data/src
            target-dataset: data/dst
            recursive: true
            auto: false
            retention-policy: none
            speed-limit: 200000
            retries: 2
    """))
    definition["replication-tasks"]["src"]["direction"] = direction
    if direction == "push":
        definition["replication-tasks"]["src"]["periodic-snapshot-tasks"] = ["src"]
    else:
        definition["replication-tasks"]["src"]["naming-schema"] = ["%Y-%m-%d_%H-%M"]
    set_localhost_transport_options(definition["replication-tasks"]["src"]["transport"])
    definition = Definition.from_data(definition)

    caplog.set_level(logging.INFO)
    zettarepl = create_zettarepl(definition)
    zettarepl._spawn_replication_tasks(select_by_class(ReplicationTask, definition.tasks))

    # Let the throttled transfer get going, then kill the receiving (push) or sending (pull)
    # zfs process to trigger a recoverable error that the task must retry and resume from.
    time.sleep(2)
    if direction == "push":
        subprocess.check_output("kill $(pgrep -f '^zfs recv')", shell=True)
    else:
        subprocess.check_output("kill $(pgrep -f '^(zfs send|zfs: sending)')", shell=True)

    wait_replication_tasks_to_complete(zettarepl)

    assert any(
        " recoverable replication error" in record.message
        for record in caplog.get_records("call")
    )
    assert any(
        "Resuming replication for destination dataset" in record.message
        for record in caplog.get_records("call")
    )

    success = zettarepl.observer.call_args_list[-1][0][0]
    assert isinstance(success, ReplicationTaskSuccess), success

    local_shell = LocalShell()
    assert len(list_snapshots(local_shell, "data/dst", False)) == 1
def test_replication_retry(caplog):
    subprocess.call("zfs destroy -r data/src", shell=True)

    subprocess.check_call("zfs create data/src", shell=True)
    subprocess.check_call("zfs snapshot data/src@2018-10-01_01-00", shell=True)

    definition = yaml.safe_load(textwrap.dedent("""\
        timezone: "UTC"

        periodic-snapshot-tasks:
          src:
            dataset: data/src
            recursive: true
            lifetime: PT1H
            naming-schema: "%Y-%m-%d_%H-%M"
            schedule:
              minute: "0"

        replication-tasks:
          src:
            transport:
              type: ssh
              hostname: localhost
            direction: push
            source-dataset: data/src
            target-dataset: data/dst
            recursive: true
            periodic-snapshot-tasks:
            - src
            auto: false
            retention-policy: none
            retries: 2
    """))
    set_localhost_transport_options(definition["replication-tasks"]["src"]["transport"])
    definition["replication-tasks"]["src"]["transport"]["private-key"] = textwrap.dedent("""\
        -----BEGIN RSA PRIVATE KEY-----
        MIIEowIBAAKCAQEA0/5hQu83T9Jdl1NT9malC0ovHMHLspa4t6dFTSHWRUHsA3+t
        q50bBfrsS+hm4qMndxm9Sqig5/TqlM00W49SkooyU/0j4Q4xjvJ61RXOtHXPOoMH
        opLjRlmbuxkWCb0CmwXvIunaebBFfPx/VuwNJNNv9ZNcgeQJj5ggjI7hnikK4Pn4
        jpqcivqIStNO/6q+9NLsNkMQu8vq/zuxC9ePyeaywbbAIcpKREsWgiNtuhsPxnRS
        +gVQ+XVgE6RFJzMO13MtE+E4Uphseip+fSNVmLeAQyGUrUg12JevJYnMbLOQtacB
        GNDMHSwcwAzqVYPq8oqjQhWvqBntjcd/qK3P+wIDAQABAoIBAHy8tzoNS7x6CXvb
        GhJn/0EPW31OQq9IpFPb5pkmCdAio97DJ8tM2/O+238mtjMw0S3xRUJCyrrxj34S
        6HXfdTSogEiPMKdiFKMJ5mCvPjtM/qxtIPb1+ykP3ORQNHlyb7AL49PlShpEL/8F
        C2B38Jv0lXIoTUxYg4+scaqDABpw9aaYTODcJ9uvFhAcAHALKaN0iiz050dWoH9D
        CkJ1UwoHVUz6XGZ3lOR/qxUDGd72Ara0cizCXQZIkOtu8Kfnfnlx3pqOZJgbkr49
        JY3LQId5bVhNlQLKlTSAameIiAJETeLvxHzJHCvMm0LnKDfLiejq/dEk5CMgjrVz
        ExV+ioECgYEA72zxquQJo051o2mrG0DhVBT0QzXo+8yjNYVha2stBOMGvEnL0n2H
        VFDdWhpZVzRs1uR6sJC14YTGfBNk7NTaQSorgrKvYs1E/krZEMsFquwIcLtbHxYP
        zjBSQwYA7jIEFViIkZwptb+qfA+c1YehZTYzx4R/hlkkLlTObyRFcyECgYEA4qtK
        /7UaBG4kumW+cdRnqJ+KO21PylBnGaCm6yH6DO5SKlqoHvYdyds70Oat9fPX4BRJ
        2aMTivZMkGgu6Dc1AViRgBoTIReMQ9TY3y8d0unMtBddAIx0guiP/rtPrCRTC07m
        s31b6wkLTnPnW3W2N8t4LfdTLpsgmA3t5Q6Iu5sCgYB9Lg+4kpu7Z3U4KDJPAIAP
        Lxl63n/ezuJyRDdoK1QRXwWRgl/vwLP10IW661XUs1NIk5LWKAMAUyRXkOhOrwch
        1QOExRnP5ZTyA340OoHPGLNdBYgh264N1tPbuRLZdwsNggl9YBGqtfhT/vG37r7i
        pREzesIWIxs4ohyAnY02IQKBgQDARd0Qm2a+a0/sbXHmzO5BM1PmpQsR6rIKIyR0
        QBYD8gTwuIXz/YG3QKi0w3i9MWLlSVB7tMFXFyZLOJTRlkL4KVEDARtI7tikkWCF
        sUnzJy/ldAwH8xzCDtRWmD01IHrxFLTNfIEEFl/o5JhUFL3FBmujUjDVT/GOCgLK
        UlHaEQKBgFUGEgI6/GvV/JecnEWLqd+HpRHiBywpfOkAFmJGokdAOvF0QDFHK9/P
        stO7TRqUHufxZQIeTJ7sGdsabEAypiKSFBR8w1qVg+iQZ+M+t0vCgXlnHLaw2SeJ
        1YT8kH1TsdzozkxJ7tFa1A5YI37ZiUiN7ykJ0l4Zal6Nli9z5Oa0
        -----END RSA PRIVATE KEY-----
    """)  # Some random invalid SSH key
    definition = Definition.from_data(definition)

    caplog.set_level(logging.INFO)
    zettarepl = create_zettarepl(definition)
    zettarepl._spawn_replication_tasks(select_by_class(ReplicationTask, definition.tasks))
    wait_replication_tasks_to_complete(zettarepl)

    assert any(
        "non-recoverable replication error" in record.message
        for record in caplog.get_records("call")
    )
def test_parallel_replication_3(max_parallel_replications):
    subprocess.call("zfs destroy -r data/src", shell=True)
    subprocess.call("zfs receive -A data/dst", shell=True)
    subprocess.call("zfs destroy -r data/dst", shell=True)

    subprocess.check_call("zfs create data/src", shell=True)

    subprocess.check_call("zfs create data/src/a", shell=True)
    subprocess.check_call("dd if=/dev/urandom of=/mnt/data/src/a/blob bs=1M count=1", shell=True)
    subprocess.check_call("zfs snapshot data/src/a@2018-10-01_01-00", shell=True)

    subprocess.check_call("zfs create data/src/b", shell=True)
    subprocess.check_call("dd if=/dev/urandom of=/mnt/data/src/b/blob bs=1M count=1", shell=True)
    subprocess.check_call("zfs snapshot data/src/b@2018-10-01_01-00", shell=True)

    subprocess.check_call("zfs create data/src/c", shell=True)
    subprocess.check_call("dd if=/dev/urandom of=/mnt/data/src/c/blob bs=1M count=1", shell=True)
    subprocess.check_call("zfs snapshot data/src/c@2018-10-01_01-00", shell=True)

    subprocess.check_call("zfs create data/dst", shell=True)
    subprocess.check_call("zfs create data/dst/a", shell=True)
    subprocess.check_call("zfs create data/dst/b", shell=True)
    subprocess.check_call("zfs create data/dst/c", shell=True)

    definition = yaml.safe_load(textwrap.dedent("""\
        timezone: "UTC"

        periodic-snapshot-tasks:
          src-a:
            dataset: data/src/a
            recursive: true
            lifetime: PT1H
            naming-schema: "%Y-%m-%d_%H-%M"
            schedule:
              minute: "0"
          src-b:
            dataset: data/src/b
            recursive: true
            lifetime: PT1H
            naming-schema: "%Y-%m-%d_%H-%M"
            schedule:
              minute: "0"
          src-c:
            dataset: data/src/c
            recursive: true
            lifetime: PT1H
            naming-schema: "%Y-%m-%d_%H-%M"
            schedule:
              minute: "0"

        replication-tasks:
          src-a:
            direction: push
            transport:
              type: ssh
              hostname: localhost
            source-dataset: data/src/a
            target-dataset: data/dst/a
            recursive: true
            periodic-snapshot-tasks:
            - src-a
            auto: true
            retention-policy: none
            speed-limit: 100000
          src-b:
            direction: push
            transport:
              type: ssh
              hostname: localhost
            source-dataset: data/src/b
            target-dataset: data/dst/b
            recursive: true
            periodic-snapshot-tasks:
            - src-b
            auto: true
            retention-policy: none
            speed-limit: 100000
          src-c:
            direction: push
            transport:
              type: ssh
              hostname: localhost
            source-dataset: data/src/c
            target-dataset: data/dst/c
            recursive: true
            periodic-snapshot-tasks:
            - src-c
            auto: true
            retention-policy: none
            speed-limit: 100000
    """))
    definition["max-parallel-replication-tasks"] = max_parallel_replications
    set_localhost_transport_options(definition["replication-tasks"]["src-a"]["transport"])
    set_localhost_transport_options(definition["replication-tasks"]["src-b"]["transport"])
    set_localhost_transport_options(definition["replication-tasks"]["src-c"]["transport"])
    definition = Definition.from_data(definition)

    zettarepl = create_zettarepl(definition)
    zettarepl._spawn_replication_tasks(select_by_class(ReplicationTask, definition.tasks))

    start = time.monotonic()
    wait_replication_tasks_to_complete(zettarepl)
    end = time.monotonic()

    if max_parallel_replications == 3:
        # All three throttled ~10 s transfers can run at once.
        assert 10 <= end - start <= 15
    else:
        # With fewer slots than tasks, at least one transfer has to wait for a free slot.
        assert 20 <= end - start <= 25