Пример #1
0
def test_snapshot_purge_basic(bin, grpc_controller_client,  # NOQA
                              grpc_replica_client,  # NOQA
                              grpc_replica_client2):  # NOQA
    """
    Remove the oldest of two snapshots, purge, and check the snapshot info.

    1. Start a volume with 2 replicas and create snapshots snap0 and snap1.
    2. `snapshot rm snap0`: the replica chain shrinks from 3 to 2 entries,
       while `snapshot info` still lists snap0, flagged as removed.
    3. `snapshot purge`: snap0 is gone from `snapshot info` and snap1 has
       no parent any more.
    """
    def snapshot_argv(*args):
        # Argument list for one `snapshot` sub-command of the CLI binary.
        return [bin, '--url', grpc_controller_client.address,
                'snapshot', *args]

    def snapshot_info():
        # Parsed JSON output of `snapshot info`.
        return json.loads(subprocess.check_output(snapshot_argv('info')))

    for replica_client in (grpc_replica_client, grpc_replica_client2):
        open_replica(replica_client)

    replica_urls = [grpc_replica_client.url, grpc_replica_client2.url]
    volume = grpc_controller_client.volume_start(replicas=replica_urls)
    assert volume.replicaCount == 2

    create_argv = snapshot_argv('create')
    snap0 = subprocess.check_output(create_argv, encoding='utf-8').strip()
    snap1 = subprocess.check_output(create_argv, encoding='utf-8').strip()

    # The chain is ordered from the live head down to the oldest snapshot.
    chain = grpc_replica_client.replica_get().chain
    expected_images = (
        'volume-head-002.img',
        'volume-snap-{}.img'.format(snap1),
        'volume-snap-{}.img'.format(snap0),
    )
    assert len(chain) == 3
    for position, image in enumerate(expected_images):
        assert chain[position] == image

    subprocess.check_call(snapshot_argv('rm', snap0))

    # Head and snap1 keep their positions; only the oldest entry is dropped.
    new_chain = grpc_replica_client.replica_get().chain
    assert len(new_chain) == 2
    for position in range(2):
        assert chain[position] == new_chain[position]

    info = snapshot_info()
    assert len(info) == 3
    assert info[snap0]["parent"] == ""
    assert info[snap0]["removed"] is True
    assert info[snap1]["parent"] == snap0
    assert info[snap1]["removed"] is False
    assert info[VOLUME_HEAD]["parent"] == snap1

    subprocess.check_call(snapshot_argv('purge'))
    wait_for_purge_completion(grpc_controller_client.address)

    info = snapshot_info()
    assert len(info) == 2
    assert snap0 not in info
    assert info[snap1]["parent"] == ""
Пример #2
0
def test_snapshot_rm_basic(grpc_controller,  # NOQA
                           grpc_replica1, grpc_replica2):  # NOQA
    """
    Remove the middle one of three snapshots and purge it.

    After the purge only the first and last snapshots (plus the volume
    head) remain in `snapshot info`. The snapshots' data checks are then
    run before and after reverting to the first snapshot.
    """
    address = grpc_controller.address
    dev = get_dev(grpc_replica1, grpc_replica2, grpc_controller)

    written = {}
    first, middle, last = [
        Snapshot(dev, generate_random_data(written), address)
        for _ in range(3)
    ]

    info = cmd.snapshot_info(address)
    assert len(info) == 4
    for name in (VOLUME_HEAD, first.name, middle.name, last.name):
        assert name in info

    cmd.snapshot_rm(address, middle.name)
    cmd.snapshot_purge(address)
    wait_for_purge_completion(address)

    info = cmd.snapshot_info(address)
    assert len(info) == 3
    for survivor in (first, last):
        assert survivor.name in info

    # Checks against the current volume content, newest snapshot first.
    last.verify_checksum()
    middle.verify_data()
    first.verify_data()

    # After reverting to the first snapshot the later writes must be gone.
    snapshot_revert_with_frontend(address, ENGINE_NAME, first.name)
    last.refute_data()
    middle.refute_data()
    first.verify_checksum()
Пример #3
0
def test_volume_expand_with_snapshots(  # NOQA
        bin, grpc_controller_client,  # NOQA
        grpc_replica_client, grpc_replica_client2):  # NOQA
    """
    Check snapshot handling around a volume expansion, driven via the CLI.

    1. Start a volume with 2 replicas and create snapshots snap0 and snap1.
    2. Expand the volume to EXPANDED_SIZE and check that both replicas
       report the new size with the expansion snapshot at chain[1].
    3. Check on disk that the expansion snapshot file has size SIZE and
       the head file has size EXPANDED_SIZE.
    4. Create snap2 and check `snapshot ls` and `snapshot info`.
    5. Purge and check that the expansion snapshot is gone from both
       `snapshot ls` and `snapshot info`.
    6. Remove snap1, purge again, then check `snapshot ls`, the file
       sizes, and the size reported by both replicas.
    """
    open_replica(grpc_replica_client)
    open_replica(grpc_replica_client2)

    r1_url = grpc_replica_client.url
    r2_url = grpc_replica_client2.url
    v = grpc_controller_client.volume_start(replicas=[
        r1_url,
        r2_url,
    ])
    assert v.replicaCount == 2

    # First snapshot: it must become the entry right below the head.
    cmd = [bin, '--url', grpc_controller_client.address,
           'snapshot', 'create']
    snap0 = subprocess.check_output(cmd, encoding='utf-8').strip()
    expected = grpc_replica_client.replica_get().chain[1]
    assert expected == 'volume-snap-{}.img'.format(snap0)

    # Second snapshot, created with two user labels.
    cmd = [bin, '--url', grpc_controller_client.address,
           'snapshot', 'create',
           '--label', 'name=snap1', '--label', 'key=value']
    snap1 = subprocess.check_output(cmd, encoding='utf-8').strip()

    expand_volume_with_frontend(grpc_controller_client, EXPANDED_SIZE)
    wait_and_check_volume_expansion(
        grpc_controller_client, EXPANDED_SIZE)

    # `expand` will create a snapshot then apply the new size
    # on the new head file
    snap_expansion = get_expansion_snapshot_name()
    r1 = grpc_replica_client.replica_get()
    assert r1.chain[1] == 'volume-snap-{}.img'.format(snap_expansion)
    assert r1.size == EXPANDED_SIZE_STR
    r2 = grpc_replica_client2.replica_get()
    assert r2.chain[1] == 'volume-snap-{}.img'.format(snap_expansion)
    assert r2.size == EXPANDED_SIZE_STR

    # On disk: the expansion snapshot keeps the old size, the head file
    # already has the expanded size.
    replica_paths = get_replica_paths_from_snapshot_name(snap_expansion)
    assert replica_paths
    for p in replica_paths:
        snap_path = get_snapshot_file_paths(
            p, snap_expansion)
        assert snap_path is not None
        assert os.path.exists(snap_path)
        assert os.path.getsize(snap_path) == SIZE
        head_path = get_replica_head_file_path(p)
        assert head_path is not None
        assert os.path.exists(head_path)
        assert os.path.getsize(head_path) == EXPANDED_SIZE

    # Third snapshot, taken after the expansion.
    cmd = [bin, '--url', grpc_controller_client.address,
           'snapshot', 'create',
           '--label', 'name=snap2']
    snap2 = subprocess.check_output(cmd, encoding='utf-8').strip()

    cmd = [bin, '--debug',
           '--url', grpc_controller_client.address,
           'snapshot', 'ls']
    ls_output = subprocess.check_output(cmd, encoding='utf-8')

    # The listing is expected newest first, one ID per line.
    assert ls_output == '''ID
{}
{}
{}
{}
'''.format(snap2,
           snap_expansion,
           snap1,
           snap0)

    cmd = [bin, '--url', grpc_controller_client.address,
           'snapshot', 'info']
    output = subprocess.check_output(cmd)
    info = json.loads(output)

    # cannot check the snapshot size here since the output will return
    # the actual file size
    assert info[snap_expansion]["parent"] == snap1
    assert info[snap_expansion]["removed"] is False
    assert info[snap_expansion]["usercreated"] is False
    assert len(info[snap_expansion]["labels"]) == 1
    assert \
        info[snap_expansion]["labels"]["replica-expansion"] \
        == EXPANDED_SIZE_STR
    assert info[VOLUME_HEAD]["parent"] == snap2
    assert len(info[VOLUME_HEAD]["labels"]) == 0

    # snapshot purge command will coalesce the expansion snapshot
    # with its child snapshot `snap2`
    cmd = [bin, '--url', grpc_controller_client.address,
           'snapshot', 'purge']
    subprocess.check_call(cmd)
    wait_for_purge_completion(grpc_controller_client.address)

    cmd = [bin, '--debug',
           '--url', grpc_controller_client.address,
           'snapshot', 'ls']
    ls_output = subprocess.check_output(cmd, encoding='utf-8')

    # The expansion snapshot no longer appears in the listing.
    assert ls_output == '''ID
{}
{}
{}
'''.format(snap2,
           snap1,
           snap0)

    cmd = [bin, '--url', grpc_controller_client.address,
           'snapshot', 'info']
    output = subprocess.check_output(cmd)
    info = json.loads(output)
    assert snap_expansion not in info
    assert info[snap2]["parent"] == snap1
    assert info[snap2]["removed"] is False
    assert info[snap2]["usercreated"] is True

    # snap1 predates the expansion (SIZE); snap2 now has the expanded size.
    for p in replica_paths:
        snap1_path = get_snapshot_file_paths(
            p, snap1)
        assert snap1_path is not None
        assert os.path.exists(snap1_path)
        assert os.path.getsize(snap1_path) == SIZE
        snap2_path = get_snapshot_file_paths(
            p, snap2)
        assert snap2_path is not None
        assert os.path.exists(snap2_path)
        assert os.path.getsize(snap2_path) == EXPANDED_SIZE

    # Make sure the smaller snapshot `snap1` can be folded to
    # the larger one `snap2` and the replica size won't change.
    cmd = [bin, '--url', grpc_controller_client.address,
           'snapshot', 'rm', snap1]
    subprocess.check_call(cmd)
    cmd = [bin, '--url', grpc_controller_client.address,
           'snapshot', 'purge']
    subprocess.check_call(cmd)
    wait_for_purge_completion(grpc_controller_client.address)

    cmd = [bin, '--debug',
           '--url', grpc_controller_client.address,
           'snapshot', 'ls']
    ls_output = subprocess.check_output(cmd, encoding='utf-8')

    assert ls_output == '''ID
{}
{}
'''.format(snap2, snap0)

    # File sizes after the second purge: snap0 unchanged, snap2 and the
    # head still at the expanded size.
    for p in replica_paths:
        snap0_path = get_snapshot_file_paths(
            p, snap0)
        assert snap0_path is not None
        assert os.path.exists(snap0_path)
        assert os.path.getsize(snap0_path) == SIZE
        snap2_path = get_snapshot_file_paths(
            p, snap2)
        assert snap2_path is not None
        assert os.path.exists(snap2_path)
        assert os.path.getsize(snap2_path) == EXPANDED_SIZE
        head_path = get_replica_head_file_path(p)
        assert head_path is not None
        assert os.path.exists(head_path)
        assert os.path.getsize(head_path) == EXPANDED_SIZE

    # Both replicas still report the expanded size, with snap2 below head.
    r1 = grpc_replica_client.replica_get()
    assert r1.chain[1] == 'volume-snap-{}.img'.format(snap2)
    assert r1.size == EXPANDED_SIZE_STR
    r2 = grpc_replica_client2.replica_get()
    assert r2.chain[1] == 'volume-snap-{}.img'.format(snap2)
    assert r2.size == EXPANDED_SIZE_STR
Пример #4
0
def test_engine_restart_after_sigkill(bin):  # NOQA
    """
    Test if the engine can be restarted after being killed by SIGKILL.

    1. Create then initialize 1 engine and 2 replicas.
    2. Start the engine.
    3. Create 2 snapshots.
    4. Use SIGKILL to kill the engine process.
    5. Wait for the engine process to be reported as errored.
    6. Mock volume detachment by deleting
       the engine process and the replica processes.
    7. Mock volume reattachment by recreating the processes and
       restarting the engine.
    8. Check if the engine is up with 2 replicas.
    9. Check if the engine still works fine
       by creating/removing/purging snapshots.
    """
    em_client = ProcessManagerClient(INSTANCE_MANAGER_ENGINE)
    engine_process = create_engine_process(em_client)
    grpc_controller_client = ControllerClient(
        get_process_address(engine_process))

    # Each replica gets its own temp directory; the same directories are
    # passed again when the replica processes are recreated below.
    rm_client = ProcessManagerClient(INSTANCE_MANAGER_REPLICA)
    replica_dir1 = tempfile.mkdtemp()
    replica_dir2 = tempfile.mkdtemp()
    replica_process1 = create_replica_process(rm_client, REPLICA_NAME,
                                              replica_dir=replica_dir1)
    grpc_replica_client1 = ReplicaClient(
        get_process_address(replica_process1))
    # NOTE(review): presumably waits for the replica process to come up
    # before the first gRPC call -- confirm.
    time.sleep(3)
    cleanup_replica(grpc_replica_client1)
    replica_process2 = create_replica_process(rm_client, REPLICA_2_NAME,
                                              replica_dir=replica_dir2)
    grpc_replica_client2 = ReplicaClient(
        get_process_address(replica_process2))
    time.sleep(3)
    cleanup_replica(grpc_replica_client2)

    open_replica(grpc_replica_client1)
    open_replica(grpc_replica_client2)
    r1_url = grpc_replica_client1.url
    r2_url = grpc_replica_client2.url
    v = grpc_controller_client.volume_start(replicas=[
        r1_url, r2_url,
    ])
    assert v.replicaCount == 2

    # First snapshot: it must become the entry right below the head.
    cmd = [bin, '--url', grpc_controller_client.address,
           'snapshot', 'create']
    snap0 = subprocess.check_output(cmd, encoding='utf-8').strip()
    expected = grpc_replica_client1.replica_get().chain[1]
    assert expected == 'volume-snap-{}.img'.format(snap0)

    # Second snapshot, created with two user labels.
    cmd = [bin, '--url', grpc_controller_client.address,
           'snapshot', 'create',
           '--label', 'name=snap1', '--label', 'key=value']
    snap1 = subprocess.check_output(cmd, encoding='utf-8').strip()

    # SIGKILL every process whose `ps aux` line contains VOLUME_NAME
    # (the grep process itself is filtered out).
    cmd = ["bash", "-c",
           "kill -9 $(ps aux | grep %s | grep -v grep | awk '{print $2}')" %
           VOLUME_NAME]
    subprocess.check_call(cmd)
    wait_for_process_error(em_client, ENGINE_NAME)

    # Mock detach:
    cleanup_process(em_client)
    cleanup_process(rm_client)

    # Mock reattach:
    #   1. Directly create replicas processes.
    #   2. Call replica_create() to init replica servers for replica processes.
    #   3. Create one engine process and start the engine with replicas.
    replica_process1 = create_replica_process(rm_client, REPLICA_NAME,
                                              replica_dir=replica_dir1)
    grpc_replica_client1 = ReplicaClient(
        get_process_address(replica_process1))
    time.sleep(3)
    grpc_replica_client1.replica_create(size=SIZE_STR)
    replica_process2 = create_replica_process(rm_client, REPLICA_2_NAME,
                                              replica_dir=replica_dir2)
    grpc_replica_client2 = ReplicaClient(
        get_process_address(replica_process2))
    time.sleep(3)
    grpc_replica_client2.replica_create(size=SIZE_STR)

    # New engine process and new controller client; the replica URLs are
    # re-read from the recreated replica clients.
    engine_process = create_engine_process(em_client)
    grpc_controller_client = ControllerClient(
        get_process_address(engine_process))
    r1_url = grpc_replica_client1.url
    r2_url = grpc_replica_client2.url
    v = grpc_controller_client.volume_start(replicas=[
        r1_url, r2_url,
    ])
    assert v.replicaCount == 2

    # Verify the engine still works fine
    cmd = [bin, '--url', grpc_controller_client.address,
           'snapshot', 'create']
    snap2 = subprocess.check_output(cmd, encoding='utf-8').strip()
    cmd = [bin, '--url', grpc_controller_client.address,
           'snapshot', 'rm', snap1]
    subprocess.check_call(cmd)
    cmd = [bin, '--url', grpc_controller_client.address,
           'snapshot', 'purge']
    subprocess.check_call(cmd)
    wait_for_purge_completion(grpc_controller_client.address)
    cmd = [bin, '--debug',
           '--url', grpc_controller_client.address,
           'snapshot', 'ls']
    ls_output = subprocess.check_output(cmd, encoding='utf-8')

    # snap1 was removed and purged, so only snap2 and snap0 are listed.
    assert ls_output == '''ID
{}
{}
'''.format(snap2, snap0)

    # Tear down the processes created by this test.
    cleanup_process(em_client)
    cleanup_process(rm_client)
Пример #5
0
def test_single_replica_failure_during_engine_start(bin):  # NOQA
    """
    Test if the engine still works fine when there is an invalid
    replica/backend in the starting phase.

    1. Create then initialize 1 engine and 2 replicas.
    2. Start the engine.
    3. Create 2 snapshots.
    4. Mess up replica1 by manually modifying the snapshot meta file.
    5. Mock volume detachment by deleting
       the engine process and the replica processes.
    6. Mock volume reattachment by recreating the processes and
       restarting the engine.
    7. Check if the engine is up and if replica1 is in mode ERR
       in the engine.
    8. Check if the engine still works fine
       by creating one more snapshot.
    9. Remove the ERR replica from the engine,
       then check that snapshot remove and snapshot purge work fine.
    10. Check if the snapshot list is correct.
    """
    em_client = ProcessManagerClient(INSTANCE_MANAGER_ENGINE)
    engine_process = create_engine_process(em_client)
    grpc_controller_client = ControllerClient(
        get_process_address(engine_process))

    # Each replica gets its own temp directory; the same directories are
    # passed again when the replica processes are recreated below.
    rm_client = ProcessManagerClient(INSTANCE_MANAGER_REPLICA)
    replica_dir1 = tempfile.mkdtemp()
    replica_dir2 = tempfile.mkdtemp()
    replica_process1 = create_replica_process(rm_client, REPLICA_NAME,
                                              replica_dir=replica_dir1)
    grpc_replica_client1 = ReplicaClient(
        get_process_address(replica_process1))
    # NOTE(review): presumably waits for the replica process to come up
    # before the first gRPC call -- confirm.
    time.sleep(3)
    cleanup_replica(grpc_replica_client1)
    replica_process2 = create_replica_process(rm_client, REPLICA_2_NAME,
                                              replica_dir=replica_dir2)
    grpc_replica_client2 = ReplicaClient(
        get_process_address(replica_process2))
    time.sleep(3)
    cleanup_replica(grpc_replica_client2)

    open_replica(grpc_replica_client1)
    open_replica(grpc_replica_client2)
    r1_url = grpc_replica_client1.url
    r2_url = grpc_replica_client2.url
    v = grpc_controller_client.volume_start(replicas=[
        r1_url, r2_url,
    ])
    assert v.replicaCount == 2

    # First snapshot: it must become the entry right below the head.
    cmd = [bin, '--url', grpc_controller_client.address,
           'snapshot', 'create']
    snap0 = subprocess.check_output(cmd, encoding='utf-8').strip()
    expected = grpc_replica_client1.replica_get().chain[1]
    assert expected == 'volume-snap-{}.img'.format(snap0)

    # Second snapshot, created with two user labels.
    cmd = [bin, '--url', grpc_controller_client.address,
           'snapshot', 'create',
           '--label', 'name=snap1', '--label', 'key=value']
    snap1 = subprocess.check_output(cmd, encoding='utf-8').strip()

    # Mess up the replica1 by manually modifying the snapshot meta file:
    # load the JSON metadata of snap1 in replica1's directory and write it
    # back with "Parent" pointing at a non-existent image name.
    r1_snap1_meta_path = os.path.join(replica_dir1,
                                      'volume-snap-{}.img.meta'.format(snap1))
    with open(r1_snap1_meta_path, 'r') as f:
        snap1_meta_info = json.load(f)
    with open(r1_snap1_meta_path, 'w') as f:
        snap1_meta_info["Parent"] = "invalid-parent.img"
        json.dump(snap1_meta_info, f)

    # Mock detach:
    cleanup_process(em_client)
    cleanup_process(rm_client)

    # Mock reattach:
    #   1. Directly create replicas processes.
    #   2. Call replica_create() to init replica servers for replica processes.
    #   3. Create one engine process and start the engine with replicas.
    replica_process1 = create_replica_process(rm_client, REPLICA_NAME,
                                              replica_dir=replica_dir1)
    grpc_replica_client1 = ReplicaClient(
        get_process_address(replica_process1))
    time.sleep(3)
    grpc_replica_client1.replica_create(size=SIZE_STR)
    replica_process2 = create_replica_process(rm_client, REPLICA_2_NAME,
                                              replica_dir=replica_dir2)
    grpc_replica_client2 = ReplicaClient(
        get_process_address(replica_process2))
    time.sleep(3)
    grpc_replica_client2.replica_create(size=SIZE_STR)

    # New engine process and new controller client; the replica URLs are
    # re-read from the recreated replica clients.
    engine_process = create_engine_process(em_client)
    grpc_controller_client = ControllerClient(
        get_process_address(engine_process))
    r1_url = grpc_replica_client1.url
    r2_url = grpc_replica_client2.url
    v = grpc_controller_client.volume_start(replicas=[
        r1_url, r2_url,
    ])
    assert v.replicaCount == 2

    # Check if replica1 is mode `ERR`; replicas are matched by address so
    # the check does not depend on the order of the returned list.
    rs = grpc_controller_client.replica_list()
    assert len(rs) == 2
    r1_verified = False
    r2_verified = False
    for r in rs:
        if r.address == r1_url:
            assert r.mode == 'ERR'
            r1_verified = True
        if r.address == r2_url:
            assert r.mode == 'RW'
            r2_verified = True
    assert r1_verified
    assert r2_verified

    # The engine still works fine
    cmd = [bin, '--url', grpc_controller_client.address,
           'snapshot', 'create']
    snap2 = subprocess.check_output(cmd, encoding='utf-8').strip()

    # Remove the ERR replica before removing snapshots
    grpc_controller_client.replica_delete(r1_url)
    rs = grpc_controller_client.replica_list()
    assert len(rs) == 1
    assert rs[0].address == r2_url
    assert rs[0].mode == "RW"

    cmd = [bin, '--url', grpc_controller_client.address,
           'snapshot', 'rm', snap1]
    subprocess.check_call(cmd)
    cmd = [bin, '--url', grpc_controller_client.address,
           'snapshot', 'purge']
    subprocess.check_call(cmd)
    wait_for_purge_completion(grpc_controller_client.address)

    cmd = [bin, '--debug',
           '--url', grpc_controller_client.address,
           'snapshot', 'ls']
    ls_output = subprocess.check_output(cmd, encoding='utf-8')

    # snap1 was removed and purged, so only snap2 and snap0 are listed.
    assert ls_output == '''ID
{}
{}
'''.format(snap2, snap0)

    # Tear down the processes created by this test.
    cleanup_process(em_client)
    cleanup_process(rm_client)
Пример #6
0
def test_ha_single_replica_rebuild(
        grpc_controller,  # NOQA
        grpc_replica1,
        grpc_replica2):  # NOQA
    """
    Fail one of two replicas, rebuild it, and check the volume afterwards.

    1. Start a volume with 2 replicas; both must be in mode RW.
    2. Run `verify_data` with random data at a fixed offset.
    3. Clean up replica2 and check that the controller reports it as ERR
       while the data can still be read.
    4. Delete the failed replica from the controller, reopen it, add it
       back and wait for the rebuild; it must end up in mode RW.
    5. Create a snapshot and check that its parent is a system-created
       (not user-created) root snapshot.
    6. Purge and check that only the new snapshot and the volume head
       remain.
    """
    address = grpc_controller.address

    open_replica(grpc_replica1)
    open_replica(grpc_replica2)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    r1_url = grpc_replica1.url
    r2_url = grpc_replica2.url
    v = grpc_controller.volume_start(replicas=[r1_url, r2_url])
    assert v.replicaCount == 2

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev(VOLUME_NAME)

    data = random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    # Cleanup replica2
    cleanup_replica(grpc_replica2)

    verify_async(dev, 10, 128, 1)

    verify_replica_state(grpc_controller, r2_url, "ERR")

    verify_read(dev, data_offset, data)

    # Delete the failed replica by its URL rather than by its position in
    # the earlier replica_list() result: r2_url is the replica that was
    # just verified as ERR, and the list order is not something this test
    # checks.
    grpc_controller.replica_delete(r2_url)

    # Rebuild replica2
    open_replica(grpc_replica2)
    cmd.add_replica(address, r2_url)
    wait_for_rebuild_complete(address)

    verify_async(dev, 10, 128, 1)

    verify_replica_state(grpc_controller, r2_url, "RW")

    verify_read(dev, data_offset, data)

    # WORKAROUND for unable to remove the parent of volume head
    newsnap = cmd.snapshot_create(address)

    # Expected entries: the new snapshot, its system-created parent and
    # the volume head.
    info = cmd.snapshot_info(address)
    assert len(info) == 3
    sysnap = info[newsnap]["parent"]
    assert info[sysnap]["parent"] == ""
    assert newsnap in info[sysnap]["children"]
    assert info[sysnap]["usercreated"] is False
    assert info[sysnap]["removed"] is False

    cmd.snapshot_purge(address)
    wait_for_purge_completion(address)

    # The system-created snapshot is gone after the purge.
    info = cmd.snapshot_info(address)
    assert len(info) == 2
    assert info[newsnap] is not None
    assert info[VOLUME_HEAD] is not None
Пример #7
0
def test_inc_restore_with_rebuild_and_expansion(grpc_controller, grpc_replica1,
                                                grpc_replica2,
                                                grpc_controller_no_frontend,
                                                grpc_fixed_dir_replica1,
                                                grpc_fixed_dir_replica2,
                                                backup_targets):  # NOQA
    """
    Incremental restore to a DR volume combined with expansion and rebuild.

    1. Back up snap0 of the source volume and restore it to the DR volume
       (the controller without frontend).
    2. Expand the source volume, write into the expanded part and create
       a second backup.
    3. Restoring the second backup must fail until the DR volume is
       expanded; the failure must not show up in the restore status.
    4. After expanding the DR volume the next restore call fails once
       more because it starts a snapshot purge; after the purge the
       restore succeeds.
    5. Adding a second replica to the DR volume fails until that replica
       is expanded manually; then it is added, restored and verified.
    """
    # Pick up a random backup target. random.choice works for any
    # non-empty number of targets instead of assuming exactly two.
    backup_target = random.choice(backup_targets)

    address = grpc_controller.address
    dr_address = grpc_controller_no_frontend.address

    # Best-effort removal of a backup volume left over from an earlier
    # run; a failure here (e.g. nothing to remove) is deliberately ignored.
    try:
        cmd.backup_volume_rm(address, VOLUME_NAME, backup_target)
    except Exception:
        pass

    dev = get_dev(grpc_replica1, grpc_replica2, grpc_controller)

    start_no_frontend_volume(grpc_controller_no_frontend,
                             grpc_fixed_dir_replica1)

    # Data inside the original (pre-expansion) part of the volume.
    data0_len = random_length(PAGE_SIZE)
    data0 = Data(random.randrange(0, SIZE - 2 * PAGE_SIZE, PAGE_SIZE),
                 data0_len, random_string(data0_len))
    snap0 = Snapshot(dev, data0, address)

    backup0_info = create_backup(address, snap0.name, backup_target)
    assert backup0_info["VolumeName"] == VOLUME_NAME
    assert backup0_info["Size"] == str(BLOCK_SIZE)

    cmd.backup_restore(dr_address, backup0_info["URL"])
    wait_for_restore_completion(dr_address, backup0_info["URL"])
    verify_no_frontend_data(data0.offset, data0.content,
                            grpc_controller_no_frontend)

    expand_volume_with_frontend(grpc_controller, EXPANDED_SIZE)
    wait_and_check_volume_expansion(grpc_controller, EXPANDED_SIZE)

    # Data inside the expanded part of the volume.
    data1_len = random_length(PAGE_SIZE)
    data1 = Data(random.randrange(SIZE, EXPANDED_SIZE - PAGE_SIZE, PAGE_SIZE),
                 data1_len, random_string(data1_len))
    snap1 = Snapshot(dev, data1, address)

    backup1_info = create_backup(address, snap1.name, backup_target,
                                 EXPANDED_SIZE_STR)
    assert backup1_info["VolumeName"] == VOLUME_NAME
    assert backup1_info["Size"] == str(2 * BLOCK_SIZE)

    backup_volumes = cmd.backup_volume_list(address, VOLUME_NAME,
                                            backup_target)
    assert VOLUME_NAME in backup_volumes
    url = get_backup_volume_url(backup_target, VOLUME_NAME)
    backup_info = cmd.backup_inspect_volume(address, url)
    assert backup_info["Size"] == EXPANDED_SIZE_STR

    # restore command invocation should error out
    with pytest.raises(subprocess.CalledProcessError) as e:
        cmd.backup_restore(dr_address, backup1_info["URL"])
    assert "need to expand the DR volume" in e.value.stdout

    # The above restore error is triggered before calling the replicas.
    # Hence the error won't be recorded in the restore status
    # and we can continue restoring backups for the DR volume.
    rs = cmd.restore_status(dr_address)
    for status in rs.values():
        assert status['backupURL'] == backup0_info["URL"]
        assert status['lastRestored'] == backup0_info["Name"]
        assert 'error' not in status
        assert not status["isRestoring"]

    grpc_controller_no_frontend.volume_expand(EXPANDED_SIZE)
    wait_for_volume_expansion(grpc_controller_no_frontend, EXPANDED_SIZE)

    # This restore command will trigger snapshot purge.
    # And the error is triggered before calling the replicas.
    with pytest.raises(subprocess.CalledProcessError) as e:
        cmd.backup_restore(dr_address, backup1_info["URL"])
    assert "found more than 1 snapshot in the replicas, " \
           "hence started to purge snapshots before the restore" \
           in e.value.stdout
    wait_for_purge_completion(dr_address)

    # After the purge only the volume head and the expansion snapshot
    # are left on the DR volume.
    snaps_info = cmd.snapshot_info(dr_address)
    assert len(snaps_info) == 2
    volume_head_name = "volume-head"
    snap_name = "expand-" + EXPANDED_SIZE_STR
    head_info = snaps_info[volume_head_name]
    assert head_info["name"] == volume_head_name
    assert head_info["parent"] == snap_name
    assert not head_info["children"]
    assert head_info["usercreated"] is False
    snap_info = snaps_info[snap_name]
    assert snap_info["name"] == snap_name
    assert not snap_info["parent"]
    assert volume_head_name in snap_info["children"]
    assert snap_info["usercreated"] is False

    cmd.backup_restore(dr_address, backup1_info["URL"])
    wait_for_restore_completion(dr_address, backup1_info["URL"])
    verify_no_frontend_data(data1.offset, data1.content,
                            grpc_controller_no_frontend)

    # For DR volume, the rebuilding replica won't be expanded automatically.
    open_replica(grpc_fixed_dir_replica2)
    with pytest.raises(subprocess.CalledProcessError):
        cmd.add_replica(dr_address, grpc_fixed_dir_replica2.url, True)

    # Manually expand the rebuilding replica then retry `add-replica`.
    grpc_fixed_dir_replica2.replica_open()
    grpc_fixed_dir_replica2.replica_expand(EXPANDED_SIZE)
    grpc_fixed_dir_replica2.replica_close()
    cmd.add_replica(dr_address, grpc_fixed_dir_replica2.url, True)

    # Exactly one replica must be RW and the other one WO at this point.
    replicas = grpc_controller_no_frontend.replica_list()
    assert len(replicas) == 2
    rw_replica, wo_replica = 0, 0
    for r in replicas:
        if r.mode == 'RW':
            rw_replica += 1
        else:
            assert r.mode == "WO"
            wo_replica += 1
    assert rw_replica == 1 and wo_replica == 1

    # The old replica will fail the restore but the error won't be recorded.
    # Then rebuilding replica will start full restore.
    with pytest.raises(subprocess.CalledProcessError) as e:
        cmd.backup_restore(dr_address, backup1_info["URL"])
    assert "already restored backup" in e.value.stdout
    wait_for_restore_completion(dr_address, backup1_info["URL"])

    cmd.verify_rebuild_replica(dr_address, grpc_fixed_dir_replica2.url)
    replicas = grpc_controller_no_frontend.replica_list()
    assert len(replicas) == 2
    for r in replicas:
        assert r.mode == 'RW'

    verify_no_frontend_data(data1.offset, data1.content,
                            grpc_controller_no_frontend)

    cmd.backup_volume_rm(grpc_controller.address, VOLUME_NAME, backup_target)
Пример #8
0
def volume_expansion_with_snapshots_test(dev, grpc_controller,  # NOQA
                                         volume_name, engine_name,
                                         original_data):
    """
    Exercise snapshot create/revert/purge on a volume across an expansion.

    The volume is expanded from SIZE to EXPANDED_SIZE; data is written to
    the original part and to the expanded part, and the device content is
    compared against `original_data` and zero-filled regions after the
    expansion, after a revert to the first snapshot, and after that
    snapshot is removed and purged.

    `volume_name` is accepted but not used in this function.
    """
    # the default size is 4MB, will expand it to 8MB
    address = grpc_controller.address
    zero_char = b'\x00'.decode('utf-8')

    def original_part_with(data):
        # `original_data` with `data` overlaid at its offset.
        prefix = original_data[0:data.offset]
        suffix = original_data[data.offset+data.length:]
        return prefix + data.content + suffix

    def expanded_part_with(data):
        # Zero-filled expanded region with `data` overlaid at its offset.
        leading_zeros = zero_char*(data.offset-SIZE)
        trailing_zeros = zero_char*(EXPANDED_SIZE-data.offset-data.length)
        return leading_zeros + data.content + trailing_zeros

    # write the data to the original part then do expansion
    len1 = random_length(PAGE_SIZE)
    offset1 = random.randrange(0, SIZE-2*PAGE_SIZE, PAGE_SIZE)
    data1 = Data(offset1, len1, random_string(len1))
    snap1 = Snapshot(dev, data1, address)

    expand_volume_with_frontend(grpc_controller, EXPANDED_SIZE)
    wait_and_check_volume_expansion(grpc_controller, EXPANDED_SIZE)

    snap1.verify_data()
    assert dev.readat(0, SIZE) == original_part_with(data1)
    assert dev.readat(SIZE, SIZE) == zero_char*SIZE

    # write the data to both the original part and the expanded part
    len2 = random_length(PAGE_SIZE)
    data2 = Data(SIZE-PAGE_SIZE, len2, random_string(len2))
    snap2 = Snapshot(dev, data2, address)
    len3 = random_length(PAGE_SIZE)
    offset3 = random.randrange(SIZE, EXPANDED_SIZE-PAGE_SIZE, PAGE_SIZE)
    data3 = Data(offset3, len3, random_string(len3))
    snap3 = Snapshot(dev, data3, address)
    for snapshot in (snap1, snap2, snap3):
        snapshot.verify_data()
    assert dev.readat(SIZE, SIZE) == expanded_part_with(data3)

    # overwrite the location of data1
    len4 = random_length(PAGE_SIZE)
    data4 = Data(data1.offset, len4, random_string(len4))
    snap4 = Snapshot(dev, data4, address)
    snap4.verify_data()

    # revert to snap1 then see if we can still r/w the existing data
    # and expanded part
    snapshot_revert_with_frontend(address, engine_name, snap1.name)
    assert dev.readat(0, SIZE) == original_part_with(data1)
    assert dev.readat(SIZE, SIZE) == zero_char*SIZE

    len5 = random_length(PAGE_SIZE)
    offset5 = random.randrange(SIZE, EXPANDED_SIZE-PAGE_SIZE, PAGE_SIZE)
    data5 = Data(offset5, len5, random_string(len5))
    snap5 = Snapshot(dev, data5, address)
    snap5.verify_data()
    assert dev.readat(SIZE, SIZE) == expanded_part_with(data5)

    # delete and purge the snap1. it will coalesce with the larger snap2
    cmd.snapshot_rm(address, snap1.name)
    cmd.snapshot_purge(address)
    wait_for_purge_completion(address)
    assert dev.readat(0, SIZE) == original_part_with(data1)
    assert dev.readat(SIZE, SIZE) == expanded_part_with(data5)
Пример #9
0
def test_snapshot_tree_basic(grpc_controller,  # NOQA
                             grpc_replica1, grpc_replica2):  # NOQA
    """
    Build a snapshot tree, remove most of its nodes, purge, and check
    the shape and content of what remains.
    """
    address = grpc_controller.address
    dev = get_dev(grpc_replica1, grpc_replica2, grpc_controller)

    offset, length = 0, 128

    snap, data = snapshot_tree_build(dev, address, ENGINE_NAME,
                                     offset, length)

    snapshot_revert_with_frontend(address, ENGINE_NAME, snap["1b"])
    for doomed in ("0a", "0b", "1c", "2a", "2b", "2c", "3a", "3b", "3c"):
        cmd.snapshot_rm(address, snap[doomed])
    cmd.snapshot_purge(address)
    wait_for_purge_completion(address)

    # the result should look like this
    # snap["0b"](r) -> snap["0c"]
    #   \-> snap["1a"] -> snap["1b"] -> head
    info = cmd.snapshot_info(address)
    assert len(info) == 5

    # snap["0b"]: removed root with two children.
    assert snap["0b"] in info
    root = info[snap["0b"]]
    assert root["parent"] == ""
    assert len(root["children"]) == 2
    for child in ("0c", "1a"):
        assert snap[child] in info[snap["0b"]]["children"]
    assert info[snap["0b"]]["removed"] is True

    # snap["0c"]: leaf under the root.
    assert snap["0c"] in info
    leaf = info[snap["0c"]]
    assert leaf["parent"] == snap["0b"]
    assert not leaf["children"]

    # snap["1a"] -> snap["1b"] -> head
    assert snap["1a"] in info
    branch = info[snap["1a"]]
    assert branch["parent"] == snap["0b"]
    assert snap["1b"] in branch["children"]

    assert snap["1b"] in info
    tip = info[snap["1b"]]
    assert tip["parent"] == snap["1a"]
    assert VOLUME_HEAD in tip["children"]

    assert VOLUME_HEAD in info
    assert info[VOLUME_HEAD]["parent"] == snap["1b"]

    for node in ("0b", "0c", "1a", "1b"):
        snapshot_tree_verify_node(dev, address, ENGINE_NAME,
                                  offset, length, snap, data, node)
Пример #10
0
def test_snapshot_rm_rolling(grpc_controller,  # NOQA
                             grpc_replica1, grpc_replica2):  # NOQA
    """
    Remove and purge snapshots in several rounds while new ones are added.

    A snapshot that is the parent of the volume head is only marked as
    removed by a purge; it disappears from `snapshot info` in a later
    purge, after newer snapshots have been created on top of it.
    """
    address = grpc_controller.address
    dev = get_dev(grpc_replica1, grpc_replica2, grpc_controller)

    existings = {}

    def take_snapshot():
        # Write fresh random data and snapshot it.
        return Snapshot(dev, generate_random_data(existings), address)

    def purge_and_wait():
        cmd.snapshot_purge(address)
        wait_for_purge_completion(address)

    snap1 = take_snapshot()

    listed = cmd.snapshot_ls(address)
    assert snap1.name in listed

    cmd.snapshot_rm(address, snap1.name)
    # cannot do anything because it's the parent of volume head
    purge_and_wait()

    snap2 = take_snapshot()

    info = cmd.snapshot_info(address)
    assert len(info) == 3
    for snapshot in (snap1, snap2):
        assert snapshot.name in info
    assert info[snap1.name]["removed"] is True
    assert info[snap2.name]["removed"] is False

    cmd.snapshot_rm(address, snap2.name)
    # this should trigger the deletion of snap1
    purge_and_wait()

    snap2.verify_checksum()
    snap1.verify_data()

    snap3 = take_snapshot()
    snap4 = take_snapshot()
    snap5 = take_snapshot()

    listed = cmd.snapshot_ls(address)
    for snapshot in (snap1, snap2):
        assert snapshot.name not in listed
    for snapshot in (snap3, snap4, snap5):
        assert snapshot.name in listed

    info = cmd.snapshot_info(address)
    assert len(info) == 5
    assert snap1.name not in info
    for snapshot in (snap2, snap3, snap4, snap5):
        assert snapshot.name in info
    assert info[snap2.name]["removed"] is True

    for snapshot in (snap3, snap4, snap5):
        cmd.snapshot_rm(address, snapshot.name)
    # this should trigger the deletion of snap2 - snap4
    # and snap5 marked as removed
    purge_and_wait()

    info = cmd.snapshot_info(address)
    assert len(info) == 2
    for snapshot in (snap1, snap2, snap3, snap4):
        assert snapshot.name not in info
    assert snap5.name in info
    assert info[snap5.name]["removed"] is True

    snap5.verify_checksum()
    for snapshot in (snap4, snap3, snap2, snap1):
        snapshot.verify_data()