Example 1
def test_frontend_switch(
        grpc_controller_no_frontend,  # NOQA
        grpc_replica1,
        grpc_replica2):  # NOQA

    open_replica(grpc_replica1)
    open_replica(grpc_replica2)

    replicas = grpc_controller_no_frontend.replica_list()
    assert len(replicas) == 0

    r1_url = grpc_replica1.url
    r2_url = grpc_replica2.url
    v = grpc_controller_no_frontend.volume_start(replicas=[r1_url, r2_url])
    assert v.name == VOLUME_NO_FRONTEND_NAME
    assert v.replicaCount == 2
    assert v.frontend == ""

    grpc_controller_no_frontend.volume_frontend_start(FRONTEND_TGT_BLOCKDEV)

    dev = get_blockdev(volume=VOLUME_NO_FRONTEND_NAME)

    data = random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    grpc_controller_no_frontend.volume_frontend_shutdown()

    grpc_controller_no_frontend.volume_frontend_start(FRONTEND_TGT_BLOCKDEV)

    dev = get_blockdev(volume=VOLUME_NO_FRONTEND_NAME)
    verify_read(dev, data_offset, data)

    grpc_controller_no_frontend.volume_frontend_shutdown()
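
The verify_data / verify_read pair used above is a plain write-then-read check against the attached block device. A minimal sketch of that pattern, assuming the device wrapper exposes a writeat alongside the readat(offset, length) call seen in the later examples (writeat is an assumption here, not confirmed by these tests):

def verify_data_sketch(dev, offset, data):
    # write the payload at the given offset (dev.writeat is assumed)
    dev.writeat(offset, data)
    # read it back and confirm the device returns exactly what was written
    assert dev.readat(offset, len(data)) == data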
Example 2
def test_ha_revision_counter_consistency(
        grpc_controller,  # NOQA
        grpc_replica1,
        grpc_replica2):  # NOQA
    open_replica(grpc_replica1)
    open_replica(grpc_replica2)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    r1_url = grpc_replica1.url
    r2_url = grpc_replica2.url
    v = grpc_controller.volume_start(replicas=[r1_url, r2_url])
    assert v.name == VOLUME_NAME
    assert v.replicaCount == 2

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev(VOLUME_NAME)

    verify_async(dev, 10, 128, 100)

    r1 = grpc_replica1.replica_get()
    r2 = grpc_replica2.replica_get()
    # the kernel can merge requests, so the backend may not receive 1000 writes
    assert r1.revisionCounter > 0
    assert r1.revisionCounter == r2.revisionCounter
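
verify_async(dev, 10, 128, 100) drives concurrent I/O against the device; a plausible reading (the exact semantics live in the suite's shared helpers) is 10 workers, each writing and re-reading 100 random 128-byte payloads, which is where the 1000 writes in the comment above come from. A hedged sketch of that shape, with the offset scheme and dev.writeat purely illustrative:

import threading

def verify_async_sketch(dev, thread_count, length, count):
    errors = []

    def worker(base):
        # each worker writes `count` payloads of `length` bytes,
        # then reads each one back to verify it
        for i in range(count):
            offset = base + i * length
            payload = random_string(length)
            dev.writeat(offset, payload)
            if dev.readat(offset, length) != payload:
                errors.append(offset)

    threads = [threading.Thread(target=worker, args=(t * count * length,))
               for t in range(thread_count)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    assert not errors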
Example 3
def test_ha_single_replica_failure(
        grpc_controller,  # NOQA
        grpc_replica1,
        grpc_replica2):  # NOQA
    open_replica(grpc_replica1)
    open_replica(grpc_replica2)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    r1_url = grpc_replica1.url
    r2_url = grpc_replica2.url
    v = grpc_controller.volume_start(replicas=[r1_url, r2_url])
    assert v.replicaCount == 2

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev(VOLUME_NAME)

    data = random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    cleanup_replica(grpc_replica2)

    verify_async(dev, 10, 128, 1)

    verify_replica_state(grpc_controller, r2_url, "ERR")

    verify_read(dev, data_offset, data)
Example 4
def test_ha_remove_extra_disks(
        grpc_controller,  # NOQA
        grpc_replica1,
        grpc_replica2):  # NOQA
    address = grpc_controller.address

    prepare_backup_dir(BACKUP_DIR)
    open_replica(grpc_replica1)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    r1_url = grpc_replica1.url
    v = grpc_controller.volume_start(replicas=[r1_url])
    assert v.name == VOLUME_NAME
    assert v.replicaCount == 1

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 1
    assert replicas[0].mode == "RW"

    dev = get_blockdev(VOLUME_NAME)

    wasted_data = random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, wasted_data)

    # now replica1 contains extra data in a snapshot
    cmd.snapshot_create(address)

    cleanup_controller(grpc_controller)

    open_replica(grpc_replica2)
    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    r2_url = grpc_replica2.url
    v = grpc_controller.volume_start(replicas=[r2_url])
    assert v.name == VOLUME_NAME
    assert v.replicaCount == 1

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 1
    assert replicas[0].mode == "RW"

    dev = get_blockdev(VOLUME_NAME)

    data = random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    r1 = grpc_replica1.replica_reload()
    print(r1)

    cmd.add_replica(address, r1_url)
    wait_for_rebuild_complete(address)

    verify_data(dev, data_offset, data)
Example 5
def start_no_frontend_volume(grpc_em, grpc_c, grpc_r1, grpc_r2):
    grpc_c.volume_frontend_start(FRONTEND_TGT_BLOCKDEV)

    open_replica(grpc_r1)
    open_replica(grpc_r2)

    dr_replicas = grpc_c.replica_list()
    assert len(dr_replicas) == 0

    r1_url = grpc_r1.url
    r2_url = grpc_r2.url
    v = grpc_c.volume_start(replicas=[r1_url, r2_url])
    assert v.replicaCount == 2

    grpc_c.volume_frontend_shutdown()
    v = grpc_c.volume_get()
    assert v.frontendState == "down"
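
start_no_frontend_volume leaves the frontend down on purpose: the restore examples later attach replicas and restore backups without ever exposing a block device. A caller that later needs the device would follow the same start/shutdown pattern as Example 1 (sketch; grpc_c is a controller client as in the helper above, and the volume name depends on the fixture):

grpc_c.volume_frontend_start(FRONTEND_TGT_BLOCKDEV)
dev = get_blockdev(volume=VOLUME_NO_FRONTEND_NAME)
# ... read/write through dev ...
grpc_c.volume_frontend_shutdown()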
Example 6
def test_replica_crashed_update_state_error(grpc_controller,
                                            grpc_fixed_dir_replica1):  # NOQA
    """
    The test flow:
    1. Create a fixed directory replica1, since we need to remove a file
    manually.
    2. Remove file 'volume-head-000.img' manually from fixed directory
    replica1.
    3. Check this fixed diretory replica1 should be in 'ERR' state.
    4. Clean up created replica.
    """
    # Create a fixed directory replica1
    open_replica(grpc_fixed_dir_replica1)

    # Before creating a volume, the engine controller should have no replicas.
    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    # Create a volume on this engine controller with fixed_dir_replica1.
    r1_url = grpc_fixed_dir_replica1.url
    v = grpc_controller.volume_start(replicas=[r1_url])
    assert v.replicaCount == 1

    # Check that the engine controller has 1 replica in 'RW' mode.
    replicas = grpc_controller.replica_list()
    assert len(replicas) == 1
    assert replicas[0].mode == "RW"

    # Get the replica object
    r = grpc_fixed_dir_replica1.replica_get()
    assert r.chain == ['volume-head-000.img']
    assert r.state == 'open'
    assert r.sector_size == 512

    # Remove a file from this replica's directory
    remove_file = os.path.join(FIXED_REPLICA_PATH1, "volume-head-000.img")

    assert os.path.exists(remove_file)
    os.remove(remove_file)

    # After removing the file, the replica should be in 'ERR' mode.
    verify_replica_mode(grpc_controller, r1_url, "ERR")

    # Cleanup created replica.
    cleanup_replica(grpc_fixed_dir_replica1)
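
verify_replica_mode (and the verify_replica_state calls in the other examples) has to poll: the controller flips the replica to 'ERR' asynchronously, only once I/O against the missing file fails. A minimal polling sketch, with the retry budget chosen arbitrarily:

import time

def wait_for_replica_mode(grpc_controller, url, mode, retries=30, interval=1):
    # poll replica_list() until the replica at `url` reports `mode`
    for _ in range(retries):
        for r in grpc_controller.replica_list():
            if r.address == url and r.mode == mode:
                return r
        time.sleep(interval)
    raise AssertionError("replica %s never reached mode %s" % (url, mode))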
Example 7
def test_snapshot_tree_rebuild(
        grpc_controller,  # NOQA
        grpc_replica1,
        grpc_replica2):  # NOQA
    address = grpc_controller.address

    offset = 0
    length = 128

    open_replica(grpc_replica1)
    open_replica(grpc_replica2)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    r1_url = grpc_replica1.url
    r2_url = grpc_replica2.url
    v = grpc_controller.volume_start(replicas=[r1_url, r2_url])
    assert v.name == VOLUME_NAME
    assert v.replicaCount == 2

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev(VOLUME_NAME)

    snap, snap_data = snapshot_tree_build(dev, address, ENGINE_NAME, offset,
                                          length)

    data = random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    # Cleanup replica2
    cleanup_replica(grpc_replica2)

    verify_async(dev, 10, 128, 1)

    verify_replica_state(grpc_controller, r2_url, "ERR")

    verify_read(dev, data_offset, data)

    grpc_controller.replica_delete(replicas[1].address)

    # Rebuild replica2
    open_replica(grpc_replica2)
    cmd.add_replica(address, r2_url)
    wait_for_rebuild_complete(address)

    verify_async(dev, 10, 128, 1)

    verify_replica_state(grpc_controller, r2_url, "RW")

    snapshot_tree_verify(dev, address, ENGINE_NAME, offset, length, snap,
                         snap_data)
Example 8
def test_upgrade(
        grpc_engine_manager,  # NOQA
        grpc_controller,  # NOQA
        grpc_fixed_dir_replica1,
        grpc_fixed_dir_replica2,  # NOQA
        grpc_extra_replica1,
        grpc_extra_replica2):  # NOQA

    dev = get_dev(grpc_fixed_dir_replica1, grpc_fixed_dir_replica2,
                  grpc_controller)

    offset = 0
    length = 128

    data = random_string(length)
    verify_data(dev, offset, data)

    # both sets of replicas point to the same underlying volume
    r1_url = grpc_fixed_dir_replica1.url
    r2_url = grpc_fixed_dir_replica2.url
    upgrade_r1_url = grpc_extra_replica1.url
    upgrade_r2_url = grpc_extra_replica2.url

    v = grpc_controller.volume_start(replicas=[r1_url, r2_url])
    assert v.replicaCount == 2

    upgrade_e = upgrade_engine(grpc_engine_manager,
                               LONGHORN_UPGRADE_BINARY,
                               ENGINE_NAME,
                               VOLUME_NAME,
                               replicas=[upgrade_r1_url, upgrade_r2_url])
    assert upgrade_e.spec.binary == LONGHORN_UPGRADE_BINARY

    verify_data(dev, offset, data)

    grpc_controller.client_upgrade(get_process_address(upgrade_e))
    wait_for_process_running(grpc_engine_manager, ENGINE_NAME)

    info = grpc_controller.volume_get()
    assert info.endpoint == path.join(LONGHORN_DEV_DIR, VOLUME_NAME)

    # cannot start with same binary
    # with pytest.raises(grpc.RpcError):
    #     grpc_engine_manager.engine_upgrade(
    #         ENGINE_NAME, LONGHORN_UPGRADE_BINARY,
    #         SIZE, [r1_url, r2_url])
    # verify_data(dev, offset, data)

    # cannot start with wrong replica, would trigger rollback
    with pytest.raises(grpc.RpcError):
        upgrade_engine(grpc_engine_manager, LONGHORN_BINARY, ENGINE_NAME,
                       VOLUME_NAME, ["random"])
    verify_data(dev, offset, data)

    grpc_fixed_dir_replica1 = cleanup_replica(grpc_fixed_dir_replica1)
    grpc_fixed_dir_replica2 = cleanup_replica(grpc_fixed_dir_replica2)
    open_replica(grpc_fixed_dir_replica1)
    open_replica(grpc_fixed_dir_replica2)

    e = upgrade_engine(grpc_engine_manager, LONGHORN_BINARY, ENGINE_NAME,
                       VOLUME_NAME, [r1_url, r2_url])
    assert e.spec.binary == LONGHORN_BINARY

    verify_data(dev, offset, data)

    grpc_controller.client_upgrade(get_process_address(e))
    wait_for_process_running(grpc_engine_manager, ENGINE_NAME)

    time.sleep(3)
    info = grpc_controller.volume_get()
    assert info.endpoint == path.join(LONGHORN_DEV_DIR, VOLUME_NAME)
Example 9
def test_single_replica_expansion_failed(grpc_controller,
                                         grpc_fixed_dir_replica1,
                                         grpc_fixed_dir_replica2):  # NOQA
    """
    The test flow:
    1. Write random data into the block device.
    2. Create the 1st snapshot.
    3. Create an empty directory using the tmp meta file path of
       the expansion disk for replica1.
    4. Try to expand the volume. replica1 will be directly marked as ERR state.
       Finally the volume expansion should succeed.
    5. Check the volume status, and if the expanded volume works fine:
       r/w data then create the 2nd snapshot.
    6. Rebuild replica1 and check the replica1 is expanded automatically.
    7. Delete replica2 then check if the rebuilt replica1 works fine.
    """
    address = grpc_controller.address
    r1_url = grpc_fixed_dir_replica1.address
    r2_url = grpc_fixed_dir_replica2.address
    dev = get_dev(grpc_fixed_dir_replica1, grpc_fixed_dir_replica2,
                  grpc_controller)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    # the default size is 4MB; we will expand it to 8MB
    zero_char = b'\x00'.decode('utf-8')

    # write the data to the original part then do expansion
    data1_len = random_length(PAGE_SIZE)
    data1 = Data(random.randrange(0, SIZE - 2 * PAGE_SIZE, PAGE_SIZE),
                 data1_len, random_string(data1_len))
    snap1 = Snapshot(dev, data1, address)

    disk_meta_tmp_1 = os.path.join(FIXED_REPLICA_PATH1,
                                   EXPANSION_DISK_TMP_META_NAME)
    os.mkdir(disk_meta_tmp_1)

    # replica1 will fail to expand, so the engine will directly mark it
    # as ERR. The volume expansion should still succeed since replica2
    # works fine.
    grpc_controller.volume_frontend_shutdown()
    grpc_controller.volume_expand(EXPANDED_SIZE)
    wait_for_volume_expansion(grpc_controller, EXPANDED_SIZE)
    grpc_controller.volume_frontend_start(FRONTEND_TGT_BLOCKDEV)

    volume_info = grpc_controller.volume_get()
    assert volume_info.last_expansion_error != ""
    assert volume_info.last_expansion_failed_at != ""
    verify_replica_state(grpc_controller, r1_url, "ERR")
    verify_replica_state(grpc_controller, r2_url, "RW")

    expansion_disk_2 = os.path.join(FIXED_REPLICA_PATH2, EXPANSION_DISK_NAME)
    disk_meta_tmp_2 = os.path.join(FIXED_REPLICA_PATH2,
                                   EXPANSION_DISK_TMP_META_NAME)
    assert os.path.exists(expansion_disk_2)
    assert not os.path.exists(disk_meta_tmp_2)
    # The meta info file should remain unchanged
    replica_meta_file_2 = os.path.join(FIXED_REPLICA_PATH2,
                                       REPLICA_META_FILE_NAME)
    with open(replica_meta_file_2) as f:
        replica_meta_2 = json.load(f)
    assert replica_meta_2["Size"] == EXPANDED_SIZE

    # Cleanup replica1 then check if replica2 works fine
    cleanup_replica(grpc_fixed_dir_replica1)
    verify_replica_state(grpc_controller, r1_url, "ERR")
    grpc_controller.replica_delete(replicas[0].address)

    snap1.verify_data()
    data2_len = random_length(PAGE_SIZE)
    data2 = Data(SIZE - PAGE_SIZE, data2_len, random_string(data2_len))
    snap2 = Snapshot(dev, data2, address)
    snap2.verify_data()
    assert dev.readat(SIZE, SIZE) == zero_char * SIZE

    # Rebuild replica1.
    # The newly opened replica1 will be expanded automatically
    open_replica(grpc_fixed_dir_replica1)
    cmd.add_replica(address, grpc_fixed_dir_replica1.url)
    wait_for_rebuild_complete(address)
    r1 = grpc_fixed_dir_replica1.replica_get()
    assert r1.size == EXPANDED_SIZE_STR
    verify_replica_state(grpc_controller, r1_url, "RW")
    replica_meta_file_1 = os.path.join(FIXED_REPLICA_PATH1,
                                       REPLICA_META_FILE_NAME)
    with open(replica_meta_file_1) as f:
        replica_meta_1 = json.load(f)
    assert replica_meta_1["Size"] == EXPANDED_SIZE

    # Delete replica2 then check if the rebuilt replica1 works fine
    cleanup_replica(grpc_fixed_dir_replica2)
    verify_replica_state(grpc_controller, r2_url, "ERR")
    grpc_controller.replica_delete(replicas[1].address)

    data3_len = random_length(PAGE_SIZE)
    data3 = Data(random.randrange(SIZE, EXPANDED_SIZE - PAGE_SIZE, PAGE_SIZE),
                 data3_len, random_string(data3_len))
    snap3 = Snapshot(dev, data3, address)
    snap1.verify_data()
    snap2.verify_data()
    snap3.verify_data()
    assert \
        dev.readat(SIZE, SIZE) == zero_char*(data3.offset-SIZE) + \
        data3.content + zero_char*(EXPANDED_SIZE-data3.offset-data3.length)
Example 10
def test_ha_single_replica_rebuild(
        grpc_controller,  # NOQA
        grpc_replica1,
        grpc_replica2):  # NOQA
    address = grpc_controller.address

    open_replica(grpc_replica1)
    open_replica(grpc_replica2)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    r1_url = grpc_replica1.url
    r2_url = grpc_replica2.url
    v = grpc_controller.volume_start(replicas=[r1_url, r2_url])
    assert v.replicaCount == 2

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev(VOLUME_NAME)

    data = random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    # Cleanup replica2
    cleanup_replica(grpc_replica2)

    verify_async(dev, 10, 128, 1)

    verify_replica_state(grpc_controller, r2_url, "ERR")

    verify_read(dev, data_offset, data)

    grpc_controller.replica_delete(replicas[1].address)

    # Rebuild replica2
    open_replica(grpc_replica2)
    cmd.add_replica(address, r2_url)
    wait_for_rebuild_complete(address)

    verify_async(dev, 10, 128, 1)

    verify_replica_state(grpc_controller, r2_url, "RW")

    verify_read(dev, data_offset, data)

    # WORKAROUND for being unable to remove the parent of the volume head
    newsnap = cmd.snapshot_create(address)

    info = cmd.snapshot_info(address)
    assert len(info) == 3
    sysnap = info[newsnap]["parent"]
    assert info[sysnap]["parent"] == ""
    assert newsnap in info[sysnap]["children"]
    assert info[sysnap]["usercreated"] is False
    assert info[sysnap]["removed"] is False

    cmd.snapshot_purge(address)
    wait_for_purge_completion(address)

    info = cmd.snapshot_info(address)
    assert len(info) == 2
    assert info[newsnap] is not None
    assert info[VOLUME_HEAD] is not None
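
For reference, the snapshot_info() map walked above has roughly this shape after the purge (names and value types are illustrative; newsnap is the name returned by snapshot_create):

# {
#     "volume-head": {"parent": "<newsnap>", "children": {},
#                     "usercreated": false, "removed": false},
#     "<newsnap>":   {"parent": "", "children": {"volume-head": true},
#                     "usercreated": true, "removed": false},
# }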
Example 11
def test_expansion_rollback_with_rebuild(grpc_controller,
                                         grpc_fixed_dir_replica1,
                                         grpc_fixed_dir_replica2):  # NOQA
    """
    The test flow:
    1. Write random data into the block device.
    2. Create the 1st snapshot.
    3. Create an empty directory using the tmp meta file path of
       the expansion disk for each replica.
       This will fail the following expansion and trigger expansion rollback.
    4. Try to expand the volume but fails. Then the automatic rollback will
       be applied implicitly.
    5. Check the volume status and if there are leftovers of
       the failed expansion.
    6. Check if the volume is still usable by r/w data,
       then create the 2nd snapshot.
    7. Retry expansion. It should succeed.
    8. Verify the data and try data r/w.
    9. Delete then rebuild the replica2.
       Then rebuilt replica2 will be expanded automatically.
    10. Delete the replica1 then check if the rebuilt replica2 works fine.
    """
    address = grpc_controller.address
    r1_url = grpc_fixed_dir_replica1.address
    r2_url = grpc_fixed_dir_replica2.address
    dev = get_dev(grpc_fixed_dir_replica1, grpc_fixed_dir_replica2,
                  grpc_controller)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    # the default size is 4MB; we will expand it to 8MB
    zero_char = b'\x00'.decode('utf-8')
    original_data = zero_char * SIZE

    # write the data to the original part then do expansion
    data1_len = random_length(PAGE_SIZE)
    data1 = Data(random.randrange(0, SIZE - 2 * PAGE_SIZE, PAGE_SIZE),
                 data1_len, random_string(data1_len))
    snap1 = Snapshot(dev, data1, address)

    # Use the tmp meta file paths of the expansion disks to create empty
    # directories so that the expansion disk metadata update will fail.
    # The expansion will then fail and the rollback will be triggered.
    disk_meta_tmp_1 = os.path.join(FIXED_REPLICA_PATH1,
                                   EXPANSION_DISK_TMP_META_NAME)
    disk_meta_tmp_2 = os.path.join(FIXED_REPLICA_PATH2,
                                   EXPANSION_DISK_TMP_META_NAME)
    os.mkdir(disk_meta_tmp_1)
    os.mkdir(disk_meta_tmp_2)

    # Expansion will fail for all the replicas,
    # then the engine will roll back automatically
    grpc_controller.volume_frontend_shutdown()
    grpc_controller.volume_expand(EXPANDED_SIZE)
    wait_for_volume_expansion(grpc_controller, SIZE)
    grpc_controller.volume_frontend_start(FRONTEND_TGT_BLOCKDEV)

    # Expansion should fail but the expansion rollback should succeed
    volume_info = grpc_controller.volume_get()
    assert volume_info.last_expansion_error != ""
    assert volume_info.last_expansion_failed_at != ""
    verify_replica_state(grpc_controller, r1_url, "RW")
    verify_replica_state(grpc_controller, r2_url, "RW")

    # The invalid disk and head will be cleaned up automatically
    # after the rollback
    expansion_disk_1 = os.path.join(FIXED_REPLICA_PATH1, EXPANSION_DISK_NAME)
    expansion_disk_2 = os.path.join(FIXED_REPLICA_PATH2, EXPANSION_DISK_NAME)
    assert not os.path.exists(expansion_disk_1)
    assert not os.path.exists(expansion_disk_2)
    assert not os.path.exists(disk_meta_tmp_1)
    assert not os.path.exists(disk_meta_tmp_2)
    # The meta info file should remain unchanged
    replica_meta_file_1 = os.path.join(FIXED_REPLICA_PATH1,
                                       REPLICA_META_FILE_NAME)
    replica_meta_file_2 = os.path.join(FIXED_REPLICA_PATH2,
                                       REPLICA_META_FILE_NAME)
    with open(replica_meta_file_1) as f:
        replica_meta_1 = json.load(f)
    assert replica_meta_1["Size"] == SIZE
    with open(replica_meta_file_2) as f:
        replica_meta_2 = json.load(f)
    assert replica_meta_2["Size"] == SIZE

    # verify the existing data, then write new data
    snap1.verify_data()
    data2_len = random_length(PAGE_SIZE)
    data2 = Data(SIZE - PAGE_SIZE, data2_len, random_string(data2_len))
    snap2 = Snapshot(dev, data2, address)

    # Retry expansion
    expand_volume_with_frontend(grpc_controller, EXPANDED_SIZE)
    wait_and_check_volume_expansion(grpc_controller, EXPANDED_SIZE)
    with open(replica_meta_file_1) as f:
        replica_meta_1 = json.load(f)
    assert replica_meta_1["Size"] == EXPANDED_SIZE
    with open(replica_meta_file_2) as f:
        replica_meta_2 = json.load(f)
    assert replica_meta_2["Size"] == EXPANDED_SIZE

    assert os.path.exists(expansion_disk_1)
    assert os.path.exists(expansion_disk_2)

    snap1.verify_data()
    snap2.verify_data()
    assert dev.readat(SIZE, SIZE) == zero_char * SIZE

    data3_len = random_length(PAGE_SIZE)
    data3 = Data(random.randrange(SIZE, EXPANDED_SIZE - PAGE_SIZE, PAGE_SIZE),
                 data3_len, random_string(data3_len))
    snap3 = Snapshot(dev, data3, address)
    snap1.verify_data()
    snap2.verify_data()
    snap3.verify_data()
    assert \
        dev.readat(SIZE, SIZE) == zero_char*(data3.offset-SIZE) + \
        data3.content + zero_char*(EXPANDED_SIZE-data3.offset-data3.length)

    # Delete replica2
    cleanup_replica(grpc_fixed_dir_replica2)
    verify_replica_state(grpc_controller, r2_url, "ERR")
    grpc_controller.replica_delete(replicas[1].address)

    # Rebuild replica2.
    open_replica(grpc_fixed_dir_replica2)
    # The newly opened replica2 will be expanded automatically
    cmd.add_replica(address, grpc_fixed_dir_replica2.url)
    wait_for_rebuild_complete(address)
    verify_replica_state(grpc_controller, r2_url, "RW")

    # Cleanup replica1 then check if the rebuilt replica2 works fine
    cleanup_replica(grpc_fixed_dir_replica1)
    verify_replica_state(grpc_controller, r1_url, "ERR")
    grpc_controller.replica_delete(replicas[0].address)

    assert \
        dev.readat(0, SIZE) == \
        original_data[0:data1.offset] + data1.content + \
        original_data[data1.offset+data1.length:data2.offset] + \
        data2.content + \
        original_data[data2.offset+data2.length:]
    assert \
        dev.readat(SIZE, SIZE) == zero_char*(data3.offset-SIZE) + \
        data3.content + zero_char*(EXPANDED_SIZE-data3.offset-data3.length)

    data4_len = random_length(PAGE_SIZE)
    data4 = Data(data1.offset, data4_len, random_string(data4_len))
    snap4 = Snapshot(dev, data4, address)
    snap4.verify_data()
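
The long readat assertions in this test and the next compose one recurring expectation: the expanded region [SIZE, EXPANDED_SIZE) is zero-filled except for the extent covered by data3. A small helper expressing that expectation (illustrative; the tests inline it):

def expected_expanded_region(data3, zero_char):
    # zeroes up to data3's offset, then its content, then zeroes to the end
    return (zero_char * (data3.offset - SIZE)
            + data3.content
            + zero_char * (EXPANDED_SIZE - data3.offset - data3.length))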
Example 12
def test_expansion_with_rebuild(
        grpc_controller,  # NOQA
        grpc_replica1,
        grpc_replica2):  # NOQA
    address = grpc_controller.address
    dev = get_dev(grpc_replica1, grpc_replica2, grpc_controller)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    # the default size is 4MB; we will expand it to 8MB
    zero_char = b'\x00'.decode('utf-8')
    original_data = zero_char * SIZE

    # write the data to the original part then do expansion
    data1_len = random_length(PAGE_SIZE)
    data1 = Data(random.randrange(0, SIZE - 2 * PAGE_SIZE, PAGE_SIZE),
                 data1_len, random_string(data1_len))
    snap1 = Snapshot(dev, data1, address)

    expand_volume_with_frontend(grpc_controller, EXPANDED_SIZE)
    wait_and_check_volume_expansion(grpc_controller, EXPANDED_SIZE)

    snap1.verify_data()
    assert \
        dev.readat(0, SIZE) == \
        original_data[0:data1.offset] + data1.content + \
        original_data[data1.offset+data1.length:]
    assert dev.readat(SIZE, SIZE) == zero_char * SIZE

    # write the data to both the original part and the expanded part
    data2_len = random_length(PAGE_SIZE)
    data2 = Data(SIZE - PAGE_SIZE, data2_len, random_string(data2_len))
    snap2 = Snapshot(dev, data2, address)
    data3_len = random_length(PAGE_SIZE)
    data3 = Data(random.randrange(SIZE, EXPANDED_SIZE - PAGE_SIZE, PAGE_SIZE),
                 data3_len, random_string(data3_len))
    snap3 = Snapshot(dev, data3, address)
    snap1.verify_data()
    snap2.verify_data()
    snap3.verify_data()
    assert \
        dev.readat(SIZE, SIZE) == zero_char*(data3.offset-SIZE) + \
        data3.content + zero_char*(EXPANDED_SIZE-data3.offset-data3.length)

    # Cleanup replica2
    cleanup_replica(grpc_replica2)
    verify_replica_state(grpc_controller, grpc_replica2.address, "ERR")
    grpc_controller.replica_delete(replicas[1].address)

    # Rebuild replica2.
    open_replica(grpc_replica2)
    # The newly opened replica2 will be expanded automatically
    cmd.add_replica(address, grpc_replica2.url)
    wait_for_rebuild_complete(address)
    verify_replica_state(grpc_controller, grpc_replica2.address, "RW")

    # Cleanup replica1 then check if the rebuilt replica2 works fine
    cleanup_replica(grpc_replica1)
    verify_replica_state(grpc_controller, grpc_replica1.address, "ERR")
    grpc_controller.replica_delete(replicas[0].address)

    assert \
        dev.readat(0, SIZE) == \
        original_data[0:data1.offset] + data1.content + \
        original_data[data1.offset+data1.length:data2.offset] + \
        data2.content + \
        original_data[data2.offset+data2.length:]
    assert \
        dev.readat(SIZE, SIZE) == zero_char*(data3.offset-SIZE) + \
        data3.content + zero_char*(EXPANDED_SIZE-data3.offset-data3.length)

    data4_len = random_length(PAGE_SIZE)
    data4 = Data(data1.offset, data4_len, random_string(data4_len))
    snap4 = Snapshot(dev, data4, address)
    snap4.verify_data()
Example 13
def test_ha_double_replica_rebuild(
        grpc_controller,  # NOQA
        grpc_replica1,
        grpc_replica2):  # NOQA
    open_replica(grpc_replica1)
    open_replica(grpc_replica2)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    r1_url = grpc_replica1.url
    r2_url = grpc_replica2.url
    v = grpc_controller.volume_start(replicas=[r1_url, r2_url])
    assert v.name == VOLUME_NAME
    assert v.replicaCount == 2

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev(VOLUME_NAME)

    data1 = random_string(128)
    data1_offset = 1024
    verify_data(dev, data1_offset, data1)

    # Close replica2
    r2 = grpc_replica2.replica_get()
    assert r2.revisionCounter == 1
    grpc_replica2.replica_close()

    verify_async(dev, 10, 128, 1)

    verify_replica_state(grpc_controller, r2_url, "ERR")

    verify_read(dev, data1_offset, data1)

    data2 = random_string(128)
    data2_offset = 512
    verify_data(dev, data2_offset, data2)

    # Close replica1
    r1 = grpc_replica1.replica_get()
    assert r1.revisionCounter == 12  # 1 initial write + 10 async writes + 1 more
    grpc_replica1.replica_close()

    # Restart volume
    cleanup_controller(grpc_controller)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    # NOTE the order is reversed here
    r1_url = grpc_replica1.url
    r2_url = grpc_replica2.url
    v = grpc_controller.volume_start(replicas=[r2_url, r1_url])
    assert v.replicaCount == 2

    # replica2 is out because of its lower revision counter
    replicas = grpc_controller.replica_list()
    assert len(replicas) == 2
    assert replicas[0].mode == "ERR"
    assert replicas[1].mode == "RW"

    verify_read(dev, data1_offset, data1)
    verify_read(dev, data2_offset, data2)

    # Rebuild replica2
    r2 = grpc_replica2.replica_get()
    assert r2.revisionCounter == 1
    grpc_replica2.replica_close()

    grpc_controller.replica_delete(replicas[0].address)

    cmd.add_replica(grpc_controller.address, r2_url)
    wait_for_rebuild_complete(grpc_controller.address)

    verify_async(dev, 10, 128, 1)

    verify_replica_state(grpc_controller, r2_url, "RW")

    verify_read(dev, data1_offset, data1)
    verify_read(dev, data2_offset, data2)

    r1 = grpc_replica1.replica_get()
    r2 = grpc_replica2.replica_get()
    assert r1.revisionCounter == 22  # 12 from before + 10 more async writes
    assert r2.revisionCounter == 22  # must be in sync with r1
Example 14
def test_restore_with_rebuild(grpc_controller, grpc_replica1, grpc_replica2,
                              grpc_controller_no_frontend,
                              grpc_fixed_dir_replica1, grpc_fixed_dir_replica2,
                              backup_targets):  # NOQA

    # Pick a random backup target.
    backup_target = backup_targets[random.randint(0, 1)]

    address = grpc_controller.address
    dr_address = grpc_controller_no_frontend.address

    try:
        cmd.backup_volume_rm(address, VOLUME_NAME, backup_target)
    except Exception:
        pass

    dev = get_dev(grpc_replica1, grpc_replica2, grpc_controller)

    start_no_frontend_volume(grpc_controller_no_frontend,
                             grpc_fixed_dir_replica1)

    data0_len = random_length(PAGE_SIZE)
    data0 = Data(random.randrange(0, SIZE - 2 * PAGE_SIZE, PAGE_SIZE),
                 data0_len, random_string(data0_len))
    snap0 = Snapshot(dev, data0, address)
    backup0_info = create_backup(address, snap0.name, backup_target)
    assert backup0_info["VolumeName"] == VOLUME_NAME
    assert backup0_info["Size"] == str(BLOCK_SIZE)

    cmd.backup_restore(dr_address, backup0_info["URL"])
    wait_for_restore_completion(dr_address, backup0_info["URL"])
    verify_no_frontend_data(data0.offset, data0.content,
                            grpc_controller_no_frontend)

    open_replica(grpc_fixed_dir_replica2)
    cmd.add_replica(dr_address, grpc_fixed_dir_replica2.url, True)

    replicas = grpc_controller_no_frontend.replica_list()
    assert len(replicas) == 2
    rw_replica, wo_replica = 0, 0
    for r in replicas:
        if r.mode == 'RW':
            rw_replica += 1
        else:
            assert r.mode == "WO"
            wo_replica += 1
    assert rw_replica == 1 and wo_replica == 1

    # The old replica will fail the restore but the error won't be recorded.
    # The rebuilding replica will then start a full restore.
    with pytest.raises(subprocess.CalledProcessError) as e:
        cmd.backup_restore(dr_address, backup0_info["URL"])
    assert "already restored backup" in e.value.stdout
    wait_for_restore_completion(dr_address, backup0_info["URL"])

    # Need to manually verify the rebuilding replica for the restore volume
    cmd.verify_rebuild_replica(dr_address, grpc_fixed_dir_replica2.url)
    replicas = grpc_controller_no_frontend.replica_list()
    assert len(replicas) == 2
    for r in replicas:
        assert r.mode == 'RW'

    # Delete the old replica then check if the rebuilt replica works fine.
    cleanup_replica(grpc_fixed_dir_replica1)
    grpc_controller_no_frontend.replica_delete(grpc_fixed_dir_replica1.address)
    verify_no_frontend_data(data0.offset, data0.content,
                            grpc_controller_no_frontend)

    cmd.backup_volume_rm(grpc_controller.address, VOLUME_NAME, backup_target)
Example 15
def test_inc_restore_with_rebuild_and_expansion(grpc_controller, grpc_replica1,
                                                grpc_replica2,
                                                grpc_controller_no_frontend,
                                                grpc_fixed_dir_replica1,
                                                grpc_fixed_dir_replica2,
                                                backup_targets):  # NOQA

    # Pick a random backup target.
    backup_target = backup_targets[random.randint(0, 1)]

    address = grpc_controller.address
    dr_address = grpc_controller_no_frontend.address

    try:
        cmd.backup_volume_rm(address, VOLUME_NAME, backup_target)
    except Exception:
        pass

    dev = get_dev(grpc_replica1, grpc_replica2, grpc_controller)

    start_no_frontend_volume(grpc_controller_no_frontend,
                             grpc_fixed_dir_replica1)

    data0_len = random_length(PAGE_SIZE)
    data0 = Data(random.randrange(0, SIZE - 2 * PAGE_SIZE, PAGE_SIZE),
                 data0_len, random_string(data0_len))
    snap0 = Snapshot(dev, data0, address)

    backup0_info = create_backup(address, snap0.name, backup_target)
    assert backup0_info["VolumeName"] == VOLUME_NAME
    assert backup0_info["Size"] == str(BLOCK_SIZE)

    cmd.backup_restore(dr_address, backup0_info["URL"])
    wait_for_restore_completion(dr_address, backup0_info["URL"])
    verify_no_frontend_data(data0.offset, data0.content,
                            grpc_controller_no_frontend)

    expand_volume_with_frontend(grpc_controller, EXPANDED_SIZE)
    wait_and_check_volume_expansion(grpc_controller, EXPANDED_SIZE)

    data1_len = random_length(PAGE_SIZE)
    data1 = Data(random.randrange(SIZE, EXPANDED_SIZE - PAGE_SIZE, PAGE_SIZE),
                 data1_len, random_string(data1_len))
    snap1 = Snapshot(dev, data1, address)

    backup1_info = create_backup(address, snap1.name, backup_target,
                                 EXPANDED_SIZE_STR)
    assert backup1_info["VolumeName"] == VOLUME_NAME
    assert backup1_info["Size"] == str(2 * BLOCK_SIZE)

    backup_volumes = cmd.backup_volume_list(address, VOLUME_NAME,
                                            backup_target)
    assert VOLUME_NAME in backup_volumes
    url = get_backup_volume_url(backup_target, VOLUME_NAME)
    backup_info = cmd.backup_inspect_volume(address, url)
    assert backup_info["Size"] == EXPANDED_SIZE_STR

    # restore command invocation should error out
    with pytest.raises(subprocess.CalledProcessError) as e:
        cmd.backup_restore(dr_address, backup1_info["URL"])
    assert "need to expand the DR volume" in e.value.stdout

    # The above restore error is triggered before calling the replicas.
    # Hence the error won't be recorded in the restore status
    # and we can continue restoring backups for the DR volume.
    rs = cmd.restore_status(dr_address)
    for status in rs.values():
        assert status['backupURL'] == backup0_info["URL"]
        assert status['lastRestored'] == backup0_info["Name"]
        assert 'error' not in status.keys()
        assert not status["isRestoring"]

    grpc_controller_no_frontend.volume_expand(EXPANDED_SIZE)
    wait_for_volume_expansion(grpc_controller_no_frontend, EXPANDED_SIZE)

    # This restore command will trigger a snapshot purge.
    # The error is triggered before calling the replicas.
    with pytest.raises(subprocess.CalledProcessError) as e:
        cmd.backup_restore(dr_address, backup1_info["URL"])
    assert "found more than 1 snapshot in the replicas, " \
           "hence started to purge snapshots before the restore" \
           in e.value.stdout
    wait_for_purge_completion(dr_address)

    snaps_info = cmd.snapshot_info(dr_address)
    assert len(snaps_info) == 2
    volume_head_name = "volume-head"
    snap_name = "expand-" + EXPANDED_SIZE_STR
    head_info = snaps_info[volume_head_name]
    assert head_info["name"] == volume_head_name
    assert head_info["parent"] == snap_name
    assert not head_info["children"]
    assert head_info["usercreated"] is False
    snap_info = snaps_info[snap_name]
    assert snap_info["name"] == snap_name
    assert not snap_info["parent"]
    assert volume_head_name in snap_info["children"]
    assert snap_info["usercreated"] is False

    cmd.backup_restore(dr_address, backup1_info["URL"])
    wait_for_restore_completion(dr_address, backup1_info["URL"])
    verify_no_frontend_data(data1.offset, data1.content,
                            grpc_controller_no_frontend)

    # For a DR volume, the rebuilding replica won't be expanded automatically.
    open_replica(grpc_fixed_dir_replica2)
    with pytest.raises(subprocess.CalledProcessError):
        cmd.add_replica(dr_address, grpc_fixed_dir_replica2.url, True)

    # Manually expand the rebuilding replica then retry `add-replica`.
    grpc_fixed_dir_replica2.replica_open()
    grpc_fixed_dir_replica2.replica_expand(EXPANDED_SIZE)
    grpc_fixed_dir_replica2.replica_close()
    cmd.add_replica(dr_address, grpc_fixed_dir_replica2.url, True)

    replicas = grpc_controller_no_frontend.replica_list()
    assert len(replicas) == 2
    rw_replica, wo_replica = 0, 0
    for r in replicas:
        if r.mode == 'RW':
            rw_replica += 1
        else:
            assert r.mode == "WO"
            wo_replica += 1
    assert rw_replica == 1 and wo_replica == 1

    # The old replica will fail the restore but the error won't be recorded.
    # The rebuilding replica will then start a full restore.
    with pytest.raises(subprocess.CalledProcessError) as e:
        cmd.backup_restore(dr_address, backup1_info["URL"])
    assert "already restored backup" in e.value.stdout
    wait_for_restore_completion(dr_address, backup1_info["URL"])

    cmd.verify_rebuild_replica(dr_address, grpc_fixed_dir_replica2.url)
    replicas = grpc_controller_no_frontend.replica_list()
    assert len(replicas) == 2
    for r in replicas:
        assert r.mode == 'RW'

    verify_no_frontend_data(data1.offset, data1.content,
                            grpc_controller_no_frontend)

    cmd.backup_volume_rm(grpc_controller.address, VOLUME_NAME, backup_target)