def recurring_job_labels_test(client, labels, volume_name,
                              size=SIZE, backing_image=""):  # NOQA
    """
    Check that labels set on a recurring backup job land on the backup.

    A backup job that fires every minute and retains one backup is installed
    on a new two-replica volume with `labels`. After the first wait an extra
    label is added to `labels` (the caller's dict is updated in place) and the
    job list is applied again. At the end exactly one backup must exist, and
    it must carry every entry of `labels` plus the recurring-job label. When
    `backing_image` is non-empty the backup's backing image name and URL are
    checked too.
    """
    node_id = get_self_host_id()
    client.create_volume(name=volume_name, size=size,
                         numberOfReplicas=2,
                         backingImage=backing_image)
    volume = common.wait_for_volume_detached(client, volume_name)

    # Back up once a minute and keep only a single backup.
    backup_job = {
        "name": RECURRING_JOB_NAME,
        "cron": "*/1 * * * *",
        "task": "backup",
        "retain": 1,
        "labels": labels,
    }
    jobs = [backup_job]
    volume.recurringUpdate(jobs=jobs)
    volume.attach(hostId=node_id)
    volume = wait_for_volume_healthy(client, volume_name)

    write_volume_random_data(volume)
    time.sleep(75)  # 1 minute 15 seconds

    labels["we-added-this-label"] = "definitely"
    backup_job["labels"] = labels
    volume = volume.recurringUpdate(jobs=jobs)
    volume = wait_for_volume_healthy(client, volume_name)

    write_volume_random_data(volume)
    time.sleep(135)  # 2 minutes 15 seconds

    # One snapshot from the backup, one for the volume head.
    remaining = sum(
        1 for snap in volume.snapshotList() if snap.removed is False)
    assert remaining == 2

    # Inspect the labels stored on the backup itself.
    backup_volume = client.by_id_backupVolume(volume_name)
    backup_list = backup_volume.backupList().data
    assert len(backup_list) == 1

    backup = backup_volume.backupGet(name=backup_list[0].name)
    for key, val in labels.items():
        assert backup.labels.get(key) == val
    assert backup.labels.get(RECURRING_JOB_LABEL) == RECURRING_JOB_NAME
    # The recurring job contributes one label of its own.
    assert len(backup.labels) == len(labels) + 1

    if backing_image:
        assert backup.volumeBackingImageName == backing_image
        assert backup.volumeBackingImageURL != ""

    cleanup_volume(client, volume)
def test_csi_expansion_with_size_round_up(client, core_api):  # NOQA
    """
    Expand a Longhorn volume and check the reported size after each step.

    1. Create a 2-replica volume of size 1Gi.
    2. Attach it, write data, detach.
    3. Expand to "2000000000" and expect the size to read "2000683008".
    4. Attach, verify the old data, write new data, detach.
    5. Expand to 2Gi and expect the size to read "2147483648".
    6. Attach, verify the data, detach and delete the volume.
    """
    volume_name = generate_volume_name()
    volume = create_and_check_volume(client, volume_name, 2, str(1 * Gi))

    # Initial write on the 1Gi volume.
    volume.attach(hostId=get_self_host_id(), disableFrontend=False)
    volume = wait_for_volume_healthy(client, volume_name)
    written = write_volume_random_data(volume)
    volume.detach(hostId="")
    volume = wait_for_volume_detached(client, volume_name)

    # First expansion: the requested size is not the size that is reported.
    volume.expand(size="2000000000")
    wait_for_volume_expansion(client, volume_name)
    volume = client.by_id_volume(volume_name)
    assert volume.size == "2000683008"

    volume.attach(hostId=get_self_host_id(), disableFrontend=False)
    volume = wait_for_volume_healthy(client, volume_name)
    check_volume_data(volume, written, False)
    written = write_volume_random_data(volume)
    volume.detach(hostId="")
    volume = wait_for_volume_detached(client, volume_name)

    # Second expansion to 2Gi.
    volume.expand(size=str(2 * Gi))
    wait_for_volume_expansion(client, volume_name)
    volume = client.by_id_volume(volume_name)
    assert volume.size == "2147483648"

    volume.attach(hostId=get_self_host_id(), disableFrontend=False)
    volume = wait_for_volume_healthy(client, volume_name)
    check_volume_data(volume, written, False)
    volume.detach(hostId="")
    volume = wait_for_volume_detached(client, volume_name)

    client.delete(volume)
    wait_for_volume_delete(client, volume_name)
def test_attach_without_frontend(clients, volume_name):  # NOQA
    """
    Attach a volume with and without its frontend and revert a snapshot.

    1. Create a volume and attach it with the frontend enabled; check that
       `disableFrontend` is False and `frontend` is "blockdev".
    2. Write data and take snapshot `snap1`, then write more data and take
       another snapshot.
    3. Detach, re-attach with `disableFrontend=True`; check that the engine
       endpoint is empty.
    4. Revert to `snap1`, detach, re-attach with the frontend enabled.
    5. Verify the volume holds the data written before `snap1`.
    """
    # Take the first client of the mapping. dict.iteritems() only exists on
    # Python 2; values() behaves the same on both major versions.
    client = next(iter(clients.values()))

    volume = create_and_check_volume(client, volume_name)

    lht_hostId = get_self_host_id()
    volume.attach(hostId=lht_hostId, disableFrontend=False)
    common.wait_for_volume_healthy(client, volume_name)

    volume = client.by_id_volume(volume_name)
    assert volume["disableFrontend"] is False
    assert volume["frontend"] == "blockdev"

    snap1_data = write_volume_random_data(volume)
    snap1 = volume.snapshotCreate()

    write_volume_random_data(volume)
    volume.snapshotCreate()

    volume.detach()
    volume = common.wait_for_volume_detached(client, volume_name)

    volume.attach(hostId=lht_hostId, disableFrontend=True)
    common.wait_for_volume_healthy(client, volume_name)

    volume = client.by_id_volume(volume_name)
    engine = get_volume_engine(volume)
    assert volume["disableFrontend"] is True
    assert volume["frontend"] == "blockdev"
    assert engine["endpoint"] == ""

    volume.snapshotRevert(name=snap1["name"])

    volume.detach()
    volume = common.wait_for_volume_detached(client, volume_name)

    volume.attach(hostId=lht_hostId, disableFrontend=False)
    common.wait_for_volume_healthy(client, volume_name)

    volume = client.by_id_volume(volume_name)
    assert volume["disableFrontend"] is False
    assert volume["frontend"] == "blockdev"

    check_volume_data(volume, snap1_data)

    client.delete(volume)
    wait_for_volume_delete(client, volume_name)
def test_recurring_job_in_volume_creation(set_random_backupstore, client, volume_name):  # NOQA
    """
    Create a volume that already carries recurring jobs and check them.

    1. Creating the volume with the same job list twice over must raise an
       error mentioning "duplicate job".
    2. Create the volume with recurring jobs through the Longhorn API.
    3. Write data in two rounds and verify the recurring job results.
    4. Detach and delete the volume; no volume may be left afterwards.
    """
    node_id = get_self_host_id()

    # Duplicate jobs must be rejected at creation time.
    with pytest.raises(Exception) as err:
        client.create_volume(name=volume_name, size=SIZE,
                             numberOfReplicas=2,
                             recurringJobs=create_jobs1() + create_jobs1())
    assert "duplicate job" in str(err.value)

    client.create_volume(name=volume_name, size=SIZE,
                         numberOfReplicas=2, recurringJobs=create_jobs1())
    volume = common.wait_for_volume_detached(client, volume_name)

    volume.attach(hostId=node_id)
    volume = wait_for_volume_healthy(client, volume_name)

    # Start from the beginning of an even minute, then move to its 10th
    # second so that writing data does not coincide with a backup.
    wait_until_begin_of_an_even_minute()
    time.sleep(10)

    for _ in range(2):
        write_volume_random_data(volume)
        time.sleep(150)  # 2.5 minutes

    check_jobs1_result(volume)

    volume = volume.detach(hostId="")
    common.wait_for_volume_detached(client, volume_name)

    client.delete(volume)
    wait_for_volume_delete(client, volume_name)

    assert len(client.list_volume()) == 0
def create_backup(client, volname, data=None):
    """
    Write data to a volume, back up a snapshot of it and verify the backup.

    :param client: Longhorn API client.
    :param volname: name of the volume to back up.
    :param data: data to write before the snapshot; when empty or omitted,
        random data is written instead.
    :return: tuple `(backup_volume, backup, snapshot, data)` where `data` is
        whatever the write helper returned.
    """
    volume = client.by_id_volume(volname)
    volume.snapshotCreate()
    # `None` replaces the former mutable `{}` default; both take this branch.
    if not data:
        data = write_volume_random_data(volume)
    else:
        data = write_volume_data(volume, data)
    snap = volume.snapshotCreate()
    volume.snapshotCreate()
    volume.snapshotBackup(name=snap["name"])

    bv, b = common.find_backup(client, volname, snap["name"])

    # The backup fetched by name must match the one found by snapshot.
    new_b = bv.backupGet(name=b["name"])
    assert new_b["name"] == b["name"]
    assert new_b["url"] == b["url"]
    assert new_b["snapshotName"] == b["snapshotName"]
    assert new_b["snapshotCreated"] == b["snapshotCreated"]
    assert new_b["created"] == b["created"]
    assert new_b["volumeName"] == b["volumeName"]
    assert new_b["volumeSize"] == b["volumeSize"]
    assert new_b["volumeCreated"] == b["volumeCreated"]

    volume = wait_for_volume_status(client, volname,
                                    "lastBackup", b["name"])
    assert volume["lastBackupAt"] != ""

    return bv, b, snap, data
def test_soft_anti_affinity_scheduling(client, volume_name):  # NOQA
    """
    Test that volumes with Soft Anti-Affinity work as expected.

    With Soft Anti-Affinity, a new replica should still be scheduled on a node
    with an existing replica, which will result in "Healthy" state but limited
    redundancy.
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_SOFT_ANTI_AFFINITY)
    client.update(setting, value="true")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    # Keep the names in a real list: on Python 3 a bare map() is a one-shot
    # iterator, which is unsafe to hand to a helper that may scan it more
    # than once.
    replica_names = [replica.name for replica in volume["replicas"]]
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica["name"])
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
def test_hard_anti_affinity_offline_rebuild(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity can build new replicas during
    the attaching process once a valid node is available.

    Once a new replica has been built as part of the attaching process, the
    volume should be Healthy again.
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    # Keep the names in a real list: on Python 3 a bare map() is a one-shot
    # iterator, which is unsafe to hand to a helper that may scan it more
    # than once.
    replica_names = [replica.name for replica in volume["replicas"]]
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica["name"])
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)

    volume.detach()
    volume = wait_for_volume_detached(client, volume_name)

    # Re-enable scheduling on the host node before attaching again.
    client.update(node, allowScheduling=True)
    volume.attach(hostId=host_id)
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
def test_hard_anti_affinity_live_rebuild(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity can build new replicas live once
    a valid node is available.

    If no nodes without existing replicas are available, the volume should
    remain in "Degraded" state. However, once one is available, the replica
    should now be scheduled successfully, with the volume returning to
    "Healthy" state.
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    # Keep the names in a real list: on Python 3 a bare map() is a one-shot
    # iterator, which is unsafe to hand to a helper that may scan it more
    # than once.
    replica_names = [replica.name for replica in volume["replicas"]]
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica["name"])
    wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)

    # Allow scheduling on host node again
    client.update(node, allowScheduling=True)
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
def test_soft_anti_affinity_detach(client, volume_name):  # NOQA
    """
    Test that volumes with Soft Anti-Affinity can detach and reattach to a
    node properly.
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_SOFT_ANTI_AFFINITY)
    client.update(setting, value="true")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    # Keep the names in a real list: on Python 3 a bare map() is a one-shot
    # iterator, which is unsafe to hand to a helper that may scan it more
    # than once.
    replica_names = [replica.name for replica in volume["replicas"]]
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica["name"])
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)

    # The replica count must survive a detach / attach cycle.
    volume.detach()
    volume = wait_for_volume_detached(client, volume_name)
    assert len(volume["replicas"]) == 3

    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
def test_hard_anti_affinity_detach(client, volume_name):  # NOQA
    """
    Check that a degraded volume under Hard Anti-Affinity can still be
    detached and attached again.

    1. Create a volume and attach it to the current node.
    2. Write `data` to the volume.
    3. Set `soft anti-affinity` to false.
    4. Disable scheduling on the current node.
    5. Remove the replica on the current node; the volume must become
       degraded and report a scheduling failure.
    6. Detach the volume; only 2 replicas must be left.
    7. Attach the volume again; it must be degraded with a scheduling
       failure, two running "RW" replicas, one replica without a `hostId`
       and three replicas in total.
    8. Check the volume `data`.
    """
    volume = create_and_check_volume(client, volume_name)
    node_id = get_self_host_id()
    volume.attach(hostId=node_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    data = write_volume_random_data(volume)

    # Hard anti-affinity, and the local node may not take new replicas.
    client.update(
        client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY),
        value="false")
    client.update(client.by_id_node(node_id), allowScheduling=False)

    local_replica = get_host_replica(volume, node_id)
    volume.replicaRemove(name=local_replica.name)
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)

    volume.detach()
    volume = wait_for_volume_detached(client, volume_name)
    assert len(volume.replicas) == 2

    volume.attach(hostId=node_id)
    # Make sure we're still not getting another successful replica.
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)

    running_rw = [
        r for r in volume.replicas if r.running and r.mode == "RW"
    ]
    assert len(running_rw) == 2
    unscheduled = [r for r in volume.replicas if not r.hostId]
    assert len(unscheduled) == 1
    assert len(volume.replicas) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
def test_hard_anti_affinity_scheduling(client, volume_name):  # NOQA
    """
    Check replica scheduling under Hard Anti-Affinity.

    With Hard Anti-Affinity, scheduling onto a node that already holds a
    replica should be forbidden, leaving the volume "Degraded".

    1. Create a volume and attach it to the current node.
    2. Write `data` to the volume.
    3. Set `soft anti-affinity` to false.
    4. Disable scheduling on the current node.
    5. Remove the replica on the current node; the volume must become
       degraded and report a scheduling failure, with two running "RW"
       replicas, one replica without a `hostId` and three in total.
    6. Check the volume `data`.
    """
    volume = create_and_check_volume(client, volume_name)
    node_id = get_self_host_id()
    volume.attach(hostId=node_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    data = write_volume_random_data(volume)

    client.update(
        client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY),
        value="false")
    client.update(client.by_id_node(node_id), allowScheduling=False)

    local_replica = get_host_replica(volume, node_id)
    volume.replicaRemove(name=local_replica.name)

    # Rather than waiting for a timeout, treat "Degraded plus a scheduling
    # error" as proof that the replacement replica cannot be scheduled.
    wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)

    volume = client.by_id_volume(volume_name)

    # Three replicas are requested but only two of them are running "RW".
    running_rw = [
        r for r in volume.replicas if r.running and r.mode == "RW"
    ]
    assert len(running_rw) == 2

    # The remaining replica has no host assigned.
    unscheduled = [r for r in volume.replicas if not r.hostId]
    assert len(unscheduled) == 1

    # All three replica objects still exist.
    assert len(volume.replicas) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
def backupstore_test(client, host_id, volname, size):
    """
    Back up a snapshot, restore it into a new volume and delete the backup.

    Random data is written between snapshots of volume `volname`; the middle
    snapshot is backed up. A new two-replica volume of `size` is created from
    that backup, attached to `host_id`, and its content is compared with the
    written data. Finally the restored volume and the backup are deleted and
    the backup must no longer be listed.
    """
    volume = client.by_id_volume(volname)
    volume.snapshotCreate()
    data = write_volume_random_data(volume)
    snap2 = volume.snapshotCreate()
    volume.snapshotCreate()

    volume.snapshotBackup(name=snap2["name"])

    bv, b = common.find_backup(client, volname, snap2["name"])

    # The backup fetched by name must agree with the one found above.
    fetched = bv.backupGet(name=b["name"])
    for field in ("name", "url", "snapshotName", "snapshotCreated",
                  "created", "volumeName", "volumeSize", "volumeCreated"):
        assert fetched[field] == b[field]

    # test restore
    restore_name = generate_volume_name()
    client.create_volume(name=restore_name, size=size,
                         numberOfReplicas=2,
                         fromBackup=b["url"])
    restored = common.wait_for_volume_detached(client, restore_name)
    assert restored["name"] == restore_name
    assert restored["size"] == size
    assert restored["numberOfReplicas"] == 2
    assert restored["state"] == "detached"

    restored.attach(hostId=host_id)
    restored = common.wait_for_volume_healthy(client, restore_name)
    check_volume_data(restored, data)

    restored.detach()
    restored = common.wait_for_volume_detached(client, restore_name)

    client.delete(restored)
    wait_for_volume_delete(client, restore_name)

    bv.backupDelete(name=b["name"])

    # The deleted backup must not show up in the listing any more.
    assert not any(
        entry["snapshotName"] == snap2["name"] for entry in bv.backupList())
def ha_salvage_test(client, volume_name, base_image=""):  # NOQA
    """
    Fault a two-replica volume, salvage both replicas and verify its data.

    The volume is created with `base_image`, attached and filled with random
    data. Its replica pods are then deleted so that the volume becomes
    faulted with both replicas marked as failed. After salvaging both
    replicas the volume must come back detached with no failed replica, and
    re-attaching it must expose the data written earlier. The volume is
    deleted at the end and no volume may remain.
    """
    client.create_volume(name=volume_name, size=SIZE,
                         numberOfReplicas=2, baseImage=base_image)
    volume = common.wait_for_volume_detached(client, volume_name)
    assert volume["name"] == volume_name
    assert volume["size"] == SIZE
    assert volume["numberOfReplicas"] == 2
    assert volume["state"] == "detached"
    assert volume["created"] != ""
    assert volume["baseImage"] == base_image

    node_id = get_self_host_id()
    volume.attach(hostId=node_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    assert len(volume["replicas"]) == 2
    salvage_names = [
        volume["replicas"][0]["name"],
        volume["replicas"][1]["name"],
    ]

    data = write_volume_random_data(volume)

    # Kill the replica pods so that every replica fails.
    common.k8s_delete_replica_pods_for_volume(volume_name)

    volume = common.wait_for_volume_faulted(client, volume_name)
    assert len(volume["replicas"]) == 2
    for replica in volume["replicas"]:
        assert replica["failedAt"] != ""

    volume.salvage(names=salvage_names)

    volume = common.wait_for_volume_detached(client, volume_name)
    assert len(volume["replicas"]) == 2
    for replica in volume["replicas"]:
        assert replica["failedAt"] == ""

    volume.attach(hostId=node_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    check_volume_data(volume, data)

    volume.detach()
    volume = common.wait_for_volume_detached(client, volume_name)

    client.delete(volume)
    common.wait_for_volume_delete(client, volume_name)

    assert len(client.list_volume()) == 0
def ha_salvage_test(client, volume_name, base_image=""):  # NOQA
    """
    Fault a two-replica volume, salvage both replicas and verify its data.

    The volume is created with `base_image`, attached and filled with random
    data. Its replica pods are then deleted so that the volume becomes
    faulted with both replicas marked as failed. After salvaging both
    replicas the volume must come back detached with no failed replica, and
    re-attaching it must expose the data written earlier. The volume is
    deleted at the end and no volume may remain.
    """
    client.create_volume(name=volume_name, size=SIZE,
                         numberOfReplicas=2, baseImage=base_image)
    volume = common.wait_for_volume_detached(client, volume_name)
    assert volume["name"] == volume_name
    assert volume["size"] == SIZE
    assert volume["numberOfReplicas"] == 2
    assert volume["state"] == "detached"
    assert volume["created"] != ""
    assert volume["baseImage"] == base_image

    node_id = get_self_host_id()
    volume.attach(hostId=node_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    assert len(volume["replicas"]) == 2
    salvage_names = [
        volume["replicas"][0]["name"],
        volume["replicas"][1]["name"],
    ]

    data = write_volume_random_data(volume)

    # Kill the replica pods so that every replica fails.
    common.k8s_delete_replica_pods_for_volume(volume_name)

    volume = common.wait_for_volume_faulted(client, volume_name)
    assert len(volume["replicas"]) == 2
    for replica in volume["replicas"]:
        assert replica["failedAt"] != ""

    volume.salvage(names=salvage_names)

    volume = common.wait_for_volume_detached(client, volume_name)
    assert len(volume["replicas"]) == 2
    for replica in volume["replicas"]:
        assert replica["failedAt"] == ""

    volume.attach(hostId=node_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    check_volume_data(volume, data)

    volume.detach()
    volume = common.wait_for_volume_detached(client, volume_name)

    client.delete(volume)
    common.wait_for_volume_delete(client, volume_name)

    assert len(client.list_volume()) == 0
def test_hard_anti_affinity_offline_rebuild(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity can build new replicas during
    the attaching process once a valid node is available.

    Once a new replica has been built as part of the attaching process, the
    volume should be Healthy again.

    1. Create a volume and attach to the current node
    2. Generate and write `data` to the volume.
    3. Set `soft anti-affinity` to false
    4. Disable current node's scheduling.
    5. Remove the replica on the current node
        1. Verify volume will be in degraded state.
        2. Verify volume reports condition `scheduled == false`
    6. Detach the volume.
    7. Enable current node's scheduling.
    8. Attach the volume again.
    9. Wait for volume to become healthy with 3 replicas
    10. Check volume `data`
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    # Keep the names in a real list: on Python 3 a bare map() is a one-shot
    # iterator, which is unsafe to hand to a helper that may scan it more
    # than once.
    replica_names = [replica.name for replica in volume.replicas]
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica.name)
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)

    volume.detach()
    volume = wait_for_volume_detached(client, volume_name)

    # Re-enable scheduling on the host node before attaching again.
    client.update(node, allowScheduling=True)
    volume.attach(hostId=host_id)
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
def test_hard_anti_affinity_live_rebuild(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity can build new replicas live once
    a valid node is available.

    If no nodes without existing replicas are available, the volume should
    remain in "Degraded" state. However, once one is available, the replica
    should now be scheduled successfully, with the volume returning to
    "Healthy" state.

    1. Create a volume and attach to the current node
    2. Generate and write `data` to the volume.
    3. Set `soft anti-affinity` to false
    4. Disable current node's scheduling.
    5. Remove the replica on the current node
        1. Verify volume will be in degraded state.
        2. Verify volume reports condition `scheduled == false`
    6. Enable the current node's scheduling
    7. Wait for volume to start rebuilding and become healthy again
    8. Check volume `data`
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    # Keep the names in a real list: on Python 3 a bare map() is a one-shot
    # iterator, which is unsafe to hand to a helper that may scan it more
    # than once.
    replica_names = [replica.name for replica in volume.replicas]
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica.name)
    wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)

    # Allow scheduling on host node again
    client.update(node, allowScheduling=True)
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
def ha_rebuild_replica_test(client, volname):  # NOQA
    """
    Remove one replica of a two-replica volume and wait for it to rebuild.

    After random data is written, the first replica is removed. The volume is
    polled (up to RETRY_COUNTS times, RETRY_INTERVAL apart) until a replica
    with a new name appears. The volume must then become healthy, still
    contain the surviving original replica, and hold the written data.
    """
    volume = client.by_id_volume(volname)
    assert get_volume_endpoint(volume) == DEV_PATH + volname

    assert len(volume["replicas"]) == 2
    removed, survivor = volume["replicas"][0], volume["replicas"][1]
    assert removed["name"] != ""
    assert survivor["name"] != ""

    data = write_volume_random_data(volume)

    volume.replicaRemove(name=removed["name"])

    # Poll until a replica that is neither of the original two shows up.
    original_names = (removed["name"], survivor["name"])
    new_replica_found = False
    for _ in range(RETRY_COUNTS):
        current = client.by_id_volume(volname)
        new_replica_found = any(
            r["name"] not in original_names for r in current["replicas"])
        if new_replica_found:
            break
        time.sleep(RETRY_INTERVAL)
    assert new_replica_found

    common.wait_for_volume_healthy(client, volname)

    volume = client.by_id_volume(volname)
    assert volume["state"] == common.VOLUME_STATE_ATTACHED
    assert volume["robustness"] == common.VOLUME_ROBUSTNESS_HEALTHY
    assert len(volume["replicas"]) >= 2

    # The replica that was not removed must still be part of the volume.
    assert any(
        replica["name"] == survivor["name"]
        for replica in volume["replicas"])

    check_volume_data(volume, data)
def ha_rebuild_replica_test(client, volname):  # NOQA
    """
    Remove one replica of a two-replica volume and wait for it to rebuild.

    After random data is written, the first replica is removed. The volume is
    polled (up to RETRY_COUNTS times, RETRY_INTERVAL apart) until a replica
    with a new name appears. The volume must then become healthy, still
    contain the surviving original replica, and hold the written data.
    """
    volume = client.by_id_volume(volname)
    assert get_volume_endpoint(volume) == DEV_PATH + volname

    assert len(volume["replicas"]) == 2
    removed, survivor = volume["replicas"][0], volume["replicas"][1]
    assert removed["name"] != ""
    assert survivor["name"] != ""

    data = write_volume_random_data(volume)

    volume.replicaRemove(name=removed["name"])

    # Poll until a replica that is neither of the original two shows up.
    original_names = (removed["name"], survivor["name"])
    new_replica_found = False
    for _ in range(RETRY_COUNTS):
        current = client.by_id_volume(volname)
        new_replica_found = any(
            r["name"] not in original_names for r in current["replicas"])
        if new_replica_found:
            break
        time.sleep(RETRY_INTERVAL)
    assert new_replica_found

    common.wait_for_volume_healthy(client, volname)

    volume = client.by_id_volume(volname)
    assert volume["state"] == common.VOLUME_STATE_ATTACHED
    assert volume["robustness"] == common.VOLUME_ROBUSTNESS_HEALTHY
    assert len(volume["replicas"]) >= 2

    # The replica that was not removed must still be part of the volume.
    assert any(
        replica["name"] == survivor["name"]
        for replica in volume["replicas"])

    check_volume_data(volume, data)
def test_soft_anti_affinity_detach(client, volume_name):  # NOQA
    """
    Check that a volume under Soft Anti-Affinity keeps its replicas across a
    detach / attach cycle.

    1. Create a volume and attach it to the current node.
    2. Write `data` to the volume.
    3. Set `soft anti-affinity` to true.
    4. Disable scheduling on the current node.
    5. Remove the replica on the current node.
    6. Wait for the new replica to be ready and the volume to be healthy.
    7. Detach the volume and verify there are 3 replicas.
    8. Attach the volume again and verify there are still 3 replicas.
    9. Verify the `data`.
    """
    volume = create_and_check_volume(client, volume_name)
    node_id = get_self_host_id()
    volume.attach(hostId=node_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    data = write_volume_random_data(volume)

    client.update(
        client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY),
        value="true")
    client.update(client.by_id_node(node_id), allowScheduling=False)

    # Names of the replicas that exist before the removal.
    old_names = [r.name for r in volume.replicas]
    local_replica = get_host_replica(volume, node_id)

    volume.replicaRemove(name=local_replica.name)
    wait_new_replica_ready(client, volume_name, old_names)
    volume = wait_for_volume_healthy(client, volume_name)

    volume.detach()
    volume = wait_for_volume_detached(client, volume_name)
    assert len(volume.replicas) == 3

    volume.attach(hostId=node_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
def setup_migration_test(clients, volume_name, backing_image=""):  # NOQA
    """
    Prepare a migratable volume that already contains test data.

    A volume with access mode "rwx" and `migratable=True` is created through
    a randomly chosen client, attached to the current node, filled with
    random data (which is read back and checked) and detached again.

    Returns the tuple `(client, volume, data)`.
    """
    client = get_random_client(clients)
    client.create_volume(name=volume_name, size=SIZE,
                         numberOfReplicas=REPLICA_COUNT,
                         backingImage=backing_image,
                         accessMode="rwx", migratable=True)
    volume = common.wait_for_volume_detached(client, volume_name)

    volume.attach(hostId=common.get_self_host_id())
    volume = common.wait_for_volume_healthy(client, volume_name)

    # Write the test data and make sure it reads back correctly.
    data = common.write_volume_random_data(volume)
    common.check_volume_data(volume, data)

    volume.detach(hostId="")
    volume = common.wait_for_volume_detached(client, volume_name)

    return client, volume, data
def test_hard_anti_affinity_scheduling(client, volume_name):  # NOQA
    """
    Check replica scheduling under Hard Anti-Affinity.

    With Hard Anti-Affinity, scheduling onto a node that already holds a
    replica should be forbidden, leaving the volume "Degraded".
    """
    volume = create_and_check_volume(client, volume_name)
    node_id = get_self_host_id()
    volume.attach(hostId=node_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    data = write_volume_random_data(volume)

    client.update(
        client.by_id_setting(SETTING_REPLICA_SOFT_ANTI_AFFINITY),
        value="false")
    client.update(client.by_id_node(node_id), allowScheduling=False)

    local_replica = get_host_replica(volume, node_id)
    volume.replicaRemove(name=local_replica["name"])

    # Rather than waiting for a timeout, treat "Degraded plus a scheduling
    # error" as proof that the replacement replica cannot be scheduled.
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)

    # Three replicas are requested but only two of them are running "RW".
    running_rw = [
        r for r in volume["replicas"]
        if r["running"] and r["mode"] == "RW"
    ]
    assert len(running_rw) == 2

    # The remaining replica has no host assigned.
    unscheduled = [r for r in volume["replicas"] if not r["hostId"]]
    assert len(unscheduled) == 1

    # All three replica objects still exist.
    assert len(volume["replicas"]) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
def test_hard_anti_affinity_detach(client, volume_name):  # NOQA
    """
    Check that a degraded volume under Hard Anti-Affinity can still be
    detached and attached again.
    """
    volume = create_and_check_volume(client, volume_name)
    node_id = get_self_host_id()
    volume.attach(hostId=node_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    data = write_volume_random_data(volume)

    client.update(
        client.by_id_setting(SETTING_REPLICA_SOFT_ANTI_AFFINITY),
        value="false")
    client.update(client.by_id_node(node_id), allowScheduling=False)

    local_replica = get_host_replica(volume, node_id)
    volume.replicaRemove(name=local_replica["name"])
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)

    # Two replicas are left once the volume is detached.
    volume.detach()
    volume = wait_for_volume_detached(client, volume_name)
    assert len(volume["replicas"]) == 2

    volume.attach(hostId=node_id)
    # Make sure we're still not getting another successful replica.
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)

    running_rw = [
        r for r in volume["replicas"]
        if r["running"] and r["mode"] == "RW"
    ]
    assert len(running_rw) == 2
    unscheduled = [r for r in volume["replicas"] if not r["hostId"]]
    assert len(unscheduled) == 1
    assert len(volume["replicas"]) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
def test_soft_anti_affinity_scheduling(client, volume_name):  # NOQA
    """
    Check replica scheduling when Soft Anti-Affinity is switched on.

    With Soft Anti-Affinity, a new replica should still be scheduled on a
    node with an existing replica, which results in a "Healthy" volume
    with limited redundancy.

    1. Create a volume and attach it to the current node.
    2. Generate and write `data` to the volume.
    3. Set `soft anti-affinity` to true.
    4. Disable scheduling on the current node.
    5. Remove the replica on the current node.
    6. Wait for the rebuild to complete. The volume should have 3
       replicas.
    7. Verify `data`.
    """
    vol = create_and_check_volume(client, volume_name)
    self_node_id = get_self_host_id()
    vol.attach(hostId=self_node_id)
    vol = wait_for_volume_healthy(client, volume_name)
    assert len(vol.replicas) == 3

    written = write_volume_random_data(vol)

    soft_setting = client.by_id_setting(
        SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(soft_setting, value="true")
    self_node = client.by_id_node(self_node_id)
    client.update(self_node, allowScheduling=False)

    # Record the replica names before removal; they are passed to
    # wait_new_replica_ready below.
    names_before = [r.name for r in vol.replicas]
    local_replica = get_host_replica(vol, self_node_id)
    vol.replicaRemove(name=local_replica.name)

    wait_new_replica_ready(client, volume_name, names_before)
    vol = wait_for_volume_healthy(client, volume_name)
    assert len(vol.replicas) == 3

    check_volume_data(vol, written)
    cleanup_volume(client, vol)
def ha_salvage_test(client, volume_name, base_image=""):  # NOQA
    """
    Fault a two-replica volume, salvage both replicas and verify the data.

    1. Create a volume with 2 replicas (optionally on `base_image`),
       attach it to the current node and write `data`.
    2. Delete the replica pods of the volume and wait for it to become
       faulted; both replicas should have `failedAt` set.
    3. Salvage both replicas and wait for the volume to be detached;
       `failedAt` should be cleared on both.
    4. Attach the volume again and verify `data`.
    """
    vol = create_and_check_volume(client, volume_name, 2,
                                  base_image=base_image)

    self_node_id = get_self_host_id()
    vol = vol.attach(hostId=self_node_id)
    vol = common.wait_for_volume_healthy(client, volume_name)

    assert len(vol["replicas"]) == 2
    replica_names = [vol["replicas"][idx]["name"] for idx in (0, 1)]

    written = write_volume_random_data(vol)

    common.k8s_delete_replica_pods_for_volume(volume_name)

    vol = common.wait_for_volume_faulted(client, volume_name)
    assert len(vol["replicas"]) == 2
    for idx in (0, 1):
        assert vol["replicas"][idx]["failedAt"] != ""

    vol.salvage(names=replica_names)

    vol = common.wait_for_volume_detached(client, volume_name)
    assert len(vol["replicas"]) == 2
    for idx in (0, 1):
        assert vol["replicas"][idx]["failedAt"] == ""

    vol = vol.attach(hostId=self_node_id)
    vol = common.wait_for_volume_healthy(client, volume_name)

    check_volume_data(vol, written)

    cleanup_volume(client, vol)
def engine_offline_upgrade_test(client, volume_name, base_image=""):  # NOQA
    """
    Upgrade and then downgrade the engine image of a detached volume.

    1. Build an upgrade test image from the default engine image's
       version fields and deploy it as a new engine image.
    2. Create a volume, attach it, write `data`, and detach it.
    3. Upgrade the detached volume to the new image and check the
       reference counts of both engine images.
    4. Verify that the in-use engine image cannot be deleted.
    5. Attach the volume, check engine/replica images and `data`, detach.
    6. Switch the volume back to the original image, re-check the
       reference counts, attach and check images and `data` again.
    7. Delete the volume and the new engine image.
    """
    default_ei = common.get_default_engine_image(client)
    default_ei_name = default_ei["name"]
    default_ei = wait_for_engine_image_ref_count(client, default_ei_name, 0)

    # The six version fields are passed positionally, in this order.
    version_fields = (
        "cliAPIVersion", "cliAPIMinVersion",
        "controllerAPIVersion", "controllerAPIMinVersion",
        "dataFormatVersion", "dataFormatMinVersion",
    )
    upgrade_image = common.get_upgrade_test_image(
        *[default_ei[field] for field in version_fields])

    upgrade_ei = client.create_engine_image(image=upgrade_image)
    upgrade_ei_name = upgrade_ei["name"]
    upgrade_ei = wait_for_engine_image_state(client, upgrade_ei_name,
                                             "ready")
    assert upgrade_ei["refCount"] == 0
    assert upgrade_ei["noRefSince"] != ""

    default_ei = common.get_default_engine_image(client)
    default_ei_name = default_ei["name"]

    vol = client.create_volume(name=volume_name, size=SIZE,
                               numberOfReplicas=REPLICA_COUNT,
                               baseImage=base_image)
    vol = common.wait_for_volume_detached(client, volume_name)
    default_ei = wait_for_engine_image_ref_count(client, default_ei_name, 1)

    original_image = default_ei["image"]

    assert vol["name"] == volume_name
    assert vol["engineImage"] == original_image
    assert vol["currentImage"] == original_image
    assert vol["baseImage"] == base_image

    # Before our upgrade, write data to the volume first.
    self_node_id = get_self_host_id()
    vol = vol.attach(hostId=self_node_id)
    vol = common.wait_for_volume_healthy(client, volume_name)
    written = write_volume_random_data(vol)
    vol = vol.detach()
    vol = common.wait_for_volume_detached(client, volume_name)

    vol.engineUpgrade(image=upgrade_image)
    vol = wait_for_volume_current_image(client, volume_name, upgrade_image)
    default_ei = wait_for_engine_image_ref_count(client, default_ei_name, 0)
    upgrade_ei = wait_for_engine_image_ref_count(client, upgrade_ei_name, 1)

    # cannot delete a image in use
    with pytest.raises(Exception) as err:
        client.delete(upgrade_ei)
    assert "while being used" in str(err.value)

    image_keys = ("engineImage", "currentImage")

    vol = vol.attach(hostId=self_node_id)
    vol = common.wait_for_volume_healthy(client, volume_name)
    engine = get_volume_engine(vol)
    for key in image_keys:
        assert engine[key] == upgrade_image
    for replica in vol["replicas"]:
        for key in image_keys:
            assert replica[key] == upgrade_image

    check_volume_data(vol, written)

    vol = vol.detach()
    vol = common.wait_for_volume_detached(client, volume_name)

    vol.engineUpgrade(image=original_image)
    vol = wait_for_volume_current_image(client, volume_name, original_image)
    engine = get_volume_engine(vol)
    assert vol["engineImage"] == original_image
    assert engine["engineImage"] == original_image
    for replica in vol["replicas"]:
        assert replica["engineImage"] == original_image

    default_ei = wait_for_engine_image_ref_count(client, default_ei_name, 1)
    upgrade_ei = wait_for_engine_image_ref_count(client, upgrade_ei_name, 0)

    vol = vol.attach(hostId=self_node_id)
    vol = common.wait_for_volume_healthy(client, volume_name)
    engine = get_volume_engine(vol)
    for key in image_keys:
        assert engine[key] == original_image
    for replica in vol["replicas"]:
        for key in image_keys:
            assert replica[key] == original_image

    check_volume_data(vol, written)

    client.delete(vol)
    wait_for_volume_delete(client, volume_name)

    client.delete(upgrade_ei)
def engine_live_upgrade_rollback_test(client, volume_name, base_image=""):  # NOQA
    """
    Live-upgrade an attached volume to an image that cannot complete the
    upgrade, then recover from it.

    1. Build a compatibility test image from the default engine image's
       version fields and deploy it as a new engine image.
    2. Create a volume with `REPLICA_COUNT` replicas, attach it and
       write `data`.
    3. Request a live upgrade to the test image; `engineImage` changes
       but `currentImage` is expected to stay on the original image
       (waiting for it to change is expected to raise).
    4. Roll back to the original image and verify images, replica count,
       `data`, and that the volume is attached and healthy.
    5. Request the upgrade again and detach; the detached volume is
       expected to move to the test image.
    6. Switch back to the original image while detached, attach, and
       verify images and `data`.
    7. Delete the volume and the new engine image.
    """
    default_img = common.get_default_engine_image(client)
    default_img_name = default_img["name"]
    default_img = wait_for_engine_image_ref_count(client, default_img_name, 0)
    cli_v = default_img["cliAPIVersion"]
    cli_minv = default_img["cliAPIMinVersion"]
    ctl_v = default_img["controllerAPIVersion"]
    ctl_minv = default_img["controllerAPIMinVersion"]
    data_v = default_img["dataFormatVersion"]
    data_minv = default_img["dataFormatMinVersion"]
    wrong_engine_upgrade_image = common.get_compatibility_test_image(
        cli_v, cli_minv, ctl_v, ctl_minv, data_v, data_minv)
    new_img = client.create_engine_image(image=wrong_engine_upgrade_image)
    new_img_name = new_img["name"]
    new_img = wait_for_engine_image_state(client, new_img_name, "ready")
    assert new_img["refCount"] == 0
    assert new_img["noRefSince"] != ""

    default_img = common.get_default_engine_image(client)
    default_img_name = default_img["name"]

    # Use REPLICA_COUNT rather than a literal so the creation request
    # agrees with the wait_for_volume_replica_count() calls below.
    volume = client.create_volume(name=volume_name, size=SIZE,
                                  numberOfReplicas=REPLICA_COUNT,
                                  baseImage=base_image)
    volume = common.wait_for_volume_detached(client, volume_name)
    wait_for_engine_image_ref_count(client, default_img_name, 1)
    assert volume["baseImage"] == base_image

    original_engine_image = volume["engineImage"]
    assert original_engine_image != wrong_engine_upgrade_image

    host_id = get_self_host_id()
    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    data = write_volume_random_data(volume)

    volume.engineUpgrade(image=wrong_engine_upgrade_image)
    volume = client.by_id_volume(volume["name"])
    assert volume["engineImage"] == wrong_engine_upgrade_image
    assert volume["currentImage"] == original_engine_image

    with pytest.raises(Exception):
        # this will timeout
        wait_for_volume_current_image(client, volume_name,
                                      wrong_engine_upgrade_image)

    # rollback
    volume.engineUpgrade(image=original_engine_image)
    volume = wait_for_volume_current_image(client, volume_name,
                                           original_engine_image)
    assert volume["engineImage"] == original_engine_image
    assert volume["currentImage"] == original_engine_image
    engine = get_volume_engine(volume)
    assert engine["engineImage"] == original_engine_image
    assert engine["currentImage"] == original_engine_image

    volume = common.wait_for_volume_replica_count(client, volume_name,
                                                  REPLICA_COUNT)

    check_volume_data(volume, data)

    assert volume["state"] == common.VOLUME_STATE_ATTACHED
    assert volume["robustness"] == common.VOLUME_ROBUSTNESS_HEALTHY

    # try again, this time let's try detach
    volume.engineUpgrade(image=wrong_engine_upgrade_image)
    volume = client.by_id_volume(volume["name"])
    assert volume["engineImage"] == wrong_engine_upgrade_image
    assert volume["currentImage"] == original_engine_image

    with pytest.raises(Exception):
        # this will timeout
        wait_for_volume_current_image(client, volume_name,
                                      wrong_engine_upgrade_image)

    volume = volume.detach()
    volume = wait_for_volume_current_image(client, volume_name,
                                           wrong_engine_upgrade_image)
    # all the images would be updated
    assert volume["engineImage"] == wrong_engine_upgrade_image
    engine = get_volume_engine(volume)
    assert engine["engineImage"] == wrong_engine_upgrade_image
    volume = common.wait_for_volume_replica_count(client, volume_name,
                                                  REPLICA_COUNT)
    for replica in volume["replicas"]:
        assert replica["engineImage"] == wrong_engine_upgrade_image

    # upgrade to the correct image when offline
    volume.engineUpgrade(image=original_engine_image)
    volume = client.by_id_volume(volume["name"])
    assert volume["engineImage"] == original_engine_image

    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)
    assert volume["engineImage"] == original_engine_image
    assert volume["currentImage"] == original_engine_image
    engine = get_volume_engine(volume)
    assert engine["engineImage"] == original_engine_image
    assert engine["currentImage"] == original_engine_image
    for replica in volume["replicas"]:
        assert replica["engineImage"] == original_engine_image
        assert replica["currentImage"] == original_engine_image

    check_volume_data(volume, data)

    client.delete(volume)
    wait_for_volume_delete(client, volume_name)

    client.delete(new_img)
def engine_live_upgrade_test(client, volume_name, base_image=""):  # NOQA
    """
    Live-upgrade the engine image of an attached volume and then switch
    it back, checking images, reference counts and data along the way.

    1. Build an upgrade test image from the default engine image's
       version fields and deploy it as a new engine image.
    2. Create a volume with `REPLICA_COUNT` replicas, attach it, verify
       that everything runs the original image, and write `data`.
    3. Upgrade to the new image while attached; verify the engine, the
       engine image reference counts, that `REPLICA_COUNT` replicas run
       the new image, and `data`.
    4. Detach and re-attach; verify images and `data` again.
    5. Switch back to the original image while attached and repeat the
       checks, then detach and verify once more.
    6. Delete the volume and the new engine image.
    """
    default_img = common.get_default_engine_image(client)
    default_img_name = default_img["name"]
    default_img = wait_for_engine_image_ref_count(client, default_img_name, 0)
    cli_v = default_img["cliAPIVersion"]
    cli_minv = default_img["cliAPIMinVersion"]
    ctl_v = default_img["controllerAPIVersion"]
    ctl_minv = default_img["controllerAPIMinVersion"]
    data_v = default_img["dataFormatVersion"]
    data_minv = default_img["dataFormatMinVersion"]
    engine_upgrade_image = common.get_upgrade_test_image(
        cli_v, cli_minv, ctl_v, ctl_minv, data_v, data_minv)

    new_img = client.create_engine_image(image=engine_upgrade_image)
    new_img_name = new_img["name"]
    new_img = wait_for_engine_image_state(client, new_img_name, "ready")
    assert new_img["refCount"] == 0
    assert new_img["noRefSince"] != ""

    default_img = common.get_default_engine_image(client)
    default_img_name = default_img["name"]

    # Use REPLICA_COUNT rather than a literal so the creation request
    # agrees with the replica-count assertions below.
    volume = client.create_volume(name=volume_name, size=SIZE,
                                  numberOfReplicas=REPLICA_COUNT,
                                  baseImage=base_image)
    volume = common.wait_for_volume_detached(client, volume_name)
    wait_for_engine_image_ref_count(client, default_img_name, 1)

    assert volume["name"] == volume_name
    assert volume["baseImage"] == base_image

    original_engine_image = volume["engineImage"]
    assert original_engine_image != engine_upgrade_image

    host_id = get_self_host_id()
    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)
    assert volume["engineImage"] == original_engine_image
    assert volume["currentImage"] == original_engine_image
    engine = get_volume_engine(volume)
    assert engine["engineImage"] == original_engine_image
    assert engine["currentImage"] == original_engine_image
    for replica in volume["replicas"]:
        assert replica["engineImage"] == original_engine_image
        assert replica["currentImage"] == original_engine_image

    data = write_volume_random_data(volume)

    volume.engineUpgrade(image=engine_upgrade_image)
    volume = wait_for_volume_current_image(client, volume_name,
                                           engine_upgrade_image)
    engine = get_volume_engine(volume)
    assert engine["engineImage"] == engine_upgrade_image

    wait_for_engine_image_ref_count(client, default_img_name, 0)
    wait_for_engine_image_ref_count(client, new_img_name, 1)

    # old replica may be in deletion process, so count only the replicas
    # already running the new image instead of checking every replica
    upgraded = sum(
        1 for replica in volume["replicas"]
        if replica["currentImage"] == engine_upgrade_image
    )
    assert upgraded == REPLICA_COUNT

    check_volume_data(volume, data)

    volume = volume.detach()
    volume = common.wait_for_volume_detached(client, volume_name)
    assert len(volume["replicas"]) == REPLICA_COUNT

    assert volume["engineImage"] == engine_upgrade_image
    engine = get_volume_engine(volume)
    assert engine["engineImage"] == engine_upgrade_image
    for replica in volume["replicas"]:
        assert replica["engineImage"] == engine_upgrade_image

    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)
    assert volume["engineImage"] == engine_upgrade_image
    assert volume["currentImage"] == engine_upgrade_image
    engine = get_volume_engine(volume)
    assert engine["engineImage"] == engine_upgrade_image
    assert engine["currentImage"] == engine_upgrade_image
    for replica in volume["replicas"]:
        assert replica["engineImage"] == engine_upgrade_image
        assert replica["currentImage"] == engine_upgrade_image

    # Make sure detaching didn't somehow interfere with the data.
    check_volume_data(volume, data)

    volume.engineUpgrade(image=original_engine_image)
    volume = wait_for_volume_current_image(client, volume_name,
                                           original_engine_image)
    engine = get_volume_engine(volume)
    assert engine["engineImage"] == original_engine_image

    wait_for_engine_image_ref_count(client, default_img_name, 1)
    new_img = wait_for_engine_image_ref_count(client, new_img_name, 0)

    assert volume["engineImage"] == original_engine_image
    engine = get_volume_engine(volume)
    assert engine["engineImage"] == original_engine_image

    # old replica may be in deletion process
    reverted = sum(
        1 for replica in volume["replicas"]
        if replica["engineImage"] == original_engine_image
    )
    assert reverted == REPLICA_COUNT

    check_volume_data(volume, data)

    volume = volume.detach()
    volume = common.wait_for_volume_detached(client, volume_name)
    assert len(volume["replicas"]) == REPLICA_COUNT

    assert volume["engineImage"] == original_engine_image
    engine = get_volume_engine(volume)
    assert engine["engineImage"] == original_engine_image
    for replica in volume["replicas"]:
        assert replica["engineImage"] == original_engine_image

    client.delete(volume)
    wait_for_volume_delete(client, volume_name)

    client.delete(new_img)
def snapshot_test(clients, volume_name, base_image):  # NOQA
    """
    Exercise snapshot create / list / delete / revert / purge on a volume.

    `clients` is a mapping whose values are API clients; one of them is
    used for the whole test.

    1. Create a 2-replica volume, attach it to the current node.
    2. Create snap1, write data and create snap2, write more data and
       create snap3; verify the parent chain and `removed` flags.
    3. Delete snap3 and verify it is marked removed while still being the
       parent of `volume-head`; data must be unchanged.
    4. Revert to snap2 and verify data and the snapshot tree.
    5. Delete snap1 and snap2, purge, and verify that only snap2 (parent
       of `volume-head`) remains, marked removed.
    6. Detach and delete the volume; no volumes should remain.
    """
    # dict.iteritems() does not exist on Python 3; values() works on both
    # Python 2 and Python 3 and the key was never used.
    assert clients, "snapshot_test needs at least one API client"
    client = next(iter(clients.values()))

    volume = client.create_volume(name=volume_name, size=SIZE,
                                  numberOfReplicas=2, baseImage=base_image)

    volume = common.wait_for_volume_detached(client, volume_name)
    assert volume["name"] == volume_name
    assert volume["size"] == SIZE
    assert volume["numberOfReplicas"] == 2
    assert volume["state"] == "detached"
    assert volume["baseImage"] == base_image

    lht_hostId = get_self_host_id()
    volume = volume.attach(hostId=lht_hostId)
    volume = common.wait_for_volume_healthy(client, volume_name)

    volume = client.by_id_volume(volume_name)
    # The same dict is handed to both writes below.
    positions = {}

    snap1 = volume.snapshotCreate()

    snap2_data = write_volume_random_data(volume, positions)
    snap2 = volume.snapshotCreate()

    snap3_data = write_volume_random_data(volume, positions)
    snap3 = volume.snapshotCreate()

    name1, name2, name3 = snap1["name"], snap2["name"], snap3["name"]

    snapshots = volume.snapshotList()
    snapMap = {snap["name"]: snap for snap in snapshots}

    assert snapMap[name1]["name"] == name1
    assert snapMap[name1]["removed"] is False
    assert snapMap[name2]["name"] == name2
    assert snapMap[name2]["parent"] == name1
    assert snapMap[name2]["removed"] is False
    assert snapMap[name3]["name"] == name3
    assert snapMap[name3]["parent"] == name2
    assert snapMap[name3]["removed"] is False

    volume.snapshotDelete(name=name3)
    check_volume_data(volume, snap3_data)

    snapshots = volume.snapshotList(volume=volume_name)
    snapMap = {snap["name"]: snap for snap in snapshots}

    assert snapMap[name1]["name"] == name1
    assert snapMap[name1]["removed"] is False
    assert snapMap[name2]["name"] == name2
    assert snapMap[name2]["parent"] == name1
    assert snapMap[name2]["removed"] is False
    assert snapMap[name3]["name"] == name3
    assert snapMap[name3]["parent"] == name2
    assert len(snapMap[name3]["children"]) == 1
    assert "volume-head" in snapMap[name3]["children"]
    assert snapMap[name3]["removed"] is True

    snap = volume.snapshotGet(name=name3)
    assert snap["name"] == name3
    assert snap["parent"] == snap3["parent"]
    assert len(snap3["children"]) == 1
    assert len(snap["children"]) == 1
    assert "volume-head" in snap3["children"]
    assert "volume-head" in snap["children"]
    assert snap["removed"] is True

    volume.snapshotRevert(name=name2)
    check_volume_data(volume, snap2_data)

    snapshots = volume.snapshotList(volume=volume_name)
    snapMap = {snap["name"]: snap for snap in snapshots}

    assert snapMap[name1]["name"] == name1
    assert snapMap[name1]["removed"] is False
    assert snapMap[name2]["name"] == name2
    assert snapMap[name2]["parent"] == name1
    assert "volume-head" in snapMap[name2]["children"]
    assert name3 in snapMap[name2]["children"]
    assert snapMap[name2]["removed"] is False
    assert snapMap[name3]["name"] == name3
    assert snapMap[name3]["parent"] == name2
    assert len(snapMap[name3]["children"]) == 0
    assert snapMap[name3]["removed"] is True

    volume.snapshotDelete(name=name1)
    volume.snapshotDelete(name=name2)

    volume.snapshotPurge()
    wait_for_snapshot_purge(volume, name1, name3)

    snapshots = volume.snapshotList(volume=volume_name)
    snapMap = {snap["name"]: snap for snap in snapshots}
    assert name1 not in snapMap
    assert name3 not in snapMap

    # it's the parent of volume-head, so it cannot be purged at this time
    assert snapMap[name2]["name"] == name2
    assert snapMap[name2]["parent"] == ""
    assert "volume-head" in snapMap[name2]["children"]
    assert snapMap[name2]["removed"] is True
    check_volume_data(volume, snap2_data)

    volume = volume.detach()
    volume = common.wait_for_volume_detached(client, volume_name)

    client.delete(volume)
    wait_for_volume_delete(client, volume_name)

    volumes = client.list_volume()
    assert len(volumes) == 0
def test_setting_toleration():
    """
    Test the taint toleration setting.

    1.  Set `taint-toleration` to
        "key1=value1:NoSchedule; key2:InvalidEffect".
    2.  Verify the request fails.
    3.  Create a volume and attach it.
    4.  Set `taint-toleration` to "key1=value1:NoSchedule; key2:NoExecute".
    5.  Verify the toleration setting cannot be updated while any volume
        is attached.
    6.  Generate and write `data1` into the volume.
    7.  Detach the volume.
    8.  Set `taint-toleration` to "key1=value1:NoSchedule; key2:NoExecute".
    9.  Wait for all the Longhorn system components to restart with the
        new toleration.
    10. Verify that UI, manager, and driver deployer don't restart and
        don't have the new toleration.
    11. Attach the volume again and verify `data1`.
    12. Generate and write `data2` to the volume.
    13. Detach the volume.
    14. Clean the `toleration` setting.
    15. Wait for all the Longhorn system components to restart with no
        toleration.
    16. Attach the volume and validate `data2`.
    17. Generate and write `data3` to the volume.
    """
    client = get_longhorn_api_client()  # NOQA
    apps_api = get_apps_api_client()  # NOQA
    core_api = get_core_api_client()  # NOQA
    node_count = len(client.list_node())

    toleration = client.by_id_setting(SETTING_TAINT_TOLERATION)

    # An unknown taint effect must be rejected.
    with pytest.raises(Exception) as err:
        client.update(toleration,
                      value="key1=value1:NoSchedule; key2:InvalidEffect")
    assert 'invalid effect' in str(err.value)

    volume_name = "test-toleration-vol"  # NOQA
    vol = create_and_check_volume(client, volume_name)
    vol.attach(hostId=get_self_host_id())
    vol = wait_for_volume_healthy(client, volume_name)

    new_value = "key1=value1:NoSchedule; key2:NoExecute"
    # Toleration dicts passed to wait_for_toleration_update for `new_value`.
    expected_tolerations = [
        {
            "key": "key1",
            "value": "value1",
            "operator": "Equal",
            "effect": "NoSchedule"
        },
        {
            "key": "key2",
            "value": None,
            "operator": "Exists",
            "effect": "NoExecute"
        },
    ]

    # The setting cannot change while a volume is attached.
    with pytest.raises(Exception) as err:
        client.update(toleration, value=new_value)
    assert 'cannot modify toleration setting before all volumes are detached' \
        in str(err.value)

    data1 = write_volume_random_data(vol)
    check_volume_data(vol, data1)

    vol.detach(hostId="")
    wait_for_volume_detached(client, volume_name)

    toleration = client.update(toleration, value=new_value)
    assert toleration.value == new_value
    wait_for_toleration_update(core_api, apps_api,
                               node_count, expected_tolerations)

    client, node = wait_for_longhorn_node_ready()

    vol = client.by_id_volume(volume_name)
    vol.attach(hostId=node)
    vol = wait_for_volume_healthy(client, volume_name)
    check_volume_data(vol, data1)
    data2 = write_volume_random_data(vol)
    check_volume_data(vol, data2)
    vol.detach(hostId="")
    wait_for_volume_detached(client, volume_name)

    # cleanup
    new_value = ""
    expected_tolerations = []
    toleration = client.by_id_setting(SETTING_TAINT_TOLERATION)
    toleration = client.update(toleration, value=new_value)
    assert toleration.value == new_value
    wait_for_toleration_update(core_api, apps_api,
                               node_count, expected_tolerations)

    client, node = wait_for_longhorn_node_ready()

    vol = client.by_id_volume(volume_name)
    vol.attach(hostId=node)
    vol = wait_for_volume_healthy(client, volume_name)
    check_volume_data(vol, data2)
    data3 = write_volume_random_data(vol)
    check_volume_data(vol, data3)

    cleanup_volume(client, vol)
def test_setting_priority_class(core_api, apps_api, scheduling_api, priority_class, volume_name):  # NOQA
    """
    Test that the Priority Class setting is validated and applied.

    1.  Verify that the name of a non-existent Priority Class cannot be
        used for the Setting.
    2.  Create a new Priority Class in Kubernetes.
    3.  Create and attach a Volume.
    4.  Verify that the Priority Class Setting cannot be updated with an
        attached Volume.
    5.  Generate and write `data1`.
    6.  Detach the Volume.
    7.  Update the Priority Class Setting to the new Priority Class.
    8.  Wait for all the Longhorn system components to restart with the
        new Priority Class.
    9.  Verify that UI, manager, and driver deployer don't have the
        Priority Class.
    10. Attach the Volume and verify `data1`.
    11. Generate and write `data2`.
    12. Unset the Priority Class Setting.
    13. Wait for all the Longhorn system components to restart without
        the Priority Class.
    14. Verify that UI, manager, and driver deployer don't have the
        Priority Class.
    15. Attach the Volume and verify `data2`.
    16. Generate and write `data3`.

    Note: system components are workloads other than UI, manager, driver
    deployer.
    """
    client = get_longhorn_api_client()  # NOQA
    node_count = len(client.list_node())
    pc_name = priority_class['metadata']['name']
    pc_setting = client.by_id_setting(SETTING_PRIORITY_CLASS)

    # The Priority Class has not been created yet, so this must fail.
    with pytest.raises(Exception) as err:
        client.update(pc_setting, value=pc_name)
    assert 'failed to get priority class ' in str(err.value)

    scheduling_api.create_priority_class(priority_class)

    vol = create_and_check_volume(client, volume_name)
    vol.attach(hostId=get_self_host_id())
    vol = wait_for_volume_healthy(client, volume_name)

    # The setting cannot change while a volume is attached.
    with pytest.raises(Exception) as err:
        client.update(pc_setting, value=pc_name)
    assert 'cannot modify priority class setting before all volumes are ' \
           'detached' in str(err.value)

    data1 = write_volume_random_data(vol)
    check_volume_data(vol, data1)

    vol.detach(hostId="")
    wait_for_volume_detached(client, volume_name)

    pc_setting = client.update(pc_setting, value=pc_name)
    assert pc_setting.value == pc_name

    wait_for_priority_class_update(core_api, apps_api,
                                   node_count, priority_class)

    client, node = wait_for_longhorn_node_ready()

    vol = client.by_id_volume(volume_name)
    vol.attach(hostId=node)
    vol = wait_for_volume_healthy(client, volume_name)
    check_volume_data(vol, data1)
    data2 = write_volume_random_data(vol)
    check_volume_data(vol, data2)
    vol.detach(hostId="")
    wait_for_volume_detached(client, volume_name)

    pc_setting = client.by_id_setting(SETTING_PRIORITY_CLASS)
    pc_setting = client.update(pc_setting, value='')
    assert pc_setting.value == ''
    wait_for_priority_class_update(core_api, apps_api, node_count)

    client, node = wait_for_longhorn_node_ready()

    vol = client.by_id_volume(volume_name)
    vol.attach(hostId=node)
    vol = wait_for_volume_healthy(client, volume_name)
    check_volume_data(vol, data2)
    data3 = write_volume_random_data(vol)
    check_volume_data(vol, data3)

    cleanup_volume(client, vol)
def test_instance_manager_cpu_reservation(client, core_api):  # NOQA
    """
    Test if the CPU requests of instance manager pods are controlled by
    the settings and the node specs correctly.

    1. Try to change the deprecated setting `Guaranteed Engine CPU`.
       --> The setting update should fail.
    2. Pick up node 1, set `node.engineManagerCPURequest` and
       `node.replicaManagerCPURequest` to 150 and 250, respectively.
       --> The IM pods on this node will be restarted. And the CPU
       requests of these IM pods match the above milli values.
    3. Change the new settings `Guaranteed Engine Manager CPU` and
       `Guaranteed Replica Manager CPU` to 10 and 20, respectively.
       Then wait for all IM pods except for the pods on node 1 restarting.
       --> The CPU requests of the restarted IM pods equal the new
       setting value multiplied by the kube node allocatable CPU.
    4. Set both new settings to 0.
       --> All IM pods except for the pods on node 1 will be restarted
       without CPU requests.
    5. Set the fields on node 1 to 0.
       --> The IM pods on node 1 will be restarted without CPU requests.
    6. Set both new settings to 2 values whose sum is smaller than 40.
       Then wait for all IM pods restarting.
       --> The CPU requests of all IM pods equal the new setting value
       multiplied by the kube node allocatable CPU.
    7. Set the settings so that a single value or the sum of the 2 values
       is greater than 40.
       --> The setting update should fail.
    8. Create a volume, verify everything works as normal.

    Note: use fixture to restore the setting into the original state
    """
    instance_managers = client.list_instance_manager()
    deprecated_setting = client.by_id_setting(SETTING_GUARANTEED_ENGINE_CPU)
    with pytest.raises(Exception):
        client.update(deprecated_setting, value="0.1")

    host_node_name = get_self_host_id()
    host_node = client.by_id_node(host_node_name)

    # Initialise to None so that a missing instance manager produces a
    # clear assertion failure instead of an UnboundLocalError.
    em_on_host = None
    rm_on_host = None
    other_ems, other_rms = [], []
    for im in instance_managers:
        on_host = im.nodeID == host_node_name
        if im.managerType == "engine":
            if on_host:
                em_on_host = im
            else:
                other_ems.append(im)
        else:
            if on_host:
                rm_on_host = im
            else:
                other_rms.append(im)
    assert em_on_host, \
        "no engine instance manager found on node " + host_node_name
    assert rm_on_host, \
        "no replica instance manager found on node " + host_node_name

    # The allocatable CPU is either a milli value ending in 'm' or a
    # plain number; normalise it to milli-CPU.
    host_kb_node = core_api.read_node(host_node_name)
    allocatable_cpu = host_kb_node.status.allocatable["cpu"]
    if allocatable_cpu.endswith('m'):
        allocatable_millicpu = int(allocatable_cpu[:-1])
    else:
        allocatable_millicpu = int(allocatable_cpu) * 1000

    client.update(host_node, allowScheduling=True,
                  engineManagerCPURequest=150, replicaManagerCPURequest=250)
    time.sleep(5)
    guaranteed_engine_cpu_setting_check(
        client, core_api, [em_on_host], "Running", True, "150m")
    guaranteed_engine_cpu_setting_check(
        client, core_api, [rm_on_host], "Running", True, "250m")

    em_setting = client.by_id_setting(SETTING_GUARANTEED_ENGINE_MANAGER_CPU)
    client.update(em_setting, value="10")
    rm_setting = client.by_id_setting(SETTING_GUARANTEED_REPLICA_MANAGER_CPU)
    client.update(rm_setting, value="20")
    time.sleep(5)
    guaranteed_engine_cpu_setting_check(
        client, core_api, other_ems, "Running", True,
        str(int(allocatable_millicpu * 10 / 100)) + "m")
    guaranteed_engine_cpu_setting_check(
        client, core_api, other_rms, "Running", True,
        str(int(allocatable_millicpu * 20 / 100)) + "m")

    em_setting = client.by_id_setting(SETTING_GUARANTEED_ENGINE_MANAGER_CPU)
    client.update(em_setting, value="0")
    rm_setting = client.by_id_setting(SETTING_GUARANTEED_REPLICA_MANAGER_CPU)
    client.update(rm_setting, value="0")
    time.sleep(5)
    guaranteed_engine_cpu_setting_check(
        client, core_api, other_ems, "Running", True, "")
    guaranteed_engine_cpu_setting_check(
        client, core_api, other_rms, "Running", True, "")

    # From here on every instance manager is checked; build new lists
    # rather than mutating other_ems/other_rms through aliases.
    ems = other_ems + [em_on_host]
    rms = other_rms + [rm_on_host]

    host_node = client.by_id_node(host_node_name)
    client.update(host_node, allowScheduling=True,
                  engineManagerCPURequest=0, replicaManagerCPURequest=0)
    time.sleep(5)
    guaranteed_engine_cpu_setting_check(
        client, core_api, ems, "Running", True, "")
    guaranteed_engine_cpu_setting_check(
        client, core_api, rms, "Running", True, "")

    client.update(em_setting, value="20")
    rm_setting = client.by_id_setting(SETTING_GUARANTEED_REPLICA_MANAGER_CPU)
    client.update(rm_setting, value="15")
    time.sleep(5)
    guaranteed_engine_cpu_setting_check(
        client, core_api, ems, "Running", True,
        str(int(allocatable_millicpu * 20 / 100)) + "m")
    guaranteed_engine_cpu_setting_check(
        client, core_api, rms, "Running", True,
        str(int(allocatable_millicpu * 15 / 100)) + "m")

    # A single value above 40 is rejected.
    with pytest.raises(Exception) as e:
        client.update(em_setting, value="41")
    assert "should be between 0 to 40" in \
           str(e.value)

    # 35 (engine) + 15 (replica) exceeds the allowed sum of 40.
    em_setting = client.by_id_setting(SETTING_GUARANTEED_ENGINE_MANAGER_CPU)
    with pytest.raises(Exception) as e:
        client.update(em_setting, value="35")
    assert "The sum should not be smaller than 0% or greater than 40%" in \
           str(e.value)

    # Create a volume to test
    vol_name = generate_volume_name()
    volume = create_and_check_volume(client, vol_name)
    volume.attach(hostId=get_self_host_id())
    volume = wait_for_volume_healthy(client, vol_name)
    assert len(volume.replicas) == 3
    data = write_volume_random_data(volume)
    check_volume_data(volume, data)
    cleanup_volume(client, volume)
def test_upgrade(upgrade_image_tag, settings_reset, volume_name, pod_make, statefulset, storage_class):  # NOQA
    """
    Test Longhorn upgrade

    Prerequisite:
      - Disable Auto Salvage Setting

    1. Find the upgrade image tag
    2. Create a volume, generate and write data into the volume.
    3. Create a Pod using a volume, generate and write data
    4. Create a StatefulSet with 2 replicas, generate and write data to
       their volumes
    5. Keep all volumes attached
    6. Upgrade Longhorn system.
    7. Check Pod and StatefulSet didn't restart after upgrade
    8. Check All volumes data
    9. Write data to StatefulSet pods, and Attached volume
    10. Check data written to StatefulSet pods, and attached volume.
    11. Detach the volume, and Delete Pod, and StatefulSet to detach
        their volumes
    12. Upgrade all volumes engine images.
    13. Attach the volume, and recreate Pod, and StatefulSet
    14. Check All volumes data
    """
    new_ei_name = "longhornio/longhorn-engine:" + upgrade_image_tag

    client = get_longhorn_api_client()
    core_api = get_core_api_client()
    host_id = get_self_host_id()
    pod_data_path = "/data/test"

    pod_volume_name = generate_volume_name()

    auto_salvage_setting = client.by_id_setting(SETTING_AUTO_SALVAGE)
    setting = client.update(auto_salvage_setting, value="false")

    assert setting.name == SETTING_AUTO_SALVAGE
    assert setting.value == "false"

    # Create Volume attached to a node.
    volume1 = create_and_check_volume(client, volume_name, size=SIZE)
    volume1.attach(hostId=host_id)
    volume1 = wait_for_volume_healthy(client, volume_name)
    volume1_data = write_volume_random_data(volume1)

    # Create Volume used by Pod
    pod_name, _, pvc_name, pod_md5sum = \
        prepare_pod_with_data_in_mb(client, core_api,
                                    pod_make, pod_volume_name,
                                    data_path=pod_data_path,
                                    add_liveness_prope=False)

    # Create multiple volumes used by StatefulSet
    statefulset_name = 'statefulset-upgrade-test'
    update_statefulset_manifests(statefulset, storage_class,
                                 statefulset_name)
    create_storage_class(storage_class)
    create_and_wait_statefulset(statefulset)
    statefulset_pod_info = get_statefulset_pod_info(core_api, statefulset)

    for sspod_info in statefulset_pod_info:
        sspod_info['data'] = generate_random_data(VOLUME_RWTEST_SIZE)
        write_pod_volume_data(core_api,
                              sspod_info['pod_name'],
                              sspod_info['data'])

    # upgrade Longhorn
    assert longhorn_upgrade(upgrade_image_tag)

    # Fetch a fresh API client after the upgrade.
    client = get_longhorn_api_client()

    # wait for 1 minute before checking pod restarts
    time.sleep(60)

    pod = core_api.read_namespaced_pod(name=pod_name,
                                       namespace='default')
    assert pod.status.container_statuses[0].restart_count == 0

    for sspod_info in statefulset_pod_info:
        sspod = core_api.read_namespaced_pod(name=sspod_info['pod_name'],
                                             namespace='default')
        assert \
            sspod.status.container_statuses[0].restart_count == 0

    for sspod_info in statefulset_pod_info:
        resp = read_volume_data(core_api, sspod_info['pod_name'])
        assert resp == sspod_info['data']

    res_pod_md5sum = get_pod_data_md5sum(core_api, pod_name, pod_data_path)
    assert res_pod_md5sum == pod_md5sum

    check_volume_data(volume1, volume1_data)

    for sspod_info in statefulset_pod_info:
        sspod_info['data'] = generate_random_data(VOLUME_RWTEST_SIZE)
        write_pod_volume_data(core_api,
                              sspod_info['pod_name'],
                              sspod_info['data'])

    for sspod_info in statefulset_pod_info:
        resp = read_volume_data(core_api, sspod_info['pod_name'])
        assert resp == sspod_info['data']

    volume1 = client.by_id_volume(volume_name)
    volume1_data = write_volume_random_data(volume1)
    check_volume_data(volume1, volume1_data)

    # Scale the StatefulSet down to 0 and delete the Pod so their
    # volumes detach before the engine upgrade.
    statefulset['spec']['replicas'] = replicas = 0
    apps_api = get_apps_api_client()

    apps_api.patch_namespaced_stateful_set(
        name=statefulset_name,
        namespace='default',
        body={
            'spec': {
                'replicas': replicas
            }
        })

    delete_and_wait_pod(core_api, pod_name)

    volume = client.by_id_volume(volume_name)
    volume.detach()

    volumes = client.list_volume()

    for v in volumes:
        wait_for_volume_detached(client, v.name)

    engineimages = client.list_engine_image()

    # Initialise to None so that a missing engine image fails with a
    # clear assertion instead of a NameError further down.
    new_ei = None
    for ei in engineimages:
        if ei.image == new_ei_name:
            new_ei = ei
    assert new_ei is not None, \
        "engine image " + new_ei_name + " not found after upgrade"

    volumes = client.list_volume()

    for v in volumes:
        volume = client.by_id_volume(v.name)
        volume.engineUpgrade(image=new_ei.image)

    # Bring the StatefulSet and the Pod back.
    statefulset['spec']['replicas'] = replicas = 2

    apps_api.patch_namespaced_stateful_set(
        name=statefulset_name,
        namespace='default',
        body={
            'spec': {
                'replicas': replicas
            }
        })

    wait_statefulset(statefulset)

    pod = pod_make(name=pod_name)
    pod['spec']['volumes'] = [create_pvc_spec(pvc_name)]
    create_and_wait_pod(core_api, pod)

    volume1 = client.by_id_volume(volume_name)
    volume1.attach(hostId=host_id)
    volume1 = wait_for_volume_healthy(client, volume_name)

    for sspod_info in statefulset_pod_info:
        resp = read_volume_data(core_api, sspod_info['pod_name'])
        assert resp == sspod_info['data']

    res_pod_md5sum = get_pod_data_md5sum(core_api, pod_name, pod_data_path)
    assert res_pod_md5sum == pod_md5sum

    check_volume_data(volume1, volume1_data)
def ha_backup_deletion_recovery_test(client, volume_name, size, base_image=""):  # NOQA
    """
    Restore a volume from a backup, delete the restored backup snapshot,
    and run the replica rebuild check on the restored volume.

    A source volume is created and attached once. Then, for every
    backupstore returned by `common.get_backupstore_url()`:

    1. Point the backup target and credential secret settings at it
    2. Write random data to the source volume, take two snapshots and
       back up the first one
    3. Create a new volume from that backup, attach it and check its data
    4. Expect two snapshots on the restored volume
       (the backup snapshot + volume-head)
    5. Take another snapshot (expect 3), then delete the backup snapshot
       and purge (expect 2)
    6. Run `ha_rebuild_replica_test` against the restored volume
    7. Detach and delete the restored volume

    Finally the source volume is detached and deleted, and the test
    asserts that no volumes are left.

    :param client: API client used for every volume and setting operation.
    :param volume_name: Name of the source volume created by this test.
    :param size: Size passed to `create_volume` for both the source and
                 the restored volumes.
    :param base_image: Passed as `baseImage` when creating the source
                       volume.
    """
    client.create_volume(name=volume_name, size=size,
                         numberOfReplicas=2, baseImage=base_image)
    volume = common.wait_for_volume_detached(client, volume_name)

    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    setting = client.by_id_setting(common.SETTING_BACKUP_TARGET)
    # test backupTarget for multiple settings
    for store_url in common.get_backupstore_url():
        # An S3 entry is split on "$" into [target, credential secret];
        # any other store is used as-is with an empty credential secret.
        if common.is_backupTarget_s3(store_url):
            target_and_secret = store_url.split("$")
        else:
            target_and_secret = [store_url, ""]

        setting = client.update(setting, value=target_and_secret[0])
        assert setting["value"] == target_and_secret[0]

        credential = client.by_id_setting(
            common.SETTING_BACKUP_TARGET_CREDENTIAL_SECRET)
        credential = client.update(credential, value=target_and_secret[1])
        assert credential["value"] == target_and_secret[1]

        # Back up the first of two snapshots taken after writing data.
        payload = write_volume_random_data(volume)
        backed_up_snap = volume.snapshotCreate()
        volume.snapshotCreate()
        volume.snapshotBackup(name=backed_up_snap["name"])

        _, backup = common.find_backup(client, volume_name,
                                       backed_up_snap["name"])

        # Restore the backup into a fresh volume and verify its content.
        restored_name = common.generate_volume_name()
        client.create_volume(name=restored_name, size=size,
                             numberOfReplicas=2,
                             fromBackup=backup["url"])
        restored = common.wait_for_volume_detached(client, restored_name)
        restored.attach(hostId=host_id)
        restored = common.wait_for_volume_healthy(client, restored_name)
        check_volume_data(restored, payload)

        snapshots = restored.snapshotList()
        # only the backup snapshot + volume-head
        assert len(snapshots) == 2
        non_head_names = [
            snap["name"] for snap in snapshots
            if snap["name"] != "volume-head"
        ]
        backup_snapshot = non_head_names[-1] if non_head_names else ""
        assert backup_snapshot != ""

        restored.snapshotCreate()
        assert len(restored.snapshotList()) == 3

        restored.snapshotDelete(name=backup_snapshot)
        restored.snapshotPurge()
        assert len(restored.snapshotList()) == 2

        ha_rebuild_replica_test(client, restored_name)

        restored.detach()
        restored = common.wait_for_volume_detached(client, restored_name)

        client.delete(restored)
        common.wait_for_volume_delete(client, restored_name)

    # Tear down the source volume once every backupstore has been exercised.
    volume.detach()
    volume = common.wait_for_volume_detached(client, volume_name)

    client.delete(volume)
    common.wait_for_volume_delete(client, volume_name)

    assert len(client.list_volume()) == 0