def backup_test(clients, volume_name, size, base_image=""):  # NOQA
    """
    Run `backupstore_test` against every configured backup target.

    1. Pick an arbitrary client from `clients`.
    2. Create a volume with 2 replicas, attach it to the current node and
       wait for it to become healthy.
    3. For every entry returned by `common.get_backupstore_url()`:
        1. Update the backup target setting (and the credential secret,
           which is cleared for non-S3 targets) and verify the new values.
        2. Run `backupstore_test` against that target.
    4. Clean up the volume.
    """
    # Any client will do. `values()` exists on both Python 2 and Python 3
    # dicts, unlike the Python 2 only `iteritems()`.
    client = next(iter(clients.values()))

    volume = create_and_check_volume(client, volume_name, 2, size, base_image)

    lht_hostId = get_self_host_id()
    volume.attach(hostId=lht_hostId)
    volume = common.wait_for_volume_healthy(client, volume_name)

    setting = client.by_id_setting(common.SETTING_BACKUP_TARGET)
    # test backupTarget for multiple settings
    backupstores = common.get_backupstore_url()
    for backupstore in backupstores:
        if common.is_backupTarget_s3(backupstore):
            # For S3 entries the part before "$" is used as the target and
            # the part after it as the credential secret.
            backupsettings = backupstore.split("$")
            setting = client.update(setting, value=backupsettings[0])
            assert setting["value"] == backupsettings[0]

            credential = client.by_id_setting(
                common.SETTING_BACKUP_TARGET_CREDENTIAL_SECRET)
            credential = client.update(credential, value=backupsettings[1])
            assert credential["value"] == backupsettings[1]
        else:
            setting = client.update(setting, value=backupstore)
            assert setting["value"] == backupstore
            # Non-S3 targets run with an empty credential secret.
            credential = client.by_id_setting(
                common.SETTING_BACKUP_TARGET_CREDENTIAL_SECRET)
            credential = client.update(credential, value="")
            assert credential["value"] == ""

        backupstore_test(client, lht_hostId, volume_name, size)

    cleanup_volume(client, volume)
def recurring_job_labels_test(client, labels, volume_name, size=SIZE, backing_image=""):  # NOQA
    """
    Check that labels set on a recurring backup job end up on the backup.

    1. Create a volume with 2 replicas (optionally with `backing_image`)
       and wait for it to be detached.
    2. Configure one recurring backup job (cron `*/1 * * * *`, retain 1)
       carrying `labels`.
    3. Attach the volume, wait until healthy, write random data and
       sleep 75 seconds.
    4. Add one more entry to `labels` (the dict is modified in place),
       update the job, write random data again and sleep 135 seconds.
    5. Expect exactly 2 snapshots that are not marked removed and exactly
       1 backup.
    6. Verify the backup carries every label plus the recurring job label.
    7. With a backing image, verify the backing image name and URL recorded
       on the backup.
    """
    self_host = get_self_host_id()
    client.create_volume(name=volume_name, size=size,
                         numberOfReplicas=2, backingImage=backing_image)
    volume = common.wait_for_volume_detached(client, volume_name)

    # Simple Backup Job that runs every 1 minute, retains 1.
    backup_job = {
        "name": RECURRING_JOB_NAME,
        "cron": "*/1 * * * *",
        "task": "backup",
        "retain": 1,
        "labels": labels,
    }
    jobs = [backup_job]
    volume.recurringUpdate(jobs=jobs)
    volume.attach(hostId=self_host)
    volume = wait_for_volume_healthy(client, volume_name)

    write_volume_random_data(volume)

    # 1 minutes 15s
    time.sleep(75)

    labels["we-added-this-label"] = "definitely"
    jobs[0]["labels"] = labels
    volume = volume.recurringUpdate(jobs=jobs)
    volume = wait_for_volume_healthy(client, volume_name)

    write_volume_random_data(volume)

    # 2 minutes 15s
    time.sleep(135)
    live_snapshots = sum(
        1 for snap in volume.snapshotList() if snap.removed is False)

    # 1 from Backup, 1 from Volume Head.
    assert live_snapshots == 2

    # Verify the Labels on the actual Backup.
    backup_volume = client.by_id_backupVolume(volume_name)
    backups = backup_volume.backupList().data
    assert len(backups) == 1

    backup = backup_volume.backupGet(name=backups[0].name)
    for label_key, label_value in labels.items():
        assert backup.labels.get(label_key) == label_value
    assert backup.labels.get(RECURRING_JOB_LABEL) == RECURRING_JOB_NAME
    # One extra Label from RecurringJob.
    assert len(backup.labels) == len(labels) + 1
    if backing_image:
        assert backup.volumeBackingImageName == backing_image
        assert backup.volumeBackingImageURL != ""

    cleanup_volume(client, volume)
def test_empty_backup_volume(clients):  # NOQA
    """
    Test that a backup volume whose backups were all deleted stays usable.

    1. Pick an arbitrary client, create a volume, attach it to the current
       node and wait for it to become healthy.
    2. Create a backup, delete it and wait for the deletion to finish.
       Verify the backup list is empty.
    3. Create another backup on the now empty backup volume.
    4. Delete that backup as well and wait for the deletion to finish.
    5. Delete the backup volume and wait until it is gone.
    6. Clean up the volume.
    """
    # Any client will do. `values()` exists on both Python 2 and Python 3
    # dicts, unlike the Python 2 only `iteritems()`.
    client = next(iter(clients.values()))
    lht_hostId = get_self_host_id()

    volName = generate_volume_name()
    volume = create_and_check_volume(client, volName)

    volume.attach(hostId=lht_hostId)
    volume = common.wait_for_volume_healthy(client, volName)

    bv, b1, _, _ = create_backup(client, volName)
    bv.backupDelete(name=b1["name"])
    common.wait_for_backup_delete(b1["name"], bv)
    backup_list = bv.backupList()
    assert len(backup_list) == 0

    # test the empty backup volume can recreate backup
    _, b2, _, _ = create_backup(client, volName)

    # test the empty backup volume is still deletable
    bv.backupDelete(name=b2["name"])
    # Wait for the backup that was just deleted (b2). Waiting on b1 here
    # would return immediately because b1 is already gone.
    common.wait_for_backup_delete(b2["name"], bv)

    bv = client.by_id_backupVolume(volName)
    client.delete(bv)
    common.wait_for_backup_volume_delete(client, volName)
    cleanup_volume(client, volume)
def volume_iscsi_basic_test(clients, volume_name, base_image=""):  # NOQA
    """
    Basic read/write test for a volume exposed through the iSCSI frontend.

    1. Pick an arbitrary (host id, client) pair from `clients`.
    2. Create a volume with 3 replicas and the "iscsi" frontend, attach it
       to that host and wait for it to become healthy.
    3. Verify it is the only listed volume and that the listed fields match.
    4. Verify the endpoint starts with `iscsi://`, log in, run the
       read/write test on the device, and always log out afterwards.
    5. Clean up the volume.
    """
    # get a random client
    # `items()` exists on both Python 2 and Python 3 dicts, unlike the
    # Python 2 only `iteritems()`.
    host_id, client = next(iter(clients.items()))

    volume = create_and_check_volume(client, volume_name, 3, SIZE, base_image,
                                     "iscsi")
    volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    volumes = client.list_volume()
    assert len(volumes) == 1
    assert volumes[0]["name"] == volume["name"]
    assert volumes[0]["size"] == volume["size"]
    assert volumes[0]["numberOfReplicas"] == volume["numberOfReplicas"]
    assert volumes[0]["state"] == volume["state"]
    assert volumes[0]["created"] == volume["created"]
    assert volumes[0]["frontend"] == "iscsi"
    endpoint = get_volume_endpoint(volumes[0])
    assert endpoint.startswith("iscsi://")

    try:
        dev = iscsi_login(endpoint)
        volume_rw_test(dev)
    finally:
        # Log out even when the login or the read/write test fails.
        iscsi_logout(endpoint)

    cleanup_volume(client, volume)
def test_hard_anti_affinity_live_rebuild(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity can build new replicas live once
    a valid node is available.

    If no nodes without existing replicas are available, the volume should
    remain in "Degraded" state. However, once one is available, the replica
    should now be scheduled successfully, with the volume returning to
    "Healthy" state.
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    # Build a real list: on Python 3 a bare `map()` is a one-shot iterator
    # and could not be consulted more than once by the wait helper.
    replica_names = [replica.name for replica in volume["replicas"]]
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica["name"])
    wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)

    # Allow scheduling on host node again
    client.update(node, allowScheduling=True)
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
def test_soft_anti_affinity_detach(client, volume_name):  # NOQA
    """
    Test that volumes with Soft Anti-Affinity can detach and reattach to a
    node properly.
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_SOFT_ANTI_AFFINITY)
    client.update(setting, value="true")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    # Build a real list: on Python 3 a bare `map()` is a one-shot iterator
    # and could not be consulted more than once by the wait helper.
    replica_names = [replica.name for replica in volume["replicas"]]
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica["name"])
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)

    # The replica count must survive a detach/attach cycle.
    volume.detach()
    volume = wait_for_volume_detached(client, volume_name)
    assert len(volume["replicas"]) == 3

    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
def test_soft_anti_affinity_scheduling(client, volume_name):  # NOQA
    """
    Test that volumes with Soft Anti-Affinity work as expected.

    With Soft Anti-Affinity, a new replica should still be scheduled on a node
    with an existing replica, which will result in "Healthy" state but limited
    redundancy.
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_SOFT_ANTI_AFFINITY)
    client.update(setting, value="true")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    # Build a real list: on Python 3 a bare `map()` is a one-shot iterator
    # and could not be consulted more than once by the wait helper.
    replica_names = [replica.name for replica in volume["replicas"]]
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica["name"])
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
def test_hard_anti_affinity_offline_rebuild(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity can build new replicas during
    the attaching process once a valid node is available.

    Once a new replica has been built as part of the attaching process, the
    volume should be Healthy again.
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    # Build a real list: on Python 3 a bare `map()` is a one-shot iterator
    # and could not be consulted more than once by the wait helper.
    replica_names = [replica.name for replica in volume["replicas"]]
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica["name"])
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)

    volume.detach()
    volume = wait_for_volume_detached(client, volume_name)
    # Re-enable scheduling while the volume is detached so the replacement
    # replica can be built during the next attach.
    client.update(node, allowScheduling=True)

    volume.attach(hostId=host_id)
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
def test_hard_anti_affinity_detach(client, volume_name):  # NOQA
    """
    Verify that a degraded Hard Anti-Affinity volume can still be detached
    from and reattached to a node.

    1. Create a volume, attach it to the current node and wait until it is
       healthy with 3 replicas.
    2. Write random `data` to the volume.
    3. Set `soft anti-affinity` to false.
    4. Disable scheduling on the current node.
    5. Remove the replica on the current node.
        1. The volume becomes degraded.
        2. The volume reports a scheduling failure.
    6. Detach the volume and verify only 2 replicas remain.
    7. Attach the volume again.
        1. The volume is degraded and still reports a scheduling failure.
        2. Exactly 2 replicas are running in "RW" mode.
        3. Exactly 1 replica has no `hostId`.
        4. 3 replicas exist in total.
    8. Check the volume `data`.
    """
    volume = create_and_check_volume(client, volume_name)
    self_host = get_self_host_id()
    volume.attach(hostId=self_host)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    data = write_volume_random_data(volume)

    anti_affinity = client.by_id_setting(
        SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(anti_affinity, value="false")
    self_node = client.by_id_node(self_host)
    client.update(self_node, allowScheduling=False)

    local_replica = get_host_replica(volume, self_host)
    volume.replicaRemove(name=local_replica.name)
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)

    volume.detach()
    volume = wait_for_volume_detached(client, volume_name)
    assert len(volume.replicas) == 2

    volume.attach(hostId=self_host)
    # Make sure we're still not getting another successful replica.
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)

    healthy = [r for r in volume.replicas if r.running and r.mode == "RW"]
    assert len(healthy) == 2
    unscheduled = [r for r in volume.replicas if not r.hostId]
    assert len(unscheduled) == 1
    assert len(volume.replicas) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
def ha_simple_recovery_test(client, volume_name, size, base_image=""):  # NOQA
    """
    Run the replica rebuild test on a freshly attached 2-replica volume.

    Creates the volume, attaches it to the current node, waits until it is
    healthy, runs `ha_rebuild_replica_test` and finally cleans the volume up.
    """
    created = create_and_check_volume(client, volume_name, 2, size,
                                      base_image)

    self_host = get_self_host_id()
    created.attach(hostId=self_host)
    healthy_volume = common.wait_for_volume_healthy(client, volume_name)

    ha_rebuild_replica_test(client, volume_name)

    cleanup_volume(client, healthy_volume)
def test_hard_anti_affinity_scheduling(client, volume_name):  # NOQA
    """
    Verify that Hard Anti-Affinity forbids scheduling a replica on a node
    that already holds one, leaving the volume "Degraded".

    1. Create a volume, attach it to the current node and wait until it is
       healthy with 3 replicas.
    2. Write random `data` to the volume.
    3. Set `soft anti-affinity` to false.
    4. Disable scheduling on the current node.
    5. Remove the replica on the current node.
        1. The volume becomes degraded.
        2. The volume reports a scheduling failure.
        3. Exactly 2 replicas are running in "RW" mode.
        4. Exactly 1 replica has no `hostId`, i.e. it is unscheduled.
        5. 3 replicas exist in total.
    6. Check the volume `data`.
    """
    volume = create_and_check_volume(client, volume_name)
    self_host = get_self_host_id()
    volume.attach(hostId=self_host)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    data = write_volume_random_data(volume)

    anti_affinity = client.by_id_setting(
        SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(anti_affinity, value="false")
    self_node = client.by_id_node(self_host)
    client.update(self_node, allowScheduling=False)

    local_replica = get_host_replica(volume, self_host)
    volume.replicaRemove(name=local_replica.name)

    # Rather than sitting through a timeout, treat "Degraded plus a reported
    # scheduling error" as proof that the replica could not be scheduled.
    wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)

    # Three replicas exist to satisfy the Volume's request, but only two of
    # them should actually be healthy.
    volume = client.by_id_volume(volume_name)
    healthy = [r for r in volume.replicas if r.running and r.mode == "RW"]
    assert len(healthy) == 2

    # The remaining replica must be the unscheduled one.
    unscheduled = [r for r in volume.replicas if not r.hostId]
    assert len(unscheduled) == 1

    # Three replicas in total should still exist.
    assert len(volume.replicas) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
def recurring_job_labels_test(client, labels, volume_name, size=SIZE, base_image=""):  # NOQA
    """
    Check that labels set on a recurring backup job end up on the backup.

    1. Create a volume with 2 replicas and wait for it to be detached.
    2. Configure one recurring backup job (cron `*/2 * * * *`, retain 1)
       carrying `labels`.
    3. Attach the volume, wait until healthy and sleep 5 minutes.
    4. Expect exactly 2 snapshots that are not marked removed and exactly
       1 backup.
    5. Verify the backup carries every label plus the recurring job label,
       and additionally the base image label when `base_image` is set.
    """
    host_id = get_self_host_id()
    # NOTE(review): `base_image` is not passed to create_volume here, yet the
    # BASE_IMAGE_LABEL assertion below expects it on the backup - confirm
    # this is intended.
    client.create_volume(name=volume_name, size=size,
                         numberOfReplicas=2)
    volume = common.wait_for_volume_detached(client, volume_name)

    # Simple Backup Job that runs every 2 minutes, retains 1.
    jobs = [
        {
            "name": RECURRING_JOB_NAME,
            "cron": "*/2 * * * *",
            "task": "backup",
            "retain": 1,
            "labels": labels
        }
    ]
    volume.recurringUpdate(jobs=jobs)
    volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    # 5 minutes
    time.sleep(300)
    snapshots = volume.snapshotList()
    count = sum(1 for snapshot in snapshots if snapshot["removed"] is False)

    # 1 from Backup, 1 from Volume Head.
    assert count == 2

    # Verify the Labels on the actual Backup.
    bv = client.by_id_backupVolume(volume_name)
    backups = bv.backupList()
    assert len(backups) == 1

    b = bv.backupGet(name=backups[0]["name"])
    # `items()` exists on both Python 2 and Python 3 dicts, unlike the
    # Python 2 only `iteritems()`.
    for key, val in labels.items():
        assert b["labels"].get(key) == val
    assert b["labels"].get(RECURRING_JOB_LABEL) == RECURRING_JOB_NAME
    if base_image:
        assert b["labels"].get(BASE_IMAGE_LABEL) == base_image
        # Two extra Labels: one from RecurringJob, one from the BaseImage.
        assert len(b["labels"]) == len(labels) + 2
    else:
        # Exactly one extra Label, from RecurringJob.
        assert len(b["labels"]) == len(labels) + 1

    cleanup_volume(client, volume)
def test_ha_prohibit_deleting_last_replica(client, volume_name):  # NOQA
    """
    Verify that removing the only replica of a volume is rejected.

    1. Create a volume with a single replica, attach it to the current node
       and wait until it is healthy.
    2. Try to remove that replica and expect an exception whose message
       contains "no other healthy replica available".
    3. Clean up the volume.
    """
    volume = create_and_check_volume(client, volume_name, 1)

    self_host = get_self_host_id()
    volume.attach(hostId=self_host)
    volume = common.wait_for_volume_healthy(client, volume_name)

    replicas = volume["replicas"]
    assert len(replicas) == 1
    only_replica = replicas[0]

    with pytest.raises(Exception) as exc_info:
        volume.replicaRemove(name=only_replica["name"])
    error_message = str(exc_info.value)
    assert "no other healthy replica available" in error_message

    cleanup_volume(client, volume)
def test_hard_anti_affinity_offline_rebuild(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity can build new replicas during
    the attaching process once a valid node is available.

    Once a new replica has been built as part of the attaching process, the
    volume should be Healthy again.

    1. Create a volume and attach to the current node
    2. Generate and write `data` to the volume.
    3. Set `soft anti-affinity` to false
    4. Disable current node's scheduling.
    5. Remove the replica on the current node
        1. Verify volume will be in degraded state.
        2. Verify volume reports condition `scheduled == false`
    6. Detach the volume.
    7. Enable current node's scheduling.
    8. Attach the volume again.
    9. Wait for volume to become healthy with 3 replicas
    10. Check volume `data`
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    # Build a real list: on Python 3 a bare `map()` is a one-shot iterator
    # and could not be consulted more than once by the wait helper.
    replica_names = [replica.name for replica in volume.replicas]
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica.name)
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)

    volume.detach()
    volume = wait_for_volume_detached(client, volume_name)
    client.update(node, allowScheduling=True)

    volume.attach(hostId=host_id)
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
def test_tag_scheduling(client, node_default_tags):  # NOQA
    """
    Verify that volumes get scheduled when Nodes/Disks carrying the requested
    Tags are available.

    For each selector combination a 3-replica volume is created with the
    given disk and node selectors, attached to the current node, checked
    with `check_volume_replicas` and cleaned up again.
    """
    self_host = get_self_host_id()

    select_all_nodes = {"disk": [], "expected": 3, "node": []}
    disk_tags_and = {"disk": ["ssd", "nvme"], "expected": 2, "node": []}
    node_tags_and = {"disk": [], "expected": 2, "node": ["main", "storage"]}
    disk_and_node = {
        "disk": ["ssd", "nvme"],
        "expected": 1,
        "node": ["storage", "main"],
    }
    cases = [
        # Select all Nodes.
        select_all_nodes,
        # Selector works with AND on Disk Tags.
        disk_tags_and,
        # Selector works with AND on Node Tags.
        node_tags_and,
        # Selector works based on combined Disk AND Node selector.
        disk_and_node,
    ]

    for case in cases:
        volume_name = generate_volume_name()  # NOQA
        disk_selector = case["disk"]
        node_selector = case["node"]
        client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=3,
                             diskSelector=disk_selector,
                             nodeSelector=node_selector)
        volume = wait_for_volume_detached(client, volume_name)
        assert volume["diskSelector"] == case["disk"]
        assert volume["nodeSelector"] == case["node"]

        volume.attach(hostId=self_host)
        volume = wait_for_volume_healthy(client, volume_name)
        assert len(volume["replicas"]) == 3
        check_volume_replicas(volume, case, node_default_tags)

        cleanup_volume(client, volume)
def backup_labels_test(clients, random_labels, volume_name, size=SIZE, base_image=""):  # NOQA
    """
    Check that labels passed to `create_backup` are stored on the backup,
    for every configured backup target.

    1. Pick an arbitrary client from `clients`.
    2. Create a volume with 2 replicas, attach it to the current node and
       wait for it to become healthy.
    3. For every entry returned by `common.get_backupstore_url()`:
        1. Update the backup target setting (and the credential secret,
           which is cleared for non-S3 targets) and verify the new values.
        2. Create a backup with `random_labels`.
        3. Verify the number of labels on the backup; with `base_image`
           set, one extra base image label is expected.
    4. Clean up the volume.
    """
    # Any client will do. `values()` exists on both Python 2 and Python 3
    # dicts, unlike the Python 2 only `iteritems()`.
    client = next(iter(clients.values()))
    host_id = get_self_host_id()

    volume = create_and_check_volume(client, volume_name, 2, size, base_image)

    volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    setting = client.by_id_setting(common.SETTING_BACKUP_TARGET)
    # test backupTarget for multiple settings
    backupstores = common.get_backupstore_url()
    for backupstore in backupstores:
        if common.is_backupTarget_s3(backupstore):
            # For S3 entries the part before "$" is used as the target and
            # the part after it as the credential secret.
            backupsettings = backupstore.split("$")
            setting = client.update(setting, value=backupsettings[0])
            assert setting["value"] == backupsettings[0]

            credential = client.by_id_setting(
                common.SETTING_BACKUP_TARGET_CREDENTIAL_SECRET)
            credential = client.update(credential, value=backupsettings[1])
            assert credential["value"] == backupsettings[1]
        else:
            setting = client.update(setting, value=backupstore)
            assert setting["value"] == backupstore
            # Non-S3 targets run with an empty credential secret.
            credential = client.by_id_setting(
                common.SETTING_BACKUP_TARGET_CREDENTIAL_SECRET)
            credential = client.update(credential, value="")
            assert credential["value"] == ""

        bv, b, _, _ = create_backup(client, volume_name, labels=random_labels)
        # If we're running the test with a BaseImage, check that this Label is
        # set properly.
        backup = bv.backupGet(name=b["name"])
        if base_image:
            assert backup["labels"].get(common.BASE_IMAGE_LABEL) == base_image
            # One extra Label from the BaseImage being set.
            assert len(backup["labels"]) == len(random_labels) + 1
        else:
            assert len(backup["labels"]) == len(random_labels)

    cleanup_volume(client, volume)
def test_tag_scheduling_on_update(client, node_default_tags, volume_name):  # NOQA
    """
    Verify that Replicas get scheduled once a Node/Disk is updated with the
    proper Tags.

    1. Create a 3-replica volume with disk and node selectors and expect a
       scheduling failure.
    2. Set the requested tags on the current node and on its first disk.
    3. Poll until the volume's `scheduled` condition reports "True".
    4. Attach the volume, wait until it is healthy with 3 replicas and
       check them with `check_volume_replicas` against the tags currently
       listed for each node.
    5. Clean up the volume.
    """
    wanted_disk_tags = ["ssd", "m2"]
    wanted_node_tags = ["main", "fallback"]
    tag_spec = {
        "disk": wanted_disk_tags,
        "expected": 1,
        "node": wanted_node_tags,
    }
    client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=3,
                         diskSelector=tag_spec["disk"],
                         nodeSelector=tag_spec["node"])
    volume = wait_for_volume_detached(client, volume_name)
    assert volume["diskSelector"] == tag_spec["disk"]
    assert volume["nodeSelector"] == tag_spec["node"]

    wait_scheduling_failure(client, volume_name)

    self_host = get_self_host_id()
    self_node = client.by_id_node(self_host)
    disks_to_update = get_update_disks(self_node["disks"])
    disks_to_update[0]["tags"] = tag_spec["disk"]
    self_node = self_node.diskUpdate(disks=disks_to_update)
    set_node_tags(client, self_node, tag_spec["node"])

    scheduled = False
    for _ in range(RETRY_COUNTS):
        current = client.by_id_volume(volume_name)
        scheduled = current["conditions"]["scheduled"]["status"] == "True"
        if scheduled:
            break
        sleep(RETRY_INTERVAL)
    assert scheduled

    volume.attach(hostId=self_host)
    volume = wait_for_volume_healthy(client, volume_name)

    node_mapping = {}
    for listed in client.list_node():
        node_mapping[listed["id"]] = {
            "disk": get_update_disks(listed["disks"])[0]["tags"],
            "node": listed["tags"],
        }

    assert len(volume["replicas"]) == 3
    check_volume_replicas(volume, tag_spec, node_mapping)

    cleanup_volume(client, volume)
def test_hard_anti_affinity_live_rebuild(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity can build new replicas live once
    a valid node is available.

    If no nodes without existing replicas are available, the volume should
    remain in "Degraded" state. However, once one is available, the replica
    should now be scheduled successfully, with the volume returning to
    "Healthy" state.

    1. Create a volume and attach to the current node
    2. Generate and write `data` to the volume.
    3. Set `soft anti-affinity` to false
    4. Disable current node's scheduling.
    5. Remove the replica on the current node
        1. Verify volume will be in degraded state.
        2. Verify volume reports condition `scheduled == false`
    6. Enable the current node's scheduling
    7. Wait for volume to start rebuilding and become healthy again
    8. Check volume `data`
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    # Build a real list: on Python 3 a bare `map()` is a one-shot iterator
    # and could not be consulted more than once by the wait helper.
    replica_names = [replica.name for replica in volume.replicas]
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica.name)
    wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)

    # Allow scheduling on host node again
    client.update(node, allowScheduling=True)
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
def test_deleting_backup_volume(clients):  # NOQA
    """
    Test that a backup volume holding several backups can be deleted.

    1. Pick an arbitrary client, create a volume, attach it to the current
       node and wait for it to become healthy.
    2. Create two backups of the volume.
    3. Delete the backup volume and wait until it is gone.
    4. Clean up the volume.
    """
    # Any client will do. `values()` exists on both Python 2 and Python 3
    # dicts, unlike the Python 2 only `iteritems()`.
    client = next(iter(clients.values()))
    lht_hostId = get_self_host_id()

    volName = generate_volume_name()
    volume = create_and_check_volume(client, volName)

    volume.attach(hostId=lht_hostId)
    volume = common.wait_for_volume_healthy(client, volName)

    # Only the side effect (two backups existing) matters here; the backup
    # volume is looked up again below.
    create_backup(client, volName)
    create_backup(client, volName)

    bv = client.by_id_backupVolume(volName)
    client.delete(bv)
    common.wait_for_backup_volume_delete(client, volName)
    cleanup_volume(client, volume)
def test_soft_anti_affinity_detach(client, volume_name):  # NOQA
    """
    Verify that a Soft Anti-Affinity volume can be detached from and
    reattached to a node.

    1. Create a volume, attach it to the current node and wait until it is
       healthy with 3 replicas.
    2. Write random `data` to the volume.
    3. Set `soft anti-affinity` to true.
    4. Disable scheduling on the current node.
    5. Remove the replica on the current node.
    6. Wait for a new replica to be ready and the volume to be healthy.
    7. Detach the volume and verify it has 3 replicas.
    8. Attach the volume again and verify it still has 3 replicas.
    9. Check the volume `data`.
    """
    volume = create_and_check_volume(client, volume_name)
    self_host = get_self_host_id()
    volume.attach(hostId=self_host)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    data = write_volume_random_data(volume)

    anti_affinity = client.by_id_setting(
        SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(anti_affinity, value="true")
    self_node = client.by_id_node(self_host)
    client.update(self_node, allowScheduling=False)

    # Names of the replicas that existed before the removal.
    original_names = [r.name for r in volume.replicas]
    local_replica = get_host_replica(volume, self_host)

    volume.replicaRemove(name=local_replica.name)
    wait_new_replica_ready(client, volume_name, original_names)
    volume = wait_for_volume_healthy(client, volume_name)

    volume.detach()
    volume = wait_for_volume_detached(client, volume_name)
    assert len(volume.replicas) == 3

    volume.attach(hostId=self_host)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
def test_hard_anti_affinity_scheduling(client, volume_name):  # NOQA
    """
    Verify that Hard Anti-Affinity forbids scheduling a replica on a node
    that already holds one, leaving the volume "Degraded".

    After the replica on the current node is removed while that node is
    unschedulable, the volume must be degraded with a scheduling failure,
    keep 3 replicas in total, have exactly 2 running in "RW" mode and
    exactly 1 without a `hostId`.
    """
    volume = create_and_check_volume(client, volume_name)
    self_host = get_self_host_id()
    volume.attach(hostId=self_host)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    data = write_volume_random_data(volume)

    anti_affinity = client.by_id_setting(SETTING_REPLICA_SOFT_ANTI_AFFINITY)
    client.update(anti_affinity, value="false")
    self_node = client.by_id_node(self_host)
    client.update(self_node, allowScheduling=False)

    local_replica = get_host_replica(volume, self_host)
    volume.replicaRemove(name=local_replica["name"])

    # Rather than sitting through a timeout, treat "Degraded plus a reported
    # scheduling error" as proof that the replica could not be scheduled.
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)

    # Three replicas exist to satisfy the Volume's request, but only two of
    # them should actually be healthy.
    healthy = [r for r in volume["replicas"]
               if r["running"] and r["mode"] == "RW"]
    assert len(healthy) == 2

    # The remaining replica must be the unscheduled one.
    unscheduled = [r for r in volume["replicas"] if not r["hostId"]]
    assert len(unscheduled) == 1

    # Three replicas in total should still exist.
    assert len(volume["replicas"]) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
def test_hard_anti_affinity_detach(client, volume_name):  # NOQA
    """
    Verify that a degraded Hard Anti-Affinity volume can still be detached
    from and reattached to a node.

    After the replica on the current node is removed while that node is
    unschedulable, the volume is detached (2 replicas remain) and attached
    again. It must then be degraded with a scheduling failure, have 3
    replicas in total, exactly 2 running in "RW" mode and exactly 1 without
    a `hostId`.
    """
    volume = create_and_check_volume(client, volume_name)
    self_host = get_self_host_id()
    volume.attach(hostId=self_host)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    data = write_volume_random_data(volume)

    anti_affinity = client.by_id_setting(SETTING_REPLICA_SOFT_ANTI_AFFINITY)
    client.update(anti_affinity, value="false")
    self_node = client.by_id_node(self_host)
    client.update(self_node, allowScheduling=False)

    local_replica = get_host_replica(volume, self_host)
    volume.replicaRemove(name=local_replica["name"])
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)

    volume.detach()
    volume = wait_for_volume_detached(client, volume_name)
    assert len(volume["replicas"]) == 2

    volume.attach(hostId=self_host)
    # Make sure we're still not getting another successful replica.
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)

    healthy = [r for r in volume["replicas"]
               if r["running"] and r["mode"] == "RW"]
    assert len(healthy) == 2
    unscheduled = [r for r in volume["replicas"] if not r["hostId"]]
    assert len(unscheduled) == 1
    assert len(volume["replicas"]) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
def test_soft_anti_affinity_scheduling(client, volume_name):  # NOQA
    """
    Verify that Soft Anti-Affinity still schedules a new replica on a node
    that already holds one, giving a "Healthy" volume with limited
    redundancy.

    1. Create a volume, attach it to the current node and wait until it is
       healthy with 3 replicas.
    2. Write random `data` to the volume.
    3. Set `soft anti-affinity` to true.
    4. Disable scheduling on the current node.
    5. Remove the replica on the current node.
    6. Wait for a new replica to be ready and the volume to be healthy with
       3 replicas.
    7. Check the volume `data`.
    """
    volume = create_and_check_volume(client, volume_name)
    self_host = get_self_host_id()
    volume.attach(hostId=self_host)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    data = write_volume_random_data(volume)

    anti_affinity = client.by_id_setting(
        SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(anti_affinity, value="true")
    self_node = client.by_id_node(self_host)
    client.update(self_node, allowScheduling=False)

    # Names of the replicas that existed before the removal.
    original_names = [r.name for r in volume.replicas]
    local_replica = get_host_replica(volume, self_host)

    volume.replicaRemove(name=local_replica.name)
    wait_new_replica_ready(client, volume_name, original_names)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
def ha_salvage_test(client, volume_name, base_image=""):  # NOQA
    """
    Verify that a faulted volume can be salvaged without losing data.

    1. Create a 2-replica volume, attach it to the current node and wait
       until it is healthy.
    2. Write random `data` to the volume.
    3. Delete the volume's replica pods and wait for the volume to become
       faulted; both replicas must report a non-empty `failedAt`.
    4. Salvage both replicas and wait for the volume to be detached; both
       replicas must report an empty `failedAt`.
    5. Attach the volume again, wait until healthy and check `data`.
    6. Clean up the volume.
    """
    volume = create_and_check_volume(client, volume_name, 2,
                                     base_image=base_image)

    self_host = get_self_host_id()
    volume.attach(hostId=self_host)
    volume = common.wait_for_volume_healthy(client, volume_name)

    assert len(volume["replicas"]) == 2
    first_name = volume["replicas"][0]["name"]
    second_name = volume["replicas"][1]["name"]

    data = write_volume_random_data(volume)

    common.k8s_delete_replica_pods_for_volume(volume_name)

    volume = common.wait_for_volume_faulted(client, volume_name)
    assert len(volume["replicas"]) == 2
    for index in (0, 1):
        assert volume["replicas"][index]["failedAt"] != ""

    volume.salvage(names=[first_name, second_name])

    volume = common.wait_for_volume_detached(client, volume_name)
    assert len(volume["replicas"]) == 2
    for index in (0, 1):
        assert volume["replicas"][index]["failedAt"] == ""

    volume.attach(hostId=self_host)
    volume = common.wait_for_volume_healthy(client, volume_name)

    check_volume_data(volume, data)

    cleanup_volume(client, volume)
def test_backup_kubernetes_status(set_random_backupstore, client, core_api, pod):  # NOQA
    """
    Test that Backups have KubernetesStatus stored properly when there is an
    associated PersistentVolumeClaim and Pod.

    1. Setup a random backupstore
    2. Set settings Longhorn Static StorageClass to `longhorn-static-test`
    3. Create a volume and PV/PVC. Verify the StorageClass of PVC
    4. Create a Pod using the PVC.
    5. Check volume's Kubernetes status to reflect PV/PVC/Pod correctly.
    6. Create a backup for the volume.
    7. Verify the labels of created backup reflect PV/PVC/Pod status.
    8. Restore the backup to a volume. Wait for restoration to complete.
    9. Check the volume's Kubernetes Status
        1. Make sure the `lastPodRefAt` and `lastPVCRefAt` is snapshot created
           time
    10. Delete the backup and restored volume.
    11. Delete PV/PVC/Pod.
    12. Verify volume's Kubernetes Status updated to reflect history data.
    13. Attach the volume and create another backup. Verify the labels
    14. Verify the volume's Kubernetes status.
    15. Restore the previous backup to a new volume. Wait for restoration.
    16. Verify the restored volume's Kubernetes status.
        1. Make sure `lastPodRefAt` and `lastPVCRefAt` matched volume on
           step 12
    """
    host_id = get_self_host_id()

    # Step 2: point the static StorageClass setting at the test class name.
    static_sc_name = "longhorn-static-test"
    setting = client.by_id_setting(SETTING_DEFAULT_LONGHORN_STATIC_SC)
    setting = client.update(setting, value=static_sc_name)
    assert setting.value == static_sc_name

    # Step 3: create the volume plus its PV and PVC.
    volume_name = "test-backup-kubernetes-status-pod"  # NOQA
    client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=2)
    volume = wait_for_volume_detached(client, volume_name)

    pod_name = "pod-" + volume_name
    pv_name = "pv-" + volume_name
    pvc_name = "pvc-" + volume_name
    create_pv_for_volume(client, core_api, volume, pv_name)
    create_pvc_for_volume(client, core_api, volume, pvc_name)

    # Look the PVC up in the 'default' namespace; `pvc_found` stays False
    # when it is missing and becomes the PVC object when it is present.
    ret = core_api.list_namespaced_persistent_volume_claim(namespace='default')
    pvc_found = False
    for item in ret.items:
        if item.metadata.name == pvc_name:
            pvc_found = item
            break
    assert pvc_found
    assert pvc_found.spec.storage_class_name == static_sc_name

    # Step 4: rewrite the pod fixture so its first volume mount uses our PVC.
    pod['metadata']['name'] = pod_name
    pod['spec']['volumes'] = [{
        'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'],
        'persistentVolumeClaim': {
            'claimName': pvc_name,
        },
    }]
    create_and_wait_pod(core_api, pod)

    # Step 5: expected Kubernetes status while PV, PVC and Pod all exist.
    ks = {
        'lastPodRefAt': '',
        'lastPVCRefAt': '',
        'namespace': 'default',
        'pvcName': pvc_name,
        'pvName': pv_name,
        'pvStatus': 'Bound',
        'workloadsStatus': [{
            'podName': pod_name,
            'podStatus': 'Running',
            'workloadName': '',
            'workloadType': ''
        }]
    }
    wait_volume_kubernetes_status(client, volume_name, ks)
    volume = wait_for_volume_healthy(client, volume_name)

    # Create Backup manually instead of calling create_backup since Kubernetes
    # is not guaranteed to mount our Volume to the test host.
    snap = create_snapshot(client, volume_name)
    volume.snapshotBackup(name=snap.name)
    wait_for_backup_completion(client, volume_name, snap.name)
    _, b = find_backup(client, volume_name, snap.name)

    # Check backup label
    status = loads(b.labels.get(KUBERNETES_STATUS_LABEL))
    assert status == ks

    # Check backup volume label
    # Poll until the backup volume exists and has labels, up to RETRY_COUNTS
    # attempts with RETRY_INTERVAL between them.
    for _ in range(RETRY_COUNTS):
        bv = client.by_id_backupVolume(volume_name)
        if bv is not None and bv.labels is not None:
            break
        time.sleep(RETRY_INTERVAL)
    assert bv is not None and bv.labels is not None
    status = loads(bv.labels.get(KUBERNETES_STATUS_LABEL))
    assert status == ks

    # Step 8: restore the backup into a fresh volume.
    restore_name = generate_volume_name()
    client.create_volume(name=restore_name, size=SIZE,
                         numberOfReplicas=2,
                         fromBackup=b.url)
    wait_for_volume_restoration_completed(client, restore_name)
    wait_for_volume_detached(client, restore_name)

    # Kept for the later check that a second backup does NOT reuse this time.
    snapshot_created = b.snapshotCreated
    ks = {
        'lastPodRefAt': b.snapshotCreated,
        'lastPVCRefAt': b.snapshotCreated,
        'namespace': 'default',
        'pvcName': pvc_name,
        # Restoration should not apply PersistentVolume data.
        'pvName': '',
        'pvStatus': '',
        'workloadsStatus': [{
            'podName': pod_name,
            'podStatus': 'Running',
            'workloadName': '',
            'workloadType': ''
        }]
    }
    wait_volume_kubernetes_status(client, restore_name, ks)
    restore = client.by_id_volume(restore_name)
    # We need to compare LastPodRefAt and LastPVCRefAt manually since
    # wait_volume_kubernetes_status only checks for empty or non-empty state.
    assert restore.kubernetesStatus.lastPodRefAt == ks["lastPodRefAt"]
    assert restore.kubernetesStatus.lastPVCRefAt == ks["lastPVCRefAt"]

    # Steps 10-11: remove the backup, the restored volume, and the workload.
    delete_backup(client, bv.name, b.name)
    client.delete(restore)
    wait_for_volume_delete(client, restore_name)
    delete_and_wait_pod(core_api, pod_name)
    delete_and_wait_pvc(core_api, pvc_name)
    delete_and_wait_pv(core_api, pv_name)

    # With the Pod, PVC, and PV deleted, the Volume should have both Ref
    # fields set. Check that a new Backup and Restore will use this instead of
    # manually populating the Ref fields.
    ks = {
        'lastPodRefAt': 'NOT NULL',
        'lastPVCRefAt': 'NOT NULL',
        'namespace': 'default',
        'pvcName': pvc_name,
        'pvName': '',
        'pvStatus': '',
        'workloadsStatus': [{
            'podName': pod_name,
            'podStatus': 'Running',
            'workloadName': '',
            'workloadType': ''
        }]
    }
    wait_volume_kubernetes_status(client, volume_name, ks)
    volume = wait_for_volume_detached(client, volume_name)

    # Step 13: attach to the test host and take a second backup.
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)

    snap = create_snapshot(client, volume_name)
    volume.snapshotBackup(name=snap.name)
    volume = wait_for_backup_completion(client, volume_name, snap.name)
    bv, b = find_backup(client, volume_name, snap.name)
    new_b = bv.backupGet(name=b.name)
    status = loads(new_b.labels.get(KUBERNETES_STATUS_LABEL))
    # Check each field manually, we have no idea what the LastPodRefAt or the
    # LastPVCRefAt will be. We just know it shouldn't be SnapshotCreated.
    assert status['lastPodRefAt'] != snapshot_created
    assert status['lastPVCRefAt'] != snapshot_created
    assert status['namespace'] == "default"
    assert status['pvcName'] == pvc_name
    assert status['pvName'] == ""
    assert status['pvStatus'] == ""
    assert status['workloadsStatus'] == [{
        'podName': pod_name,
        'podStatus': 'Running',
        'workloadName': '',
        'workloadType': ''
    }]

    # Step 15: restore the second backup; its Ref fields must carry over.
    restore_name = generate_volume_name()
    client.create_volume(name=restore_name, size=SIZE,
                         numberOfReplicas=2,
                         fromBackup=b.url)
    wait_for_volume_restoration_completed(client, restore_name)
    wait_for_volume_detached(client, restore_name)

    ks = {
        'lastPodRefAt': status['lastPodRefAt'],
        'lastPVCRefAt': status['lastPVCRefAt'],
        'namespace': 'default',
        'pvcName': pvc_name,
        'pvName': '',
        'pvStatus': '',
        'workloadsStatus': [{
            'podName': pod_name,
            'podStatus': 'Running',
            'workloadName': '',
            'workloadType': ''
        }]
    }
    wait_volume_kubernetes_status(client, restore_name, ks)

    restore = client.by_id_volume(restore_name)
    assert restore.kubernetesStatus.lastPodRefAt == ks["lastPodRefAt"]
    assert restore.kubernetesStatus.lastPVCRefAt == ks["lastPVCRefAt"]

    # cleanup
    backupstore_cleanup(client)
    client.delete(restore)
    cleanup_volume(client, volume)
def test_setting_toleration():
    """
    Test toleration setting

    1. Set `taint-toleration` to "key1=value1:NoSchedule; key2:InvalidEffect".
    2. Verify the request fails.
    3. Create a volume and attach it.
    4. Set `taint-toleration` to "key1=value1:NoSchedule; key2:NoExecute".
    5. Verify that the toleration setting cannot be updated while any volume
       is attached.
    6. Generate and write `data1` into the volume.
    7. Detach the volume.
    8. Set `taint-toleration` to "key1=value1:NoSchedule; key2:NoExecute".
    9. Wait for all the Longhorn system components to restart with new
       toleration.
    10. Verify that UI, manager, and driver deployer don't restart and
        don't have new toleration.
    11. Attach the volume again and verify the volume `data1`.
    12. Generate and write `data2` to the volume.
    13. Detach the volume.
    14. Clean the `toleration` setting.
    15. Wait for all the Longhorn system components to restart with no
        toleration.
    16. Attach the volume and validate `data2`.
    17. Generate and write `data3` to the volume.
    """
    client = get_longhorn_api_client()  # NOQA
    apps_api = get_apps_api_client()  # NOQA
    core_api = get_core_api_client()  # NOQA
    node_count = len(client.list_node())

    toleration_setting = client.by_id_setting(SETTING_TAINT_TOLERATION)

    # An unknown taint effect must be rejected.
    with pytest.raises(Exception) as exc_info:
        client.update(toleration_setting,
                      value="key1=value1:NoSchedule; key2:InvalidEffect")
    assert 'invalid effect' in str(exc_info.value)

    volume_name = "test-toleration-vol"  # NOQA
    volume = create_and_check_volume(client, volume_name)
    volume.attach(hostId=get_self_host_id())
    volume = wait_for_volume_healthy(client, volume_name)

    toleration_value = "key1=value1:NoSchedule; key2:NoExecute"
    expected_tolerations = [
        {
            "key": "key1",
            "value": "value1",
            "operator": "Equal",
            "effect": "NoSchedule"
        },
        {
            "key": "key2",
            "value": None,
            "operator": "Exists",
            "effect": "NoExecute"
        },
    ]

    # A valid value is still refused while the volume is attached.
    with pytest.raises(Exception) as exc_info:
        client.update(toleration_setting, value=toleration_value)
    assert 'cannot modify toleration setting before all volumes are detached' \
        in str(exc_info.value)

    data1 = write_volume_random_data(volume)
    check_volume_data(volume, data1)

    volume.detach(hostId="")
    wait_for_volume_detached(client, volume_name)

    toleration_setting = client.update(toleration_setting,
                                       value=toleration_value)
    assert toleration_setting.value == toleration_value
    wait_for_toleration_update(core_api, apps_api, node_count,
                               expected_tolerations)

    # The wait helper hands back a new client and a node to attach to.
    client, ready_node = wait_for_longhorn_node_ready()

    volume = client.by_id_volume(volume_name)
    volume.attach(hostId=ready_node)
    volume = wait_for_volume_healthy(client, volume_name)
    check_volume_data(volume, data1)
    data2 = write_volume_random_data(volume)
    check_volume_data(volume, data2)
    volume.detach(hostId="")
    wait_for_volume_detached(client, volume_name)

    # cleanup: clear the toleration and wait for the components to follow.
    toleration_setting = client.by_id_setting(SETTING_TAINT_TOLERATION)
    toleration_setting = client.update(toleration_setting, value="")
    assert toleration_setting.value == ""
    wait_for_toleration_update(core_api, apps_api, node_count, [])

    client, ready_node = wait_for_longhorn_node_ready()

    volume = client.by_id_volume(volume_name)
    volume.attach(hostId=ready_node)
    volume = wait_for_volume_healthy(client, volume_name)
    check_volume_data(volume, data2)
    data3 = write_volume_random_data(volume)
    check_volume_data(volume, data3)

    cleanup_volume(client, volume)
def test_setting_priority_class(core_api, apps_api, scheduling_api, priority_class, volume_name):  # NOQA
    """
    Test that the Priority Class setting is validated and utilized correctly.

    1. Verify that the name of a non-existent Priority Class cannot be used
       for the Setting.
    2. Create a new Priority Class in Kubernetes.
    3. Create and attach a Volume.
    4. Verify that the Priority Class Setting cannot be updated with an
       attached Volume.
    5. Generate and write `data1`.
    6. Detach the Volume.
    7. Update the Priority Class Setting to the new Priority Class.
    8. Wait for all the Longhorn system components to restart with the new
       Priority Class.
    9. Verify that UI, manager, and driver deployer don't have Priority Class
    10. Attach the Volume and verify `data1`.
    11. Generate and write `data2`.
    12. Unset the Priority Class Setting.
    13. Wait for all the Longhorn system components to restart without a
        Priority Class.
    14. Verify that UI, manager, and driver deployer don't have Priority Class
    15. Attach the Volume and verify `data2`.
    16. Generate and write `data3`.

    Note: system components are workloads other than UI, manager, driver
    deployer
    """
    client = get_longhorn_api_client()  # NOQA
    node_count = len(client.list_node())
    pc_name = priority_class['metadata']['name']
    pc_setting = client.by_id_setting(SETTING_PRIORITY_CLASS)

    # The Priority Class has not been created yet, so the update must fail.
    with pytest.raises(Exception) as exc_info:
        client.update(pc_setting, value=pc_name)
    assert 'failed to get priority class ' in str(exc_info.value)

    scheduling_api.create_priority_class(priority_class)

    volume = create_and_check_volume(client, volume_name)
    volume.attach(hostId=get_self_host_id())
    volume = wait_for_volume_healthy(client, volume_name)

    # Now the class exists, but the attached volume blocks the update.
    with pytest.raises(Exception) as exc_info:
        client.update(pc_setting, value=pc_name)
    assert ('cannot modify priority class setting before all volumes are '
            'detached') in str(exc_info.value)

    data1 = write_volume_random_data(volume)
    check_volume_data(volume, data1)

    volume.detach(hostId="")
    wait_for_volume_detached(client, volume_name)

    pc_setting = client.update(pc_setting, value=pc_name)
    assert pc_setting.value == pc_name

    wait_for_priority_class_update(core_api, apps_api, node_count,
                                   priority_class)

    # The wait helper hands back a new client and a node to attach to.
    client, ready_node = wait_for_longhorn_node_ready()

    volume = client.by_id_volume(volume_name)
    volume.attach(hostId=ready_node)
    volume = wait_for_volume_healthy(client, volume_name)
    check_volume_data(volume, data1)
    data2 = write_volume_random_data(volume)
    check_volume_data(volume, data2)
    volume.detach(hostId="")
    wait_for_volume_detached(client, volume_name)

    # Unset the Priority Class and wait for the components to follow.
    pc_setting = client.by_id_setting(SETTING_PRIORITY_CLASS)
    pc_setting = client.update(pc_setting, value='')
    assert pc_setting.value == ''
    wait_for_priority_class_update(core_api, apps_api, node_count)

    client, ready_node = wait_for_longhorn_node_ready()

    volume = client.by_id_volume(volume_name)
    volume.attach(hostId=ready_node)
    volume = wait_for_volume_healthy(client, volume_name)
    check_volume_data(volume, data2)
    data3 = write_volume_random_data(volume)
    check_volume_data(volume, data3)

    cleanup_volume(client, volume)
def test_instance_manager_cpu_reservation(client, core_api):  # NOQA
    """
    Test if the CPU requests of instance manager pods are controlled by
    the settings and the node specs correctly.

    1. Try to change the deprecated setting `Guaranteed Engine CPU`.
       --> The setting update should fail.
    2. Pick up node 1, set `node.engineManagerCPURequest` and
       `node.replicaManagerCPURequest` to 150 and 250, respectively.
       --> The IM pods on this node will be restarted. And the CPU requests
       of these IM pods matches the above milli value.
    3. Change the new settings `Guaranteed Engine Manager CPU` and
       `Guaranteed Replica Manager CPU` to 10 and 20, respectively.
       Then wait for all IM pods except for the pods on node 1 restarting.
       --> The CPU requests of the restarted IM pods equals to
           the new setting value multiply the kube node allocatable CPU.
    4. Set the both new settings to 0.
       --> All IM pods except for the pod on node 1 will be restarted without
        CPU requests.
    5. Set the fields on node 1 to 0.
       --> The IM pods on node 1 will be restarted without CPU requests.
    6. Set the both new settings to 2 values whose sum is smaller than 40
       (this test uses 20 and 15). Then wait for all IM pods restarting.
       --> The CPU requests of all IM pods equals to
           the new setting value multiply the kube node allocatable CPU.
    7. Set the new settings so that a single value or the sum of the 2
       values is greater than 40 (this test uses 41, then 35 with 15).
       --> The setting update should fail.
    8. Create a volume, verify everything works as normal

    Note: use fixture to restore the setting into the original state
    """
    instance_managers = client.list_instance_manager()

    # Step 1: the deprecated setting must refuse updates.
    deprecated_setting = client.by_id_setting(SETTING_GUARANTEED_ENGINE_CPU)
    with pytest.raises(Exception):
        client.update(deprecated_setting, value="0.1")

    host_node_name = get_self_host_id()
    host_node = client.by_id_node(host_node_name)

    # Split the instance managers into "on this host" and "elsewhere".
    # The host entries start as None so that a missing instance manager
    # fails the assertion below instead of raising UnboundLocalError.
    em_on_host = None
    rm_on_host = None
    other_ems, other_rms = [], []
    for im in instance_managers:
        on_host = im.nodeID == host_node_name
        if im.managerType == "engine":
            if on_host:
                em_on_host = im
            else:
                other_ems.append(im)
        else:
            if on_host:
                rm_on_host = im
            else:
                other_rms.append(im)
    assert em_on_host and rm_on_host

    # The allocatable CPU is read either as millicores ("...m") or as a whole
    # number of cores; normalise it to millicores.
    host_kb_node = core_api.read_node(host_node_name)
    allocatable_cpu = host_kb_node.status.allocatable["cpu"]
    if allocatable_cpu.endswith('m'):
        allocatable_millicpu = int(allocatable_cpu[:-1])
    else:
        allocatable_millicpu = int(allocatable_cpu) * 1000

    def expected_cpu_request(percent):
        # CPU request string for `percent` of the host's allocatable CPU.
        return str(int(allocatable_millicpu * percent / 100)) + "m"

    # Step 2: per-node requests apply to the instance managers on this host.
    client.update(host_node, allowScheduling=True,
                  engineManagerCPURequest=150, replicaManagerCPURequest=250)
    time.sleep(5)
    guaranteed_engine_cpu_setting_check(
        client, core_api, [em_on_host], "Running", True, "150m")
    guaranteed_engine_cpu_setting_check(
        client, core_api, [rm_on_host], "Running", True, "250m")

    # Step 3: the global settings are checked on the other instance managers.
    em_setting = client.by_id_setting(SETTING_GUARANTEED_ENGINE_MANAGER_CPU)
    client.update(em_setting, value="10")
    rm_setting = client.by_id_setting(SETTING_GUARANTEED_REPLICA_MANAGER_CPU)
    client.update(rm_setting, value="20")
    time.sleep(5)
    guaranteed_engine_cpu_setting_check(
        client, core_api, other_ems, "Running", True,
        expected_cpu_request(10))
    guaranteed_engine_cpu_setting_check(
        client, core_api, other_rms, "Running", True,
        expected_cpu_request(20))

    # Step 4: zero settings mean an empty CPU request on the others.
    em_setting = client.by_id_setting(SETTING_GUARANTEED_ENGINE_MANAGER_CPU)
    client.update(em_setting, value="0")
    rm_setting = client.by_id_setting(SETTING_GUARANTEED_REPLICA_MANAGER_CPU)
    client.update(rm_setting, value="0")
    time.sleep(5)
    guaranteed_engine_cpu_setting_check(
        client, core_api, other_ems, "Running", True, "")
    guaranteed_engine_cpu_setting_check(
        client, core_api, other_rms, "Running", True, "")

    # Step 5: from here on all instance managers are checked together. New
    # lists are built so the "other" lists are not mutated through an alias.
    ems = other_ems + [em_on_host]
    rms = other_rms + [rm_on_host]

    host_node = client.by_id_node(host_node_name)
    client.update(host_node, allowScheduling=True,
                  engineManagerCPURequest=0, replicaManagerCPURequest=0)
    time.sleep(5)
    guaranteed_engine_cpu_setting_check(
        client, core_api, ems, "Running", True, "")
    guaranteed_engine_cpu_setting_check(
        client, core_api, rms, "Running", True, "")

    # Step 6: non-zero settings with a sum below 40 apply to every node.
    client.update(em_setting, value="20")
    rm_setting = client.by_id_setting(SETTING_GUARANTEED_REPLICA_MANAGER_CPU)
    client.update(rm_setting, value="15")
    time.sleep(5)
    guaranteed_engine_cpu_setting_check(
        client, core_api, ems, "Running", True,
        expected_cpu_request(20))
    guaranteed_engine_cpu_setting_check(
        client, core_api, rms, "Running", True,
        expected_cpu_request(15))

    # Step 7a: a single value above 40 is rejected.
    with pytest.raises(Exception) as e:
        client.update(em_setting, value="41")
    assert "should be between 0 to 40" in \
           str(e.value)

    # Step 7b: 35 (engine) + 15 (replica) exceeds the 40% sum limit.
    em_setting = client.by_id_setting(SETTING_GUARANTEED_ENGINE_MANAGER_CPU)
    with pytest.raises(Exception) as e:
        client.update(em_setting, value="35")
    assert "The sum should not be smaller than 0% or greater than 40%" in \
           str(e.value)

    # Create a volume to test
    vol_name = generate_volume_name()
    volume = create_and_check_volume(client, vol_name)
    volume.attach(hostId=get_self_host_id())
    volume = wait_for_volume_healthy(client, vol_name)
    assert len(volume.replicas) == 3
    data = write_volume_random_data(volume)
    check_volume_data(volume, data)
    cleanup_volume(client, volume)
def ha_backup_deletion_recovery_test(client, volume_name, size, base_image=""):  # NOQA
    """
    Restore a backup, delete the restored backup snapshot, then rebuild.

    1. Create a 2-replica volume (passing `base_image` through) and attach
       it to the current node.
    2. For each URL returned by `common.get_backupstore_url()`:
        1. Point the backup target setting (and the credential secret
           setting) at that backupstore.
        2. Write random `data`, take two snapshots, and back up the first.
        3. Restore the backup into a new volume, attach it, verify `data`.
        4. Verify the restored volume lists 2 snapshots, create another one,
           then delete and purge the snapshot that came from the backup.
        5. Run `ha_rebuild_replica_test` on the restored volume.
        6. Detach and delete the restored volume.
    3. Clean up the original volume.
    """
    volume = client.create_volume(name=volume_name, size=size,
                                  numberOfReplicas=2, baseImage=base_image)
    volume = common.wait_for_volume_detached(client, volume_name)

    host_id = get_self_host_id()
    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    setting = client.by_id_setting(common.SETTING_BACKUP_TARGET)
    # test backupTarget for multiple settings
    # NOTE(review): the original indentation was lost; the backup/restore
    # steps below are assumed to run once per backupstore - confirm.
    backupstores = common.get_backupstore_url()
    for backupstore in backupstores:
        if common.is_backupTarget_s3(backupstore):
            # S3 entries are "<target>$<credential secret>".
            backupsettings = backupstore.split("$")
            setting = client.update(setting, value=backupsettings[0])
            assert setting["value"] == backupsettings[0]

            credential = client.by_id_setting(
                common.SETTING_BACKUP_TARGET_CREDENTIAL_SECRET)
            credential = client.update(credential, value=backupsettings[1])
            assert credential["value"] == backupsettings[1]
        else:
            # Other targets use the URL as-is and clear the credential secret.
            setting = client.update(setting, value=backupstore)
            assert setting["value"] == backupstore
            credential = client.by_id_setting(
                common.SETTING_BACKUP_TARGET_CREDENTIAL_SECRET)
            credential = client.update(credential, value="")
            assert credential["value"] == ""

        data = write_volume_random_data(volume)
        snap2 = volume.snapshotCreate()
        # A second snapshot is taken after the one that gets backed up.
        volume.snapshotCreate()

        volume.snapshotBackup(name=snap2["name"])

        _, b = common.find_backup(client, volume_name, snap2["name"])

        # Restore the backup into a new volume and attach it to this host.
        res_name = common.generate_volume_name()
        res_volume = client.create_volume(name=res_name, size=size,
                                          numberOfReplicas=2,
                                          fromBackup=b["url"])
        res_volume = common.wait_for_volume_restoration_completed(
            client, res_name)
        res_volume = common.wait_for_volume_detached(client, res_name)
        res_volume = res_volume.attach(hostId=host_id)
        res_volume = common.wait_for_volume_healthy(client, res_name)
        check_volume_data(res_volume, data)

        snapshots = res_volume.snapshotList()
        # only the backup snapshot + volume-head
        assert len(snapshots) == 2
        backup_snapshot = ""
        for snap in snapshots:
            if snap["name"] != "volume-head":
                backup_snapshot = snap["name"]
        assert backup_snapshot != ""

        # Add one snapshot on top, then delete and purge the backup snapshot;
        # the list is expected to be back at 2 entries afterwards.
        res_volume.snapshotCreate()
        snapshots = res_volume.snapshotList()
        assert len(snapshots) == 3

        res_volume.snapshotDelete(name=backup_snapshot)
        res_volume.snapshotPurge()
        snapshots = res_volume.snapshotList()
        assert len(snapshots) == 2

        ha_rebuild_replica_test(client, res_name)

        res_volume = res_volume.detach()
        res_volume = common.wait_for_volume_detached(client, res_name)

        client.delete(res_volume)
        common.wait_for_volume_delete(client, res_name)

    cleanup_volume(client, volume)
def test_recurring_job_kubernetes_status(client, core_api, volume_name):  # NOQA
    """
    Test RecurringJob properly backs up the KubernetesStatus

    1. Setup a random backupstore.
    2. Create a volume.
    3. Create a PV from the volume, and verify the PV status.
    4. Create a backup recurring job to run every 2 minutes.
    5. Attach the volume, wait 5 minutes, and verify that only one backup
       snapshot plus the volume head remain.
    6. Verify the backup contains the Kubernetes Status labels.
    """
    set_random_backupstore(client)
    self_host = get_self_host_id()
    client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=2)
    volume = common.wait_for_volume_detached(client, volume_name)

    pv_name = "pv-" + volume_name
    create_pv_for_volume(client, core_api, volume, pv_name)
    expected_volume_status = {
        'pvName': pv_name,
        'pvStatus': 'Available',
        'namespace': '',
        'pvcName': '',
        'lastPVCRefAt': '',
        'lastPodRefAt': '',
    }
    wait_volume_kubernetes_status(client, volume_name, expected_volume_status)

    # Simple Backup Job that runs every 2 minutes, retains 1.
    backup_job = {
        "name": RECURRING_JOB_NAME,
        "cron": "*/2 * * * *",
        "task": "backup",
        "retain": 1
    }
    volume.recurringUpdate(jobs=[backup_job])
    volume.attach(hostId=self_host)
    volume = common.wait_for_volume_healthy(client, volume_name)

    # 5 minutes
    time.sleep(300)

    # 1 from Backup, 1 from Volume Head.
    live_snapshots = sum(
        1 for snap in volume.snapshotList() if snap.removed is False)
    assert live_snapshots == 2

    # Verify the Labels on the actual Backup.
    backup_volume = client.by_id_backupVolume(volume_name)
    backup_entries = backup_volume.backupList().data
    assert len(backup_entries) == 1

    backup = backup_volume.backupGet(name=backup_entries[0].name)
    backup_status = json.loads(backup.labels.get(KUBERNETES_STATUS_LABEL))
    assert backup.labels.get(RECURRING_JOB_LABEL) == RECURRING_JOB_NAME
    assert backup_status == {
        'lastPodRefAt': '',
        'lastPVCRefAt': '',
        'namespace': '',
        'pvcName': '',
        'pvName': pv_name,
        'pvStatus': 'Available',
        'workloadsStatus': None
    }
    # Two Labels: KubernetesStatus and RecurringJob.
    assert len(backup.labels) == 2

    cleanup_volume(client, volume)
    delete_and_wait_pv(core_api, pv_name)