def test_statefulset_recurring_backup(set_random_backupstore, client, core_api, storage_class, statefulset): # NOQA """ Scenario : test recurring backups on StatefulSets Given 1 default backup recurring jobs created. When create a statefulset. And write data to every statefulset pod. And wait for 5 minutes. Then 2 snapshots created for every statefulset pod. """ # backup every minute recurring_jobs = { "backup": { "task": "backup", "groups": ["default"], "cron": "* * * * *", "retain": 2, "concurrency": 2, "labels": {}, }, } create_recurring_jobs(client, recurring_jobs) check_recurring_jobs(client, recurring_jobs) statefulset_name = 'statefulset-backup-test' update_statefulset_manifests(statefulset, storage_class, statefulset_name) create_storage_class(storage_class) create_and_wait_statefulset(statefulset) pod_data = get_statefulset_pod_info(core_api, statefulset) for pod in pod_data: pod['data'] = generate_random_data(VOLUME_RWTEST_SIZE) pod['backup_snapshot'] = '' for pod in pod_data: volume = client.by_id_volume(pod['pv_name']) write_pod_volume_data(core_api, pod['pod_name'], pod['data']) time.sleep(150) for pod in pod_data: volume = client.by_id_volume(pod['pv_name']) write_pod_volume_data(core_api, pod['pod_name'], pod['data']) time.sleep(150) for pod in pod_data: volume = client.by_id_volume(pod['pv_name']) snapshots = volume.snapshotList() count = 0 for snapshot in snapshots: if snapshot.removed is False: count += 1 # one backup + volume-head assert count == 2
def test_statefulset_recurring_backup( client, core_api, storage_class, # NOQA statefulset): # NOQA """ Test that recurring backups on StatefulSets work properly. 1. Create a StatefulSet with VolumeClaimTemplate and Longhorn. 2. Wait for pods to run. 3. Write some data to every pod 4. Schedule recurring jobs for volumes using Longhorn API 5. Wait for 5 minutes 6. Verify the snapshots created by the recurring jobs. """ statefulset_name = 'statefulset-backup-test' update_statefulset_manifests(statefulset, storage_class, statefulset_name) create_storage_class(storage_class) create_and_wait_statefulset(statefulset) # backup every minute job_backup = { "name": "backup", "cron": "* * * * *", "task": "backup", "retain": 2 } pod_data = get_statefulset_pod_info(core_api, statefulset) for pod in pod_data: pod['data'] = generate_random_data(VOLUME_RWTEST_SIZE) pod['backup_snapshot'] = '' for pod in pod_data: volume = client.by_id_volume(pod['pv_name']) write_pod_volume_data(core_api, pod['pod_name'], pod['data']) volume.recurringUpdate(jobs=[job_backup]) time.sleep(150) for pod in pod_data: volume = client.by_id_volume(pod['pv_name']) write_pod_volume_data(core_api, pod['pod_name'], pod['data']) volume.recurringUpdate(jobs=[job_backup]) time.sleep(150) for pod in pod_data: volume = client.by_id_volume(pod['pv_name']) snapshots = volume.snapshotList() count = 0 for snapshot in snapshots: if snapshot.removed is False: count += 1 # one backups + volume-head assert count == 2
def test_pv_creation(client, core_api): # NOQA volume_name = "test-pv-creation" client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=2) volume = wait_for_volume_detached(client, volume_name) pv_name = "pv-" + volume_name volume.pvCreate(pvName=pv_name) for i in range(RETRY_COUNTS): if check_pv_existence(core_api, pv_name): break time.sleep(RETRY_INTERVAL) assert check_pv_existence(core_api, pv_name) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] workloads = k_status['workloadsStatus'] for i in range(RETRY_COUNTS): if k_status['pvName'] and k_status['pvStatus'] == 'Available': break time.sleep(RETRY_INTERVAL) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] workloads = k_status['workloadsStatus'] assert k_status['pvName'] == pv_name assert k_status['pvStatus'] == 'Available' assert not k_status['namespace'] assert not k_status['pvcName'] assert not workloads assert not k_status['lastPVCRefAt'] assert not k_status['lastPodRefAt'] # try to create one more pv for the volume pv_name_2 = "pv2-" + volume_name with pytest.raises(Exception) as e: volume.pvCreate(pvName=pv_name_2) assert "already exist" in str(e.value) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] workloads = k_status['workloadsStatus'] assert k_status['pvName'] == pv_name assert k_status['pvStatus'] == 'Available' assert not k_status['namespace'] assert not k_status['pvcName'] assert not workloads assert not k_status['lastPVCRefAt'] assert not k_status['lastPodRefAt'] delete_and_wait_pv(core_api, pv_name)
def test_csi_expansion_with_size_round_up(client, core_api): # NOQA """ test expand longhorn volume 1. Create longhorn volume with size '1Gi' 2. Attach, write data, and detach 3. Expand volume size to '2000000000/2G' and check if size round up '2000683008' 4. Attach, write data, and detach 5. Expand volume size to '2Gi' and check if size is '2147483648' 6. Attach, write data, and detach """ volume_name = generate_volume_name() volume = create_and_check_volume(client, volume_name, 2, str(1 * Gi)) self_hostId = get_self_host_id() volume.attach(hostId=self_hostId, disableFrontend=False) volume = wait_for_volume_healthy(client, volume_name) test_data = write_volume_random_data(volume) volume.detach(hostId="") volume = wait_for_volume_detached(client, volume_name) volume.expand(size="2000000000") wait_for_volume_expansion(client, volume_name) volume = client.by_id_volume(volume_name) assert volume.size == "2000683008" self_hostId = get_self_host_id() volume.attach(hostId=self_hostId, disableFrontend=False) volume = wait_for_volume_healthy(client, volume_name) check_volume_data(volume, test_data, False) test_data = write_volume_random_data(volume) volume.detach(hostId="") volume = wait_for_volume_detached(client, volume_name) volume.expand(size=str(2 * Gi)) wait_for_volume_expansion(client, volume_name) volume = client.by_id_volume(volume_name) assert volume.size == "2147483648" self_hostId = get_self_host_id() volume.attach(hostId=self_hostId, disableFrontend=False) volume = wait_for_volume_healthy(client, volume_name) check_volume_data(volume, test_data, False) volume.detach(hostId="") volume = wait_for_volume_detached(client, volume_name) client.delete(volume) wait_for_volume_delete(client, volume_name)
def test_replica_zone_anti_affinity(client, core_api, volume_name, k8s_node_zone_tags): # NOQA """ Test replica scheduler with zone anti-affinity 1. Set zone anti-affinity to hard. 2. Label nodes 1 & 2 with same zone label "zone1". Label node 3 with zone label "zone2". 3. Create a volume with 3 replicas. 4. Wait for volume condition `scheduled` to be false. 5. Label node 2 with zone label "zone3". 6. Wait for volume condition `scheduled` to be success. 7. Clear the volume. 8. Set zone anti-affinity to soft. 9. Change the zone labels on node 1 & 2 & 3 to "zone1". 10. Create a volume. 11. Wait for volume condition `scheduled` to be success. 12. Clean up the replica count, the zone labels and the volume. """ wait_longhorn_node_zone_updated(client) replica_node_soft_anti_affinity_setting = \ client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY) client.update(replica_node_soft_anti_affinity_setting, value="false") replica_zone_soft_anti_affinity_setting = \ client.by_id_setting(SETTING_REPLICA_ZONE_SOFT_ANTI_AFFINITY) client.update(replica_zone_soft_anti_affinity_setting, value="false") volume = create_and_check_volume(client, volume_name) lh_nodes = client.list_node() count = 0 for node in lh_nodes: count += 1 set_k8s_node_zone_label(core_api, node.name, "lh-zone" + str(count)) wait_longhorn_node_zone_updated(client) wait_for_volume_condition_scheduled(client, volume_name, "status", CONDITION_STATUS_TRUE) replica_zone_soft_anti_affinity_setting = \ client.by_id_setting(SETTING_REPLICA_ZONE_SOFT_ANTI_AFFINITY) client.update(replica_zone_soft_anti_affinity_setting, value="true") volume = client.by_id_volume(volume_name) client.delete(volume) wait_for_volume_delete(client, volume_name) for node in lh_nodes: set_k8s_node_zone_label(core_api, node.name, "lh-zone1") wait_longhorn_node_zone_updated(client) volume = create_and_check_volume(client, volume_name) wait_for_volume_condition_scheduled(client, volume_name, "status", CONDITION_STATUS_TRUE)
def test_recurring_job_in_storageclass(client, core_api, storage_class, statefulset): # NOQA """ Test create volume with StorageClass contains recurring jobs 1. Create a StorageClass with recurring jobs 2. Create a StatefulSet with PVC template and StorageClass 3. Verify the recurring jobs run correctly. """ set_random_backupstore(client) statefulset_name = 'recurring-job-in-storageclass-test' update_statefulset_manifests(statefulset, storage_class, statefulset_name) storage_class["parameters"]["recurringJobs"] = json.dumps(create_jobs1()) create_storage_class(storage_class) create_and_wait_statefulset(statefulset) pod_info = get_statefulset_pod_info(core_api, statefulset) volume_info = [p['pv_name'] for p in pod_info] # 5 minutes time.sleep(300) for volume_name in volume_info: # NOQA volume = client.by_id_volume(volume_name) check_jobs1_result(volume)
def backupstore_test(client, core_api, csi_pv, pvc, pod_make, pod_name, base_image, test_data, i): # NOQA vol_name = csi_pv['metadata']['name'] write_pod_volume_data(core_api, pod_name, test_data) volume = client.by_id_volume(vol_name) snap = volume.snapshotCreate() volume.snapshotBackup(name=snap["name"]) bv, b = common.find_backup(client, vol_name, snap["name"]) pod2_name = 'csi-backup-test-' + str(i) create_and_wait_csi_pod(pod2_name, client, core_api, csi_pv, pvc, pod_make, base_image, b["url"]) resp = read_volume_data(core_api, pod2_name) assert resp == test_data bv.backupDelete(name=b["name"]) backups = bv.backupList() found = False for b in backups: if b["snapshotName"] == snap["name"]: found = True break assert not found
def backupstore_test(client, core_api, csi_pv, pvc, pod_make, pod_name, base_image, test_data, i): # NOQA vol_name = csi_pv['metadata']['name'] write_volume_data(core_api, pod_name, test_data) volume = client.by_id_volume(vol_name) snap = volume.snapshotCreate() volume.snapshotBackup(name=snap["name"]) bv, b = common.find_backup(client, vol_name, snap["name"]) pod2_name = 'csi-backup-test-' + str(i) create_and_wait_csi_pod(pod2_name, client, core_api, csi_pv, pvc, pod_make, base_image, b["url"]) resp = read_volume_data(core_api, pod2_name) assert resp == test_data bv.backupDelete(name=b["name"]) backups = bv.backupList() found = False for b in backups: if b["snapshotName"] == snap["name"]: found = True break assert not found
def wait_for_recurring_backup_to_start(client, core_api, volume_name, expected_snapshot_count, minimum_progress=0): # NOQA job_pod_name = volume_name + '-backup-c' snapshot_name = '' snapshots = [] check_pod_existence(core_api, job_pod_name, namespace=LONGHORN_NAMESPACE) # Find the snapshot which is being backed up for _ in range(RETRY_BACKUP_COUNTS): volume = client.by_id_volume(volume_name) try: snapshots = volume.snapshotList() assert len(snapshots) == expected_snapshot_count + 1 for snapshot in snapshots: if snapshot.children['volume-head']: snapshot_name = snapshot.name break if len(snapshot_name) != 0: break except (AttributeError, ApiException, AssertionError): time.sleep(RETRY_BACKUP_INTERVAL) assert len(snapshot_name) != 0 # To ensure the progress of backup common.wait_for_backup_to_start(client, volume_name, snapshot_name=snapshot_name, chk_progress=minimum_progress) return snapshot_name
def ha_rebuild_replica_test(client, volname): # NOQA volume = client.by_id_volume(volname) assert get_volume_endpoint(volume) == DEV_PATH + volname assert len(volume["replicas"]) == 2 replica0 = volume["replicas"][0] assert replica0["name"] != "" replica1 = volume["replicas"][1] assert replica1["name"] != "" data = write_volume_random_data(volume) volume = volume.replicaRemove(name=replica0["name"]) # wait until we saw a replica starts rebuilding new_replica_found = False for i in range(RETRY_COUNTS): v = client.by_id_volume(volname) for r in v["replicas"]: if r["name"] != replica0["name"] and \ r["name"] != replica1["name"]: new_replica_found = True break if new_replica_found: break time.sleep(RETRY_INTERVAL) assert new_replica_found volume = common.wait_for_volume_healthy(client, volname) volume = client.by_id_volume(volname) assert volume["state"] == common.VOLUME_STATE_ATTACHED assert volume["robustness"] == common.VOLUME_ROBUSTNESS_HEALTHY assert len(volume["replicas"]) >= 2 found = False for replica in volume["replicas"]: if replica["name"] == replica1["name"]: found = True break assert found check_volume_data(volume, data)
def test_statefulset_recurring_backup( client, core_api, storage_class, # NOQA statefulset): # NOQA """ Test that recurring backups on StatefulSets work properly. """ statefulset_name = 'statefulset-backup-test' update_test_manifests(statefulset, storage_class, statefulset_name) create_storage_class(storage_class) create_and_wait_statefulset(statefulset) # backup every minute job_backup = { "name": "backup", "cron": "* * * * *", "task": "backup", "retain": 2 } pod_data = get_statefulset_pod_info(core_api, statefulset) for pod in pod_data: pod['data'] = generate_random_data(VOLUME_RWTEST_SIZE) pod['backup_snapshot'] = '' for pod in pod_data: volume = client.by_id_volume(pod['pv_name']) write_volume_data(core_api, pod['pod_name'], pod['data']) volume.recurringUpdate(jobs=[job_backup]) time.sleep(300) for pod in pod_data: volume = client.by_id_volume(pod['pv_name']) snapshots = volume.snapshotList() count = 0 for snapshot in snapshots: if snapshot['removed'] is False: count += 1 # two backups + volume-head assert count == 3
def get_zone_replica_count(client, volume_name, zone_name): # NOQA volume = client.by_id_volume(volume_name) zone_replica_count = 0 for replica in volume.replicas: replica_host_id = replica.hostId replica_host_zone = client.by_id_node(replica_host_id).zone if replica_host_zone == zone_name: zone_replica_count += 1 return zone_replica_count
def get_zone_replica_count(client, volume_name, zone_name, chk_running=False): # NOQA volume = client.by_id_volume(volume_name) zone_replica_count = 0 for replica in volume.replicas: if chk_running and not replica.running: continue replica_host_id = replica.hostId replica_host_zone = client.by_id_node(replica_host_id).zone if replica_host_zone == zone_name: zone_replica_count += 1 return zone_replica_count
def test_hard_anti_affinity_scheduling(client, volume_name): # NOQA """ Test that volumes with Hard Anti-Affinity work as expected. With Hard Anti-Affinity, scheduling on nodes with existing replicas should be forbidden, resulting in "Degraded" state. 1. Create a volume and attach to the current node 2. Generate and write `data` to the volume. 3. Set `soft anti-affinity` to false 4. Disable current node's scheduling. 5. Remove the replica on the current node 1. Verify volume will be in degraded state. 2. Verify volume reports condition `scheduled == false` 3. Verify only two of three replicas of volume are healthy. 4. Verify the remaining replica doesn't have `replica.HostID`, meaning it's unscheduled 6. Check volume `data` """ volume = create_and_check_volume(client, volume_name) host_id = get_self_host_id() volume.attach(hostId=host_id) volume = wait_for_volume_healthy(client, volume_name) assert len(volume.replicas) == 3 data = write_volume_random_data(volume) setting = client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY) client.update(setting, value="false") node = client.by_id_node(host_id) client.update(node, allowScheduling=False) host_replica = get_host_replica(volume, host_id) volume.replicaRemove(name=host_replica.name) # Instead of waiting for timeout and lengthening the tests a significant # amount we can make sure the scheduling isn't working by making sure the # volume becomes Degraded and reports a scheduling error. wait_for_volume_degraded(client, volume_name) wait_scheduling_failure(client, volume_name) # While there are three replicas that should exist to meet the Volume's # request, only two of those volumes should actually be Healthy. volume = client.by_id_volume(volume_name) assert sum([ 1 for replica in volume.replicas if replica.running and replica.mode == "RW" ]) == 2 # Confirm that the final volume is an unscheduled volume. assert sum([1 for replica in volume.replicas if not replica.hostId]) == 1 # Three replicas in total should still exist. assert len(volume.replicas) == 3 check_volume_data(volume, data) cleanup_volume(client, volume)
def backupstore_test(client, core_api, csi_pv, pvc, pod_make, pod_name, vol_name, backing_image, test_data): # NOQA write_pod_volume_data(core_api, pod_name, test_data) volume = client.by_id_volume(vol_name) snap = create_snapshot(client, vol_name) volume.snapshotBackup(name=snap.name) common.wait_for_backup_completion(client, vol_name, snap.name) bv, b = common.find_backup(client, vol_name, snap.name) pod2_name = 'csi-backup-test-2' vol2_name = create_and_wait_csi_pod( pod2_name, client, core_api, csi_pv, pvc, pod_make, backing_image, b.url) volume2 = client.by_id_volume(vol2_name) resp = read_volume_data(core_api, pod2_name) assert resp == test_data delete_backup(client, bv.name, b.name) delete_and_wait_pod(core_api, pod2_name) client.delete(volume2)
def test_statefulset_recurring_backup(client, core_api, storage_class, # NOQA statefulset): # NOQA """ Test that recurring backups on StatefulSets work properly. """ statefulset_name = 'statefulset-backup-test' update_statefulset_manifests(statefulset, storage_class, statefulset_name) create_storage_class(storage_class) create_and_wait_statefulset(statefulset) # backup every minute job_backup = {"name": "backup", "cron": "* * * * *", "task": "backup", "retain": 2} pod_data = get_statefulset_pod_info(core_api, statefulset) for pod in pod_data: pod['data'] = generate_random_data(VOLUME_RWTEST_SIZE) pod['backup_snapshot'] = '' for pod in pod_data: volume = client.by_id_volume(pod['pv_name']) write_volume_data(core_api, pod['pod_name'], pod['data']) volume.recurringUpdate(jobs=[job_backup]) time.sleep(300) for pod in pod_data: volume = client.by_id_volume(pod['pv_name']) snapshots = volume.snapshotList() count = 0 for snapshot in snapshots: if snapshot['removed'] is False: count += 1 # two backups + volume-head assert count == 3
def test_replica_rebuild_per_volume_limit(client, core_api, storage_class, sts_name, statefulset): # NOQA """ Test the volume always only have one replica scheduled for rebuild 1. Set soft anti-affinity to `true`. 2. Create a volume with 1 replica. 3. Attach the volume and write a few hundreds MB data to it. 4. Scale the volume replica to 5. 5. Constantly checking the volume replica list to make sure there should be only 1 replica in WO state. 6. Wait for the volume to complete rebuilding. Then remove 4 of the 5 replicas. 7. Monitoring the volume replica list again. 8. Once the rebuild was completed again, verify the data checksum. """ replica_soft_anti_affinity_setting = \ client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY) client.update(replica_soft_anti_affinity_setting, value="true") data_path = '/data/test' storage_class['parameters']['numberOfReplicas'] = "1" vol_name, pod_name, md5sum = \ common.prepare_statefulset_with_data_in_mb( client, core_api, statefulset, sts_name, storage_class, data_path=data_path, data_size_in_mb=DATA_SIZE_IN_MB_2) # Scale the volume replica to 5 r_count = 5 vol = client.by_id_volume(vol_name) vol.updateReplicaCount(replicaCount=r_count) vol = common.wait_for_volume_replicas_mode(client, vol_name, 'RW', replica_count=r_count) # Delete 4 volume replicas del vol.replicas[0] for r in vol.replicas: vol.replicaRemove(name=r.name) r_count = 1 common.wait_for_volume_replicas_mode(client, vol_name, 'RW', replica_count=r_count) assert md5sum == common.get_pod_data_md5sum(core_api, pod_name, data_path)
def test_recurring_job_in_storageclass(client, core_api, storage_class, statefulset): # NOQA statefulset_name = 'recurring-job-in-storageclass-test' update_statefulset_manifests(statefulset, storage_class, statefulset_name) storage_class['parameters']['recurringJobs'] = json.dumps(create_jobs1()) create_storage_class(storage_class) create_and_wait_statefulset(statefulset) pod_info = get_statefulset_pod_info(core_api, statefulset) volume_info = [p['pv_name'] for p in pod_info] # 5 minutes time.sleep(300) for volume_name in volume_info: # NOQA volume = client.by_id_volume(volume_name) check_jobs1_result(volume)
def test_tag_scheduling_on_update(client, node_default_tags, volume_name): # NOQA """ Test that Replicas get scheduled if a Node/Disk disks updated with the proper Tags. """ tag_spec = { "disk": ["ssd", "m2"], "expected": 1, "node": ["main", "fallback"] } client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=3, diskSelector=tag_spec["disk"], nodeSelector=tag_spec["node"]) volume = wait_for_volume_detached(client, volume_name) assert volume["diskSelector"] == tag_spec["disk"] assert volume["nodeSelector"] == tag_spec["node"] wait_scheduling_failure(client, volume_name) host_id = get_self_host_id() node = client.by_id_node(host_id) update_disks = get_update_disks(node["disks"]) update_disks[0]["tags"] = tag_spec["disk"] node = node.diskUpdate(disks=update_disks) set_node_tags(client, node, tag_spec["node"]) scheduled = False for i in range(RETRY_COUNTS): v = client.by_id_volume(volume_name) if v["conditions"]["scheduled"]["status"] == "True": scheduled = True if scheduled: break sleep(RETRY_INTERVAL) assert scheduled volume.attach(hostId=host_id) volume = wait_for_volume_healthy(client, volume_name) nodes = client.list_node() node_mapping = {node["id"]: { "disk": get_update_disks(node["disks"])[0]["tags"], "node": node["tags"] } for node in nodes} assert len(volume["replicas"]) == 3 check_volume_replicas(volume, tag_spec, node_mapping) cleanup_volume(client, volume)
def backupstore_test(client, core_api, csi_pv, pvc, pod_make, pod_name, base_image, test_data, i): # NOQA vol_name = csi_pv['metadata']['name'] write_pod_volume_data(core_api, pod_name, test_data) volume = client.by_id_volume(vol_name) snap = create_snapshot(client, vol_name) volume.snapshotBackup(name=snap.name) common.wait_for_backup_completion(client, vol_name, snap.name) bv, b = common.find_backup(client, vol_name, snap.name) pod2_name = 'csi-backup-test-' + str(i) create_and_wait_csi_pod(pod2_name, client, core_api, csi_pv, pvc, pod_make, base_image, b.url) resp = read_volume_data(core_api, pod2_name) assert resp == test_data delete_backup(client, bv.name, b.name)
def test_recurring_job_in_storageclass(set_random_backupstore, client, core_api, storage_class, statefulset): # NOQA """ Test create volume with StorageClass contains recurring jobs 1. Create a StorageClass with recurring jobs 2. Create a StatefulSet with PVC template and StorageClass 3. Verify the recurring jobs run correctly. """ statefulset_name = 'recurring-job-in-storageclass-test' update_statefulset_manifests(statefulset, storage_class, statefulset_name) storage_class["parameters"]["recurringJobs"] = json.dumps(create_jobs1()) create_storage_class(storage_class) # wait until the beginning of an even minute wait_until_begin_of_an_even_minute() start_time = datetime.utcnow() create_and_wait_statefulset(statefulset) statefulset_creating_duration = datetime.utcnow() - start_time assert 150 > statefulset_creating_duration.seconds # We want to write data exactly at the 150th second since the start_time time.sleep(150 - statefulset_creating_duration.seconds) pod_info = get_statefulset_pod_info(core_api, statefulset) volume_info = [p['pv_name'] for p in pod_info] pod_names = [p['pod_name'] for p in pod_info] # write random data to volume to trigger recurring snapshot and backup job volume_data_path = "/data/test" for pod_name in pod_names: write_pod_volume_random_data(core_api, pod_name, volume_data_path, 2) time.sleep(150) # 2.5 minutes for volume_name in volume_info: # NOQA volume = client.by_id_volume(volume_name) check_jobs1_result(volume)
def wait_new_replica_ready(client, volume_name, replica_names): # NOQA """ Wait for a new replica to be found on the specified volume. Trigger a failed assertion if one can't be found. :param client: The Longhorn client to use in the request. :param volume_name: The name of the volume. :param replica_names: The list of names of the volume's old replicas. """ new_replica_ready = False for _ in range(RETRY_COUNTS): v = client.by_id_volume(volume_name) for r in v["replicas"]: if r["name"] not in replica_names and r["running"] and \ r["mode"] == "RW": new_replica_ready = True break if new_replica_ready: break sleep(RETRY_INTERVAL) assert new_replica_ready
def create_volume(client, vol_name, size, node_id, r_num): # NOQA volume = client.create_volume(name=vol_name, size=size, numberOfReplicas=r_num) assert volume["numberOfReplicas"] == r_num assert volume["frontend"] == "blockdev" volume = common.wait_for_volume_detached(client, vol_name) assert len(volume["replicas"]) == r_num assert volume["state"] == "detached" assert volume["created"] != "" volumeByName = client.by_id_volume(vol_name) assert volumeByName["name"] == volume["name"] assert volumeByName["size"] == volume["size"] assert volumeByName["numberOfReplicas"] == volume["numberOfReplicas"] assert volumeByName["state"] == volume["state"] assert volumeByName["created"] == volume["created"] volume.attach(hostId=node_id) volume = common.wait_for_volume_healthy(client, vol_name) return volume
def test_delete_with_provisioned_pv(client, core_api, storage_class, pvc): # NOQA """ Test that deleting a Volume with dynamically provisioned Persistent Volume and Persistent Volume Claim resources successfully deletes the Volume and cleans up those resources. 1. Create a Storage Class to test with. 2. Create a Persistent Volume Claim that requests a Volume from that Storage Class. 3. Wait for the Volume to be provisioned and for the Kubernetes Status to be updated correctly. 4. Attempt to delete the Volume. 5. Verify that the Volume and its associated resources have been deleted. """ pv = provision_and_wait_pv(client, core_api, storage_class, pvc) pv_name = pv.metadata.name volume_name = pv.spec.csi.volume_handle # NOQA volume = client.by_id_volume(volume_name) client.delete(volume) wait_for_volume_delete(client, volume_name) wait_delete_pv(core_api, pv_name) wait_delete_pvc(core_api, pvc['metadata']['name'])
def test_backup_kubernetes_status(set_random_backupstore, client, core_api, pod): # NOQA """ Test that Backups have KubernetesStatus stored properly when there is an associated PersistentVolumeClaim and Pod. 1. Setup a random backupstore 2. Set settings Longhorn Static StorageClass to `longhorn-static-test` 3. Create a volume and PV/PVC. Verify the StorageClass of PVC 4. Create a Pod using the PVC. 5. Check volume's Kubernetes status to reflect PV/PVC/Pod correctly. 6. Create a backup for the volume. 7. Verify the labels of created backup reflect PV/PVC/Pod status. 8. Restore the backup to a volume. Wait for restoration to complete. 9. Check the volume's Kubernetes Status 1. Make sure the `lastPodRefAt` and `lastPVCRefAt` is snapshot created time 10. Delete the backup and restored volume. 11. Delete PV/PVC/Pod. 12. Verify volume's Kubernetes Status updated to reflect history data. 13. Attach the volume and create another backup. Verify the labels 14. Verify the volume's Kubernetes status. 15. Restore the previous backup to a new volume. Wait for restoration. 16. Verify the restored volume's Kubernetes status. 1. Make sure `lastPodRefAt` and `lastPVCRefAt` matched volume on step 12 """ host_id = get_self_host_id() static_sc_name = "longhorn-static-test" setting = client.by_id_setting(SETTING_DEFAULT_LONGHORN_STATIC_SC) setting = client.update(setting, value=static_sc_name) assert setting.value == static_sc_name volume_name = "test-backup-kubernetes-status-pod" # NOQA client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=2) volume = wait_for_volume_detached(client, volume_name) pod_name = "pod-" + volume_name pv_name = "pv-" + volume_name pvc_name = "pvc-" + volume_name create_pv_for_volume(client, core_api, volume, pv_name) create_pvc_for_volume(client, core_api, volume, pvc_name) ret = core_api.list_namespaced_persistent_volume_claim(namespace='default') pvc_found = False for item in ret.items: if item.metadata.name == pvc_name: pvc_found = item break assert pvc_found assert pvc_found.spec.storage_class_name == static_sc_name pod['metadata']['name'] = pod_name pod['spec']['volumes'] = [{ 'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'], 'persistentVolumeClaim': { 'claimName': pvc_name, }, }] create_and_wait_pod(core_api, pod) ks = { 'lastPodRefAt': '', 'lastPVCRefAt': '', 'namespace': 'default', 'pvcName': pvc_name, 'pvName': pv_name, 'pvStatus': 'Bound', 'workloadsStatus': [{ 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '' }] } wait_volume_kubernetes_status(client, volume_name, ks) volume = wait_for_volume_healthy(client, volume_name) # Create Backup manually instead of calling create_backup since Kubernetes # is not guaranteed to mount our Volume to the test host. snap = create_snapshot(client, volume_name) volume.snapshotBackup(name=snap.name) wait_for_backup_completion(client, volume_name, snap.name) _, b = find_backup(client, volume_name, snap.name) # Check backup label status = loads(b.labels.get(KUBERNETES_STATUS_LABEL)) assert status == ks # Check backup volume label for _ in range(RETRY_COUNTS): bv = client.by_id_backupVolume(volume_name) if bv is not None and bv.labels is not None: break time.sleep(RETRY_INTERVAL) assert bv is not None and bv.labels is not None status = loads(bv.labels.get(KUBERNETES_STATUS_LABEL)) assert status == ks restore_name = generate_volume_name() client.create_volume(name=restore_name, size=SIZE, numberOfReplicas=2, fromBackup=b.url) wait_for_volume_restoration_completed(client, restore_name) wait_for_volume_detached(client, restore_name) snapshot_created = b.snapshotCreated ks = { 'lastPodRefAt': b.snapshotCreated, 'lastPVCRefAt': b.snapshotCreated, 'namespace': 'default', 'pvcName': pvc_name, # Restoration should not apply PersistentVolume data. 'pvName': '', 'pvStatus': '', 'workloadsStatus': [{ 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '' }] } wait_volume_kubernetes_status(client, restore_name, ks) restore = client.by_id_volume(restore_name) # We need to compare LastPodRefAt and LastPVCRefAt manually since # wait_volume_kubernetes_status only checks for empty or non-empty state. assert restore.kubernetesStatus.lastPodRefAt == ks["lastPodRefAt"] assert restore.kubernetesStatus.lastPVCRefAt == ks["lastPVCRefAt"] delete_backup(client, bv.name, b.name) client.delete(restore) wait_for_volume_delete(client, restore_name) delete_and_wait_pod(core_api, pod_name) delete_and_wait_pvc(core_api, pvc_name) delete_and_wait_pv(core_api, pv_name) # With the Pod, PVC, and PV deleted, the Volume should have both Ref # fields set. Check that a new Backup and Restore will use this instead of # manually populating the Ref fields. ks = { 'lastPodRefAt': 'NOT NULL', 'lastPVCRefAt': 'NOT NULL', 'namespace': 'default', 'pvcName': pvc_name, 'pvName': '', 'pvStatus': '', 'workloadsStatus': [{ 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '' }] } wait_volume_kubernetes_status(client, volume_name, ks) volume = wait_for_volume_detached(client, volume_name) volume.attach(hostId=host_id) volume = wait_for_volume_healthy(client, volume_name) snap = create_snapshot(client, volume_name) volume.snapshotBackup(name=snap.name) volume = wait_for_backup_completion(client, volume_name, snap.name) bv, b = find_backup(client, volume_name, snap.name) new_b = bv.backupGet(name=b.name) status = loads(new_b.labels.get(KUBERNETES_STATUS_LABEL)) # Check each field manually, we have no idea what the LastPodRefAt or the # LastPVCRefAt will be. We just know it shouldn't be SnapshotCreated. assert status['lastPodRefAt'] != snapshot_created assert status['lastPVCRefAt'] != snapshot_created assert status['namespace'] == "default" assert status['pvcName'] == pvc_name assert status['pvName'] == "" assert status['pvStatus'] == "" assert status['workloadsStatus'] == [{ 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '' }] restore_name = generate_volume_name() client.create_volume(name=restore_name, size=SIZE, numberOfReplicas=2, fromBackup=b.url) wait_for_volume_restoration_completed(client, restore_name) wait_for_volume_detached(client, restore_name) ks = { 'lastPodRefAt': status['lastPodRefAt'], 'lastPVCRefAt': status['lastPVCRefAt'], 'namespace': 'default', 'pvcName': pvc_name, 'pvName': '', 'pvStatus': '', 'workloadsStatus': [{ 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '' }] } wait_volume_kubernetes_status(client, restore_name, ks) restore = client.by_id_volume(restore_name) assert restore.kubernetesStatus.lastPodRefAt == ks["lastPodRefAt"] assert restore.kubernetesStatus.lastPVCRefAt == ks["lastPVCRefAt"] # cleanup backupstore_cleanup(client) client.delete(restore) cleanup_volume(client, volume)
def engine_live_upgrade_rollback_test(client, volume_name, base_image=""): # NOQA default_img = common.get_default_engine_image(client) default_img_name = default_img["name"] default_img = wait_for_engine_image_ref_count(client, default_img_name, 0) cli_v = default_img["cliAPIVersion"] cli_minv = default_img["cliAPIMinVersion"] ctl_v = default_img["controllerAPIVersion"] ctl_minv = default_img["controllerAPIMinVersion"] data_v = default_img["dataFormatVersion"] data_minv = default_img["dataFormatMinVersion"] wrong_engine_upgrade_image = common.get_compatibility_test_image( cli_v, cli_minv, ctl_v, ctl_minv, data_v, data_minv) new_img = client.create_engine_image(image=wrong_engine_upgrade_image) new_img_name = new_img["name"] new_img = wait_for_engine_image_state(client, new_img_name, "ready") assert new_img["refCount"] == 0 assert new_img["noRefSince"] != "" default_img = common.get_default_engine_image(client) default_img_name = default_img["name"] volume = client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=2, baseImage=base_image) volume = common.wait_for_volume_detached(client, volume_name) default_img = wait_for_engine_image_ref_count(client, default_img_name, 1) assert volume["baseImage"] == base_image original_engine_image = volume["engineImage"] assert original_engine_image != wrong_engine_upgrade_image host_id = get_self_host_id() volume = volume.attach(hostId=host_id) volume = common.wait_for_volume_healthy(client, volume_name) data = write_volume_random_data(volume) volume.engineUpgrade(image=wrong_engine_upgrade_image) volume = client.by_id_volume(volume["name"]) assert volume["engineImage"] == wrong_engine_upgrade_image assert volume["currentImage"] == original_engine_image with pytest.raises(Exception): # this will timeout wait_for_volume_current_image(client, volume_name, wrong_engine_upgrade_image) # rollback volume.engineUpgrade(image=original_engine_image) volume = wait_for_volume_current_image(client, volume_name, original_engine_image) assert volume["engineImage"] == original_engine_image assert volume["currentImage"] == original_engine_image engine = get_volume_engine(volume) assert engine["engineImage"] == original_engine_image assert engine["currentImage"] == original_engine_image volume = common.wait_for_volume_replica_count(client, volume_name, REPLICA_COUNT) check_volume_data(volume, data) assert volume["state"] == common.VOLUME_STATE_ATTACHED assert volume["robustness"] == common.VOLUME_ROBUSTNESS_HEALTHY # try again, this time let's try detach volume.engineUpgrade(image=wrong_engine_upgrade_image) volume = client.by_id_volume(volume["name"]) assert volume["engineImage"] == wrong_engine_upgrade_image assert volume["currentImage"] == original_engine_image with pytest.raises(Exception): # this will timeout wait_for_volume_current_image(client, volume_name, wrong_engine_upgrade_image) volume = volume.detach() volume = wait_for_volume_current_image(client, volume_name, wrong_engine_upgrade_image) # all the images would be updated assert volume["engineImage"] == wrong_engine_upgrade_image engine = get_volume_engine(volume) assert engine["engineImage"] == wrong_engine_upgrade_image volume = common.wait_for_volume_replica_count(client, volume_name, REPLICA_COUNT) for replica in volume["replicas"]: assert replica["engineImage"] == wrong_engine_upgrade_image # upgrade to the correct image when offline volume.engineUpgrade(image=original_engine_image) volume = client.by_id_volume(volume["name"]) assert volume["engineImage"] == original_engine_image volume = volume.attach(hostId=host_id) volume = common.wait_for_volume_healthy(client, volume_name) assert volume["engineImage"] == original_engine_image assert volume["currentImage"] == original_engine_image engine = get_volume_engine(volume) assert engine["engineImage"] == original_engine_image assert engine["currentImage"] == original_engine_image for replica in volume["replicas"]: assert replica["engineImage"] == original_engine_image assert replica["currentImage"] == original_engine_image check_volume_data(volume, data) client.delete(volume) wait_for_volume_delete(client, volume_name) client.delete(new_img)
def test_csi_expansion_with_replica_failure(client, core_api, storage_class, pvc, pod_manifest): # NOQA """ Test expansion success but with one replica expansion failure 1. Create a new `storage_class` with `allowVolumeExpansion` set 2. Create PVC and Pod with dynamic provisioned volume from the StorageClass 3. Create an empty directory with expansion snapshot tmp meta file path for one replica so that the replica expansion will fail 4. Generate `test_data` and write to the pod 5. Delete the pod and wait for volume detachment 6. Update pvc.spec.resources to expand the volume 7. Check expansion result using Longhorn API. There will be expansion error caused by the failed replica but overall the expansion should succeed. 8. Create a new pod and check if the volume will rebuild the failed replica 9. Validate the volume content, then check if data writing looks fine """ create_storage_class(storage_class) pod_name = 'csi-expansion-with-replica-failure-test' pvc_name = pod_name + "-pvc" pvc['metadata']['name'] = pvc_name pvc['spec']['storageClassName'] = storage_class['metadata']['name'] create_pvc(pvc) pod_manifest['metadata']['name'] = pod_name pod_manifest['spec']['volumes'] = [{ 'name': pod_manifest['spec']['containers'][0]['volumeMounts'][0]['name'], 'persistentVolumeClaim': {'claimName': pvc_name}, }] create_and_wait_pod(core_api, pod_manifest) expand_size = str(EXPANDED_VOLUME_SIZE*Gi) pv = wait_and_get_pv_for_pvc(core_api, pvc_name) assert pv.status.phase == "Bound" volume_name = pv.spec.csi.volume_handle volume = client.by_id_volume(volume_name) failed_replica = volume.replicas[0] fail_replica_expansion(client, core_api, volume_name, expand_size, [failed_replica]) test_data = generate_random_data(VOLUME_RWTEST_SIZE) write_pod_volume_data(core_api, pod_name, test_data) delete_and_wait_pod(core_api, pod_name) wait_for_volume_detached(client, volume_name) # There will be replica expansion error info # but the expansion should succeed. pvc['spec']['resources'] = { 'requests': { 'storage': size_to_string(EXPANDED_VOLUME_SIZE*Gi) } } expand_and_wait_for_pvc(core_api, pvc) wait_for_expansion_failure(client, volume_name) wait_for_volume_expansion(client, volume_name) volume = client.by_id_volume(volume_name) assert volume.state == "detached" assert volume.size == expand_size for r in volume.replicas: if r.name == failed_replica.name: assert r.failedAt != "" else: assert r.failedAt == "" # Check if the replica will be rebuilded # and if the volume still works fine. create_and_wait_pod(core_api, pod_manifest) volume = wait_for_volume_healthy(client, volume_name) for r in volume.replicas: if r.name == failed_replica.name: assert r.mode == "" else: assert r.mode == "RW" resp = read_volume_data(core_api, pod_name) assert resp == test_data test_data = generate_random_data(VOLUME_RWTEST_SIZE) write_pod_volume_data(core_api, pod_name, test_data) resp = read_volume_data(core_api, pod_name) assert resp == test_data
def test_kubernetes_status( client, core_api, storage_class, # NOQA statefulset, csi_pv, pvc, pod): # NOQA """ Test Volume feature: Kubernetes Status 1. Create StorageClass with `reclaimPolicy = Retain` 2. Create a statefulset `kubernetes-status-test` with the StorageClass 1. The statefulset has scale of 2. 3. Get the volume name from the SECOND pod of the StateufulSet pod and create an `extra_pod` with the same volume on the same node 4. Check the volumes that used by the StatefulSet 1. The volume used by the FIRST StatefulSet pod will have one workload 2. The volume used by the SECOND StatefulSet pod will have two workloads 3. Validate related status, e.g. pv/pod name/state, workload name/type 5. Check the volumes again 1. PV/PVC should still be bound 2. The volume used by the FIRST pod should have history data 3. The volume used by the SECOND and extra pod should have current data point to the extra pod 6. Delete the extra pod 1. Now all the volume's should only have history data(`lastPodRefAt` set) 7. Delete the PVC 1. PVC should be updated with status `Released` and become history data 8. Delete PV 1. All the Kubernetes status information should be cleaned up. 9. Reuse the two Longhorn volumes to create new pods 1. Since the `reclaimPolicy == Retain`, volume won't be deleted by Longhorn 2. Check the Kubernetes status now updated, with pod info but empty workload 3. Default Longhorn Static StorageClass will remove the PV with PVC, but leave Longhorn volume """ statefulset_name = 'kubernetes-status-test' update_statefulset_manifests(statefulset, storage_class, statefulset_name) storage_class['reclaimPolicy'] = 'Retain' create_storage_class(storage_class) create_and_wait_statefulset(statefulset) pod_info = get_statefulset_pod_info(core_api, statefulset) volume_info = [p['pv_name'] for p in pod_info] extra_pod_name = 'extra-pod-using-' + volume_info[1] pod['metadata']['name'] = extra_pod_name p2 = core_api.read_namespaced_pod(name=pod_info[1]['pod_name'], namespace='default') pod['spec']['nodeName'] = p2.spec.node_name pod['spec']['volumes'] = [{ 'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'], 'persistentVolumeClaim': { 'claimName': pod_info[1]['pvc_name'], }, }] create_and_wait_pod(core_api, pod) for i in range(len(volume_info)): p, volume_name = pod_info[i], volume_info[i] # NOQA volume = client.by_id_volume(volume_name) k_status = volume.kubernetesStatus workloads = k_status.workloadsStatus assert k_status.pvName == p['pv_name'] assert k_status.pvStatus == 'Bound' assert k_status.namespace == 'default' assert k_status.pvcName == p['pvc_name'] assert not k_status.lastPVCRefAt assert not k_status.lastPodRefAt if i == 0: assert len(workloads) == 1 assert workloads[0].podName == p['pod_name'] assert workloads[0].workloadName == statefulset_name assert workloads[0].workloadType == 'StatefulSet' for _ in range(RETRY_COUNTS): if workloads[0].podStatus == 'Running': break time.sleep(RETRY_INTERVAL) volume = client.by_id_volume(volume_name) k_status = volume.kubernetesStatus workloads = k_status.workloadsStatus assert workloads[0].podStatus == 'Running' if i == 1: assert len(k_status.workloadsStatus) == 2 if workloads[0].podName == pod_info[i]['pod_name']: assert workloads[1].podName == extra_pod_name assert workloads[0].workloadName == statefulset_name assert workloads[0].workloadType == 'StatefulSet' assert not workloads[1].workloadName assert not workloads[1].workloadType else: assert workloads[1].podName == pod_info[i]['pod_name'] assert workloads[0].podName == extra_pod_name assert not workloads[0].workloadName assert not workloads[0].workloadType assert workloads[1].workloadName == statefulset_name assert workloads[1].workloadType == 'StatefulSet' for _ in range(RETRY_COUNTS): if workloads[0].podStatus == 'Running' and \ workloads[1].podStatus == 'Running': break time.sleep(RETRY_INTERVAL) volume = client.by_id_volume(volume_name) k_status = volume.kubernetesStatus workloads = k_status.workloadsStatus assert len(workloads) == 2 assert workloads[0].podStatus == 'Running' assert workloads[1].podStatus == 'Running' ks_list = [{}, {}] delete_and_wait_statefulset_only(core_api, statefulset) # the extra pod is still using the 2nd volume for i in range(len(volume_info)): p, volume_name = pod_info[i], volume_info[i] ks_list[i]['pvName'] = p['pv_name'] ks_list[i]['pvStatus'] = 'Bound' ks_list[i]['namespace'] = 'default' ks_list[i]['pvcName'] = p['pvc_name'] ks_list[i]['lastPVCRefAt'] = '' if i == 0: ks_list[i]['lastPodRefAt'] = 'not empty' ks_list[i]['workloadsStatus'] = [ { 'podName': p['pod_name'], 'podStatus': 'Running', 'workloadName': statefulset_name, 'workloadType': 'StatefulSet', }, ] if i == 1: ks_list[i]['lastPodRefAt'] = '' ks_list[i]['workloadsStatus'] = [{ 'podName': extra_pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '', }] wait_volume_kubernetes_status(client, volume_name, ks_list[i]) # deleted extra_pod, all volumes have no workload delete_and_wait_pod(core_api, pod['metadata']['name']) for i in range(len(volume_info)): p, volume_name = pod_info[i], volume_info[i] ks_list[i]['lastPodRefAt'] = 'not empty' wait_volume_kubernetes_status(client, volume_name, ks_list[i]) # deleted pvc only. for i in range(len(volume_info)): p, volume_name = pod_info[i], volume_info[i] delete_and_wait_pvc(core_api, p['pvc_name']) ks_list[i]['pvStatus'] = 'Released' ks_list[i]['lastPVCRefAt'] = 'not empty' wait_volume_kubernetes_status(client, volume_name, ks_list[i]) # deleted pv only. for i in range(len(volume_info)): p, volume_name = pod_info[i], volume_info[i] delete_and_wait_pv(core_api, p['pv_name']) ks_list[i]['pvName'] = '' ks_list[i]['pvStatus'] = '' wait_volume_kubernetes_status(client, volume_name, ks_list[i]) # reuse that volume for p, volume_name in zip(pod_info, volume_info): p['pod_name'] = p['pod_name'].replace('kubernetes-status-test', 'kubernetes-status-test-reuse') p['pvc_name'] = p['pvc_name'].replace('kubernetes-status-test', 'kubernetes-status-test-reuse') p['pv_name'] = p['pvc_name'] csi_pv['metadata']['name'] = p['pv_name'] csi_pv['spec']['csi']['volumeHandle'] = volume_name csi_pv['spec']['storageClassName'] = \ DEFAULT_LONGHORN_STATIC_STORAGECLASS_NAME core_api.create_persistent_volume(csi_pv) pvc['metadata']['name'] = p['pvc_name'] pvc['spec']['volumeName'] = p['pv_name'] pvc['spec']['storageClassName'] = \ DEFAULT_LONGHORN_STATIC_STORAGECLASS_NAME core_api.create_namespaced_persistent_volume_claim(body=pvc, namespace='default') pod['metadata']['name'] = p['pod_name'] pod['spec']['volumes'] = [{ 'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'], 'persistentVolumeClaim': { 'claimName': p['pvc_name'], }, }] create_and_wait_pod(core_api, pod) ks = { 'pvName': p['pv_name'], 'pvStatus': 'Bound', 'namespace': 'default', 'pvcName': p['pvc_name'], 'lastPVCRefAt': '', 'lastPodRefAt': '', 'workloadsStatus': [ { 'podName': p['pod_name'], 'podStatus': 'Running', 'workloadName': '', 'workloadType': '', }, ], } wait_volume_kubernetes_status(client, volume_name, ks) delete_and_wait_pod(core_api, p['pod_name']) # Since persistentVolumeReclaimPolicy of csi_pv is `Delete`, # we don't need to delete bounded pv manually delete_and_wait_pvc(core_api, p['pvc_name']) wait_delete_pv(core_api, p['pv_name'])
def test_pvc_creation(client, core_api, pod): # NOQA volume_name = "test-pvc-creation" client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=2) volume = wait_for_volume_detached(client, volume_name) pv_name = "pv-" + volume_name pvc_name = "pvc-" + volume_name pod_name = "pod-" + volume_name # try to create pvc without pv for the volume with pytest.raises(Exception) as e: volume.pvcCreate(namespace="default", pvcName=pvc_name) assert "connot find existing PV for volume" in str(e.value) volume.pvCreate(pvName=pv_name) for i in range(RETRY_COUNTS): if check_pv_existence(core_api, pv_name): break time.sleep(RETRY_INTERVAL) assert check_pv_existence(core_api, pv_name) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] for i in range(RETRY_COUNTS): if k_status['pvName'] and k_status['pvStatus'] == 'Available': break time.sleep(RETRY_INTERVAL) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] assert k_status['pvName'] == pv_name assert k_status['pvStatus'] == 'Available' assert not k_status['namespace'] assert not k_status['pvcName'] assert not k_status['workloadsStatus'] assert not k_status['lastPVCRefAt'] assert not k_status['lastPodRefAt'] volume.pvcCreate(namespace="default", pvcName=pvc_name) for i in range(RETRY_COUNTS): if check_pvc_existence(core_api, pvc_name): break time.sleep(RETRY_INTERVAL) assert check_pvc_existence(core_api, pvc_name) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] for i in range(RETRY_COUNTS): if k_status['pvcName'] and k_status['namespace']: break time.sleep(RETRY_INTERVAL) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] assert k_status['pvName'] == pv_name assert k_status['pvStatus'] == 'Bound' assert k_status['namespace'] == "default" assert k_status['pvcName'] == pvc_name assert not k_status['workloadsStatus'] assert not k_status['lastPVCRefAt'] assert not k_status['lastPodRefAt'] pod['metadata']['name'] = pod_name pod['spec']['volumes'] = [{ 'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'], 'persistentVolumeClaim': { 'claimName': pvc_name, }, }] create_and_wait_pod(core_api, pod) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] workloads = k_status['workloadsStatus'] assert k_status['pvName'] == pv_name assert k_status['pvStatus'] == 'Bound' assert len(workloads) == 1 for i in range(RETRY_COUNTS): if workloads[0]['podStatus'] == 'Running': break time.sleep(RETRY_INTERVAL) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] workloads = k_status['workloadsStatus'] assert len(workloads) == 1 assert workloads[0]['podName'] == pod_name assert workloads[0]['podStatus'] == 'Running' assert not workloads[0]['workloadName'] assert not workloads[0]['workloadType'] assert k_status['namespace'] == 'default' assert k_status['pvcName'] == pvc_name assert not k_status['lastPVCRefAt'] assert not k_status['lastPodRefAt'] delete_and_wait_pod(core_api, pod_name) delete_and_wait_pvc(core_api, pvc_name) wait_delete_pv(core_api, pv_name)
def test_csi_offline_expansion(client, core_api, storage_class, pvc, pod_manifest): # NOQA """ Test CSI feature: offline expansion 1. Create a new `storage_class` with `allowVolumeExpansion` set 2. Create PVC and Pod with dynamic provisioned volume from the StorageClass 3. Generate `test_data` and write to the pod 4. Delete the pod 5. Update pvc.spec.resources to expand the volume 6. Verify the volume expansion done using Longhorn API 7. Create a new pod and validate the volume content """ create_storage_class(storage_class) pod_name = 'csi-offline-expand-volume-test' pvc_name = pod_name + "-pvc" pvc['metadata']['name'] = pvc_name pvc['spec']['storageClassName'] = storage_class['metadata']['name'] create_pvc(pvc) pod_manifest['metadata']['name'] = pod_name pod_manifest['spec']['volumes'] = [{ 'name': pod_manifest['spec']['containers'][0]['volumeMounts'][0]['name'], 'persistentVolumeClaim': {'claimName': pvc_name}, }] create_and_wait_pod(core_api, pod_manifest) test_data = generate_random_data(VOLUME_RWTEST_SIZE) write_pod_volume_data(core_api, pod_name, test_data) delete_and_wait_pod(core_api, pod_name) pv = wait_and_get_pv_for_pvc(core_api, pvc_name) assert pv.status.phase == "Bound" volume_name = pv.spec.csi.volume_handle wait_for_volume_detached(client, volume_name) pvc['spec']['resources'] = { 'requests': { 'storage': size_to_string(EXPANDED_VOLUME_SIZE*Gi) } } expand_and_wait_for_pvc(core_api, pvc) wait_for_volume_expansion(client, volume_name) volume = client.by_id_volume(volume_name) assert volume.state == "detached" assert volume.size == str(EXPANDED_VOLUME_SIZE*Gi) pod_manifest['metadata']['name'] = pod_name pod_manifest['spec']['volumes'] = [{ 'name': pod_manifest['spec']['containers'][0]['volumeMounts'][0]['name'], 'persistentVolumeClaim': {'claimName': pvc_name}, }] create_and_wait_pod(core_api, pod_manifest) resp = read_volume_data(core_api, pod_name) assert resp == test_data volume = client.by_id_volume(volume_name) engine = get_volume_engine(volume) assert volume.size == str(EXPANDED_VOLUME_SIZE*Gi) assert volume.size == engine.size
def test_allow_volume_creation_with_degraded_availability_csi( client, core_api, apps_api, make_deployment_with_pvc): # NOQA """ Test Allow Volume Creation with Degraded Availability (CSI) Requirement: 1. Set `allow-volume-creation-with-degraded-availability` to true. 2. Set `node-level-soft-anti-affinity` to false. Steps: 1. Disable scheduling for node 3. 2. Create a Deployment Pod with a volume and 3 replicas. 1. After the volume is attached, scheduling error should be seen. 3. Write data to the Pod. 4. Scale down the deployment to 0 to detach the volume. 1. Scheduled condition should become true. 5. Scale up the deployment back to 1 and verify the data. 1. Scheduled condition should become false. 6. Enable the scheduling for node 3. 1. Volume should start rebuilding on the node 3 soon. 2. Once the rebuilding starts, the scheduled condition should become true. 7. Once rebuild finished, scale down and back the deployment to verify the data. """ setting = client.by_id_setting(common.SETTING_DEGRADED_AVAILABILITY) client.update(setting, value="true") setting = client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY) client.update(setting, value="false") nodes = client.list_node() node3 = nodes[2] client.update(node3, allowScheduling=False) vol = common.create_and_check_volume(client, generate_volume_name(), size=str(500 * Mi)) pv_name = vol.name + "-pv" common.create_pv_for_volume(client, core_api, vol, pv_name) pvc_name = vol.name + "-pvc" common.create_pvc_for_volume(client, core_api, vol, pvc_name) deployment_name = vol.name + "-dep" deployment = make_deployment_with_pvc(deployment_name, pvc_name) deployment["spec"]["replicas"] = 3 apps_api.create_namespaced_deployment(body=deployment, namespace='default') common.wait_for_volume_status(client, vol.name, common.VOLUME_FIELD_STATE, common.VOLUME_STATE_ATTACHED) common.wait_scheduling_failure(client, vol.name) data_path = "/data/test" pod = common.wait_and_get_any_deployment_pod(core_api, deployment_name) common.write_pod_volume_random_data(core_api, pod.metadata.name, data_path, common.DATA_SIZE_IN_MB_2) created_md5sum = get_pod_data_md5sum(core_api, pod.metadata.name, data_path) deployment['spec']['replicas'] = 0 apps_api.patch_namespaced_deployment(body=deployment, namespace='default', name=deployment_name) vol = common.wait_for_volume_detached(client, vol.name) assert vol.conditions[VOLUME_CONDITION_SCHEDULED]['status'] == "True" deployment['spec']['replicas'] = 1 apps_api.patch_namespaced_deployment(body=deployment, namespace='default', name=deployment_name) common.wait_for_volume_status(client, vol.name, common.VOLUME_FIELD_STATE, common.VOLUME_STATE_ATTACHED) common.wait_for_volume_condition_scheduled(client, vol.name, "status", common.CONDITION_STATUS_FALSE) pod = common.wait_and_get_any_deployment_pod(core_api, deployment_name) assert created_md5sum == get_pod_data_md5sum(core_api, pod.metadata.name, data_path) client.update(node3, allowScheduling=True) common.wait_for_rebuild_start(client, vol.name) vol = client.by_id_volume(vol.name) assert vol.conditions[VOLUME_CONDITION_SCHEDULED]['status'] == "True" common.wait_for_rebuild_complete(client, vol.name) deployment['spec']['replicas'] = 0 apps_api.patch_namespaced_deployment(body=deployment, namespace='default', name=deployment_name) common.wait_for_volume_detached(client, vol.name) deployment['spec']['replicas'] = 1 apps_api.patch_namespaced_deployment(body=deployment, namespace='default', name=deployment_name) common.wait_for_volume_status(client, vol.name, common.VOLUME_FIELD_STATE, common.VOLUME_STATE_ATTACHED) pod = common.wait_and_get_any_deployment_pod(core_api, deployment_name) assert created_md5sum == get_pod_data_md5sum(core_api, pod.metadata.name, data_path)
def test_kubernetes_status(client, core_api, storage_class, # NOQA statefulset, csi_pv, pvc, pod): # NOQA statefulset_name = 'kubernetes-status-test' update_statefulset_manifests(statefulset, storage_class, statefulset_name) storage_class['reclaimPolicy'] = 'Retain' create_storage_class(storage_class) create_and_wait_statefulset(statefulset) pod_info = get_statefulset_pod_info(core_api, statefulset) volume_info = [p['pv_name'] for p in pod_info] extra_pod_name = 'extra-pod-using-' + volume_info[1] pod['metadata']['name'] = extra_pod_name p2 = core_api.read_namespaced_pod(name=pod_info[1]['pod_name'], namespace='default') pod['spec']['nodeName'] = p2.spec.node_name pod['spec']['volumes'] = [{ 'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'], 'persistentVolumeClaim': { 'claimName': pod_info[1]['pvc_name'], }, }] create_and_wait_pod(core_api, pod) for i in range(len(volume_info)): p, volume_name = pod_info[i], volume_info[i] volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] workloads = k_status['workloadsStatus'] assert k_status['pvName'] == p['pv_name'] assert k_status['pvStatus'] == 'Bound' assert k_status['namespace'] == 'default' assert k_status['pvcName'] == p['pvc_name'] assert not k_status['lastPVCRefAt'] assert not k_status['lastPodRefAt'] if i == 0: assert len(workloads) == 1 assert workloads[0]['podName'] == p['pod_name'] assert workloads[0]['workloadName'] == statefulset_name assert workloads[0]['workloadType'] == 'StatefulSet' for _ in range(RETRY_COUNTS): if workloads[0]['podStatus'] == 'Running': break time.sleep(RETRY_INTERVAL) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] workloads = k_status['workloadsStatus'] assert workloads[0]['podStatus'] == 'Running' if i == 1: assert len(k_status['workloadsStatus']) == 2 if workloads[0]['podName'] == pod_info[i]['pod_name']: assert workloads[1]['podName'] == extra_pod_name assert workloads[0]['workloadName'] == statefulset_name assert workloads[0]['workloadType'] == 'StatefulSet' assert not workloads[1]['workloadName'] assert not workloads[1]['workloadType'] else: assert workloads[1]['podName'] == pod_info[i]['pod_name'] assert workloads[0]['podName'] == extra_pod_name assert not workloads[0]['workloadName'] assert not workloads[0]['workloadType'] assert workloads[1]['workloadName'] == statefulset_name assert workloads[1]['workloadType'] == 'StatefulSet' for _ in range(RETRY_COUNTS): if workloads[0]['podStatus'] == 'Running' and \ workloads[1]['podStatus'] == 'Running': break time.sleep(RETRY_INTERVAL) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] workloads = k_status['workloadsStatus'] assert len(workloads) == 2 assert workloads[0]['podStatus'] == 'Running' assert workloads[1]['podStatus'] == 'Running' # the extra pod is still using the 2nd volume delete_and_wait_statefulset_only(core_api, statefulset) for i in range(len(volume_info)): p, volume_name = pod_info[i], volume_info[i] volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] workloads = k_status['workloadsStatus'] assert k_status['pvName'] == p['pv_name'] assert k_status['pvStatus'] == 'Bound' assert k_status['namespace'] == 'default' assert k_status['pvcName'] == p['pvc_name'] assert not k_status['lastPVCRefAt'] assert len(workloads) == 1 if i == 0: assert workloads[0]['podName'] == p['pod_name'] assert workloads[0]['workloadName'] == statefulset_name assert workloads[0]['workloadType'] == 'StatefulSet' assert k_status['lastPodRefAt'] if i == 1: assert workloads[0]['podName'] == extra_pod_name assert not workloads[0]['workloadName'] assert not workloads[0]['workloadType'] assert not k_status['lastPodRefAt'] # deleted extra_pod, all volumes have no workload delete_and_wait_pod(core_api, pod['metadata']['name']) for i in range(len(volume_info)): p, volume_name = pod_info[i], volume_info[i] volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] workloads = k_status['workloadsStatus'] assert k_status['pvName'] == p['pv_name'] assert k_status['pvStatus'] == 'Bound' assert k_status['namespace'] == 'default' assert k_status['pvcName'] == p['pvc_name'] assert not k_status['lastPVCRefAt'] assert k_status['lastPodRefAt'] assert len(workloads) == 1 if i == 0: assert workloads[0]['podName'] == p['pod_name'] assert workloads[0]['workloadName'] == statefulset_name assert workloads[0]['workloadType'] == 'StatefulSet' if i == 1: assert workloads[0]['podName'] == extra_pod_name assert not workloads[0]['workloadName'] assert not workloads[0]['workloadType'] # deleted pvc only. for i in range(len(volume_info)): p, volume_name = pod_info[i], volume_info[i] delete_and_wait_pvc(core_api, p['pvc_name']) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] workloads = k_status['workloadsStatus'] for _ in range(RETRY_COUNTS): if k_status['pvStatus'] == 'Released': break time.sleep(RETRY_INTERVAL) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] workloads = k_status['workloadsStatus'] assert k_status['pvName'] == p['pv_name'] assert k_status['pvStatus'] == 'Released' assert k_status['namespace'] == 'default' assert k_status['pvcName'] == p['pvc_name'] assert k_status['lastPVCRefAt'] assert k_status['lastPodRefAt'] assert len(workloads) == 1 if i == 0: assert workloads[0]['podName'] == p['pod_name'] assert workloads[0]['workloadName'] == statefulset_name assert workloads[0]['workloadType'] == 'StatefulSet' if i == 1: assert workloads[0]['podName'] == extra_pod_name assert not workloads[0]['workloadName'] assert not workloads[0]['workloadType'] # deleted pv only. for i in range(len(volume_info)): p, volume_name = pod_info[i], volume_info[i] delete_and_wait_pv(core_api, p['pv_name']) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] workloads = k_status['workloadsStatus'] assert k_status['pvName'] == '' assert k_status['pvStatus'] == '' assert k_status['namespace'] == 'default' assert k_status['pvcName'] == p['pvc_name'] assert k_status['lastPVCRefAt'] assert k_status['lastPodRefAt'] assert len(workloads) == 1 if i == 0: assert workloads[0]['podName'] == p['pod_name'] assert workloads[0]['workloadName'] == statefulset_name assert workloads[0]['workloadType'] == 'StatefulSet' if i == 1: assert workloads[0]['podName'] == extra_pod_name assert not workloads[0]['workloadName'] assert not workloads[0]['workloadType'] # reuse that volume for p, volume_name in zip(pod_info, volume_info): p['pod_name'] = p['pod_name'].replace('kubernetes-status-test', 'kubernetes-status-test-reuse') p['pvc_name'] = p['pvc_name'].replace('kubernetes-status-test', 'kubernetes-status-test-reuse') p['pv_name'] = p['pvc_name'] csi_pv['metadata']['name'] = p['pv_name'] csi_pv['spec']['csi']['volumeHandle'] = volume_name core_api.create_persistent_volume(csi_pv) pvc['metadata']['name'] = p['pvc_name'] pvc['spec']['volumeName'] = p['pv_name'] core_api.create_namespaced_persistent_volume_claim( body=pvc, namespace='default') pod['metadata']['name'] = p['pod_name'] pod['spec']['volumes'] = [{ 'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'], 'persistentVolumeClaim': { 'claimName': p['pvc_name'], }, }] create_and_wait_pod(core_api, pod) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] workloads = k_status['workloadsStatus'] assert len(workloads) == 1 assert k_status['pvName'] == p['pv_name'] for _ in range(RETRY_COUNTS): if k_status['pvStatus'] == 'Bound': break time.sleep(RETRY_INTERVAL) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] workloads = k_status['workloadsStatus'] assert len(workloads) == 1 assert k_status['pvStatus'] == 'Bound' for _ in range(RETRY_COUNTS): if workloads[0]['podStatus'] == 'Running': break time.sleep(RETRY_INTERVAL) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] workloads = k_status['workloadsStatus'] assert len(workloads) == 1 assert workloads[0]['podStatus'] == 'Running' assert workloads[0]['podName'] == p['pod_name'] assert not workloads[0]['workloadName'] assert not workloads[0]['workloadType'] assert k_status['namespace'] == 'default' assert k_status['pvcName'] == p['pvc_name'] assert not k_status['lastPVCRefAt'] assert not k_status['lastPodRefAt'] delete_and_wait_pod(core_api, p['pod_name']) # Since persistentVolumeReclaimPolicy of csi_pv is `Delete`, # we don't need to delete bounded pv manually delete_and_wait_pvc(core_api, p['pvc_name']) wait_delete_pv(core_api, p['pv_name'])
def cleanup_volume(client, vol_name): # NOQA volume = client.by_id_volume(vol_name) volume.detach() client.delete(volume) common.wait_for_volume_delete(client, vol_name)
def test_node_delete_umount_disks(client): # NOQA # create test disks for node disk_volume_name = 'vol-disk-1' lht_hostId = get_self_host_id() node = client.by_id_node(lht_hostId) disks = node["disks"] disk_path1 = create_host_disk(client, disk_volume_name, str(Gi), lht_hostId) disk1 = {"path": disk_path1, "allowScheduling": True, "storageReserved": SMALL_DISK_SIZE} update_disk = get_update_disks(disks) for disk in update_disk: disk["allowScheduling"] = False # add new disk for node update_disk.append(disk1) # save disks to node node = node.diskUpdate(disks=update_disk) node = common.wait_for_disk_update(client, lht_hostId, len(update_disk)) assert len(node["disks"]) == len(update_disk) node = client.by_id_node(lht_hostId) assert len(node["disks"]) == len(update_disk) disks = node["disks"] # wait for node controller to update disk status for fsid, disk in disks.iteritems(): if disk["path"] == disk_path1: wait_for_disk_status(client, lht_hostId, fsid, "allowScheduling", True) wait_for_disk_status(client, lht_hostId, fsid, "storageReserved", SMALL_DISK_SIZE) free, total = common.get_host_disk_size(disk_path1) wait_for_disk_status(client, lht_hostId, fsid, "storageAvailable", free) wait_for_disk_status(client, lht_hostId, fsid, "storageMaximum", total) node = client.by_id_node(lht_hostId) disks = node["disks"] for key, disk in disks.iteritems(): if disk["path"] == disk_path1: assert disk["allowScheduling"] assert disk["storageReserved"] == SMALL_DISK_SIZE assert disk["storageScheduled"] == 0 free, total = common.get_host_disk_size(disk_path1) assert disk["storageMaximum"] == total assert disk["storageAvailable"] == free conditions = disk["conditions"] assert conditions[DISK_CONDITION_READY]["status"] == \ CONDITION_STATUS_TRUE assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \ CONDITION_STATUS_TRUE else: assert not disk["allowScheduling"] # create a volume nodes = client.list_node() vol_name = common.generate_volume_name() volume = create_volume(client, vol_name, str(SMALL_DISK_SIZE), lht_hostId, len(nodes)) replicas = volume["replicas"] for replica in replicas: id = replica["hostId"] assert id != "" assert replica["running"] if id == lht_hostId: assert replica["dataPath"].startswith(disk_path1) # umount the disk mount_path = os.path.join(DIRECTORY_PATH, disk_volume_name) common.umount_disk(mount_path) # wait for update node status node = client.by_id_node(lht_hostId) disks = node["disks"] for fsid, disk in disks.iteritems(): if disk["path"] == disk_path1: wait_for_disk_status(client, lht_hostId, fsid, "allowScheduling", False) wait_for_disk_status(client, lht_hostId, fsid, "storageMaximum", 0) wait_for_disk_conditions(client, lht_hostId, fsid, DISK_CONDITION_READY, CONDITION_STATUS_FALSE) # check result node = client.by_id_node(lht_hostId) disks = node["disks"] update_disks = [] for fsid, disk in disks.iteritems(): if disk["path"] == disk_path1: assert not disk["allowScheduling"] assert disk["storageMaximum"] == 0 assert disk["storageAvailable"] == 0 assert disk["storageReserved"] == SMALL_DISK_SIZE assert disk["storageScheduled"] == SMALL_DISK_SIZE conditions = disk["conditions"] assert conditions[DISK_CONDITION_READY]["status"] == \ CONDITION_STATUS_FALSE assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \ CONDITION_STATUS_FALSE else: conditions = disk["conditions"] assert conditions[DISK_CONDITION_READY]["status"] == \ CONDITION_STATUS_TRUE assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \ CONDITION_STATUS_TRUE update_disks.append(disk) # delete umount disk exception with pytest.raises(Exception) as e: node.diskUpdate(disks=update_disks) assert "disable the disk" in str(e.value) # update other disks disks = node["disks"] for fsid, disk in disks.iteritems(): if disk["path"] != disk_path1: disk["allowScheduling"] = True test_update = get_update_disks(disks) node = node.diskUpdate(disks=test_update) disks = node["disks"] for fsid, disk in disks.iteritems(): if disk["path"] != disk_path1: wait_for_disk_status(client, lht_hostId, fsid, "allowScheduling", True) node = client.by_id_node(lht_hostId) disks = node["disks"] for fsid, disk in disks.iteritems(): if disk["path"] != disk_path1: assert disk["allowScheduling"] # mount the disk back mount_path = os.path.join(DIRECTORY_PATH, disk_volume_name) disk_volume = client.by_id_volume(disk_volume_name) dev = get_volume_endpoint(disk_volume) common.mount_disk(dev, mount_path) # wait for update node status node = client.by_id_node(lht_hostId) disks = node["disks"] for fsid, disk in disks.iteritems(): if disk["path"] == disk_path1: wait_for_disk_status(client, lht_hostId, fsid, "allowScheduling", False) wait_for_disk_conditions(client, lht_hostId, fsid, DISK_CONDITION_READY, CONDITION_STATUS_TRUE) # check result node = client.by_id_node(lht_hostId) disks = node["disks"] for fsid, disk in disks.iteritems(): if disk["path"] == disk_path1: free, total = common.get_host_disk_size(disk_path1) assert not disk["allowScheduling"] assert disk["storageMaximum"] == total assert disk["storageAvailable"] == free assert disk["storageReserved"] == SMALL_DISK_SIZE assert disk["storageScheduled"] == SMALL_DISK_SIZE conditions = disk["conditions"] assert conditions[DISK_CONDITION_READY]["status"] == \ CONDITION_STATUS_TRUE assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \ CONDITION_STATUS_TRUE else: conditions = disk["conditions"] assert conditions[DISK_CONDITION_READY]["status"] == \ CONDITION_STATUS_TRUE assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \ CONDITION_STATUS_TRUE # delete volume and umount disk cleanup_volume(client, vol_name) mount_path = os.path.join(DIRECTORY_PATH, disk_volume_name) common.umount_disk(mount_path) # wait for update node status node = client.by_id_node(lht_hostId) disks = node["disks"] for fsid, disk in disks.iteritems(): if disk["path"] == disk_path1: wait_for_disk_status(client, lht_hostId, fsid, "allowScheduling", False) wait_for_disk_status(client, lht_hostId, fsid, "storageScheduled", 0) wait_for_disk_status(client, lht_hostId, fsid, "storageMaximum", 0) # test delete the umount disk node = client.by_id_node(lht_hostId) node.diskUpdate(disks=update_disks) node = common.wait_for_disk_update(client, lht_hostId, len(update_disks)) assert len(node["disks"]) == len(update_disks) cmd = ['rm', '-r', mount_path] subprocess.check_call(cmd)
def test_zone_tags(client, core_api, volume_name, k8s_node_zone_tags): # NOQA """ Test anti affinity zone feature 1. Add Kubernetes zone labels to the nodes 1. Only two zones now: zone1 and zone2 2. Create a volume with two replicas 3. Verify zone1 and zone2 either has one replica. 4. Remove a random replica and wait for volume to finish rebuild 5. Verify zone1 and zone2 either has one replica. 6. Repeat step 4-5 a few times. 7. Update volume to 3 replicas, make sure they're scheduled on 3 nodes 8. Remove a random replica and wait for volume to finish rebuild 9. Make sure replicas are on different nodes 10. Repeat step 8-9 a few times """ wait_longhorn_node_zone_updated(client) volume = create_and_check_volume(client, volume_name, num_of_replicas=2) host_id = get_self_host_id() volume.attach(hostId=host_id) volume = wait_for_volume_healthy(client, volume_name) volume = client.by_id_volume(volume_name) zone1_replica_count = get_zone_replica_count(client, volume_name, ZONE1) zone2_replica_count = get_zone_replica_count(client, volume_name, ZONE2) assert zone1_replica_count == zone2_replica_count for i in range(randrange(3, 5)): volume = client.by_id_volume(volume_name) replica_count = len(volume.replicas) assert replica_count == 2 replica_id = randrange(0, replica_count) replica_name = volume.replicas[replica_id].name volume.replicaRemove(name=replica_name) wait_for_volume_degraded(client, volume_name) wait_for_volume_healthy(client, volume_name) wait_for_volume_replica_count(client, volume_name, replica_count) volume = client.by_id_volume(volume_name) replica_names = map(lambda replica: replica.name, volume["replicas"]) wait_new_replica_ready(client, volume_name, replica_names) zone1_replica_count = \ get_zone_replica_count(client, volume_name, ZONE1) zone2_replica_count = \ get_zone_replica_count(client, volume_name, ZONE2) assert zone1_replica_count == zone2_replica_count volume.updateReplicaCount(replicaCount=3) wait_for_volume_degraded(client, volume_name) wait_for_volume_replica_count(client, volume_name, 3) wait_for_volume_healthy(client, volume_name) volume = client.by_id_volume(volume_name) lh_node_names = list(map(lambda node: node.name, client.list_node())) for replica in volume.replicas: lh_node_names.remove(replica.hostId) assert lh_node_names == [] for i in range(randrange(3, 5)): volume = client.by_id_volume(volume_name) replica_count = len(volume.replicas) assert replica_count == 3 replica_id = randrange(0, replica_count) replica_name = volume.replicas[replica_id].name volume.replicaRemove(name=replica_name) wait_for_volume_degraded(client, volume_name) wait_for_volume_healthy(client, volume_name) wait_for_volume_replica_count(client, volume_name, replica_count) volume = client.by_id_volume(volume_name) lh_node_names = list(map(lambda node: node.name, client.list_node())) for replica in volume.replicas: lh_node_names.remove(replica.hostId) assert lh_node_names == []
def test_setting_toleration(): """ Test toleration setting 1. Set `taint-toleration` to "key1=value1:NoSchedule; key2:InvalidEffect". 2. Verify the request fails. 3. Create a volume and attach it. 4. Set `taint-toleration` to "key1=value1:NoSchedule; key2:NoExecute". 5. Verify that cannot update toleration setting when any volume is attached. 6. Generate and write `data1` into the volume. 7. Detach the volume. 8. Set `taint-toleration` to "key1=value1:NoSchedule; key2:NoExecute". 9. Wait for all the Longhorn system components to restart with new toleration. 10. Verify that UI, manager, and drive deployer don't restart and don't have new toleration. 11. Attach the volume again and verify the volume `data1`. 12. Generate and write `data2` to the volume. 13. Detach the volume. 14. Clean the `toleration` setting. 15. Wait for all the Longhorn system components to restart with no toleration. 16. Attach the volume and validate `data2`. 17. Generate and write `data3` to the volume. """ client = get_longhorn_api_client() # NOQA apps_api = get_apps_api_client() # NOQA core_api = get_core_api_client() # NOQA count = len(client.list_node()) setting = client.by_id_setting(SETTING_TAINT_TOLERATION) with pytest.raises(Exception) as e: client.update(setting, value="key1=value1:NoSchedule; key2:InvalidEffect") assert 'invalid effect' in str(e.value) volume_name = "test-toleration-vol" # NOQA volume = create_and_check_volume(client, volume_name) volume.attach(hostId=get_self_host_id()) volume = wait_for_volume_healthy(client, volume_name) setting_value_str = "key1=value1:NoSchedule; key2:NoExecute" setting_value_dicts = [ { "key": "key1", "value": "value1", "operator": "Equal", "effect": "NoSchedule" }, { "key": "key2", "value": None, "operator": "Exists", "effect": "NoExecute" }, ] with pytest.raises(Exception) as e: client.update(setting, value=setting_value_str) assert 'cannot modify toleration setting before all volumes are detached' \ in str(e.value) data1 = write_volume_random_data(volume) check_volume_data(volume, data1) volume.detach(hostId="") wait_for_volume_detached(client, volume_name) setting = client.update(setting, value=setting_value_str) assert setting.value == setting_value_str wait_for_toleration_update(core_api, apps_api, count, setting_value_dicts) client, node = wait_for_longhorn_node_ready() volume = client.by_id_volume(volume_name) volume.attach(hostId=node) volume = wait_for_volume_healthy(client, volume_name) check_volume_data(volume, data1) data2 = write_volume_random_data(volume) check_volume_data(volume, data2) volume.detach(hostId="") wait_for_volume_detached(client, volume_name) # cleanup setting_value_str = "" setting_value_dicts = [] setting = client.by_id_setting(SETTING_TAINT_TOLERATION) setting = client.update(setting, value=setting_value_str) assert setting.value == setting_value_str wait_for_toleration_update(core_api, apps_api, count, setting_value_dicts) client, node = wait_for_longhorn_node_ready() volume = client.by_id_volume(volume_name) volume.attach(hostId=node) volume = wait_for_volume_healthy(client, volume_name) check_volume_data(volume, data2) data3 = write_volume_random_data(volume) check_volume_data(volume, data3) cleanup_volume(client, volume)
def test_setting_backing_image_auto_cleanup(client, core_api, volume_name): # NOQA """ Test that the Backing Image Cleanup Wait Interval setting works correctly. The default value of setting `BackingImageCleanupWaitInterval` is 60. 1. Create a backing image. 2. Create multiple volumes using the backing image. 3. Attach all volumes, Then: 1. Wait for all volumes can become running. 2. Verify the correct in all volumes. 3. Verify the backing image disk status map. 4. Verify the only backing image file in each disk is reused by multiple replicas. The backing image file path is `<Data path>/<The backing image name>/backing` 4. Unschedule test node to guarantee when replica removed from test node, no new replica can be rebuilt on the test node. 5. Remove all replicas in one disk. Wait for 50 seconds. Then verify nothing changes in the backing image disk state map (before the cleanup wait interval is passed). 6. Modify `BackingImageCleanupWaitInterval` to a small value. Then verify: 1. The download state of the disk containing no replica becomes terminating first, and the entry will be removed from the map later. 2. The related backing image file is removed. 3. The download state of other disks keep unchanged. All volumes still work fine. 7. Delete all volumes. Verify that there will only remain 1 entry in the backing image disk map 8. Delete the backing image. """ # Step 1 create_backing_image_with_matching_url(client, BACKING_IMAGE_NAME, BACKING_IMAGE_QCOW2_URL) # Step 2 volume_names = [volume_name + "-1", volume_name + "-2", volume_name + "-3"] for volume_name in volume_names: volume = create_and_check_volume(client, volume_name, 3, str(BACKING_IMAGE_EXT4_SIZE), BACKING_IMAGE_NAME) # Step 3 lht_host_id = get_self_host_id() for volume_name in volume_names: volume = client.by_id_volume(volume_name) volume.attach(hostId=lht_host_id) wait_for_volume_healthy(client, volume_name) assert volume.backingImage == BACKING_IMAGE_NAME backing_image = client.by_id_backing_image(BACKING_IMAGE_NAME) assert len(backing_image.diskFileStatusMap) == 3 for disk_id, status in iter(backing_image.diskFileStatusMap.items()): assert status.state == "ready" backing_images_in_disk = os.listdir("/var/lib/longhorn/backing-images") assert len(backing_images_in_disk) == 1 assert os.path.exists("/var/lib/longhorn/backing-images/{}/backing".format( backing_images_in_disk[0])) assert os.path.exists( "/var/lib/longhorn/backing-images/{}/backing.cfg".format( backing_images_in_disk[0])) # Step 4 current_host = client.by_id_node(id=lht_host_id) client.update(current_host, allowScheduling=False) wait_for_node_update(client, lht_host_id, "allowScheduling", False) # Step 5 for volume_name in volume_names: volume = client.by_id_volume(volume_name) for replica in volume.replicas: if replica.hostId == lht_host_id: replica_name = replica.name volume.replicaRemove(name=replica_name) # This wait interval should be smaller than the setting value. # Otherwise, the backing image files may be cleaned up. time.sleep(int(BACKING_IMAGE_CLEANUP_WAIT_INTERVAL)) check_backing_image_disk_map_status(client, BACKING_IMAGE_NAME, 3, "ready") # Step 6 update_setting(client, "backing-image-cleanup-wait-interval", "1") check_backing_image_disk_map_status(client, BACKING_IMAGE_NAME, 2, "ready") backing_images_in_disk = os.listdir("/var/lib/longhorn/backing-images") assert len(backing_images_in_disk) == 0 # Step 7 for volume_name in volume_names: volume = client.by_id_volume(volume_name) client.delete(volume) wait_for_volume_delete(client, volume_name) check_backing_image_disk_map_status(client, BACKING_IMAGE_NAME, 1, "ready")