def test_replica_rebuild_per_volume_limit(client, core_api, storage_class, sts_name, statefulset): # NOQA """ Test the volume always only have one replica scheduled for rebuild 1. Set soft anti-affinity to `true`. 2. Create a volume with 1 replica. 3. Attach the volume and write a few hundreds MB data to it. 4. Scale the volume replica to 5. 5. Constantly checking the volume replica list to make sure there should be only 1 replica in WO state. 6. Wait for the volume to complete rebuilding. Then remove 4 of the 5 replicas. 7. Monitoring the volume replica list again. 8. Once the rebuild was completed again, verify the data checksum. """ replica_soft_anti_affinity_setting = \ client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY) client.update(replica_soft_anti_affinity_setting, value="true") data_path = '/data/test' storage_class['parameters']['numberOfReplicas'] = "1" vol_name, pod_name, md5sum = \ common.prepare_statefulset_with_data_in_mb( client, core_api, statefulset, sts_name, storage_class, data_path=data_path, data_size_in_mb=DATA_SIZE_IN_MB_2) # Scale the volume replica to 5 r_count = 5 vol = client.by_id_volume(vol_name) vol.updateReplicaCount(replicaCount=r_count) vol = common.wait_for_volume_replicas_mode(client, vol_name, 'RW', replica_count=r_count) # Delete 4 volume replicas del vol.replicas[0] for r in vol.replicas: vol.replicaRemove(name=r.name) r_count = 1 common.wait_for_volume_replicas_mode(client, vol_name, 'RW', replica_count=r_count) assert md5sum == common.get_pod_data_md5sum(core_api, pod_name, data_path)
def test_csi_block_volume(client, core_api, storage_class, pvc, pod_manifest): # NOQA """ Test CSI feature: raw block volume 1. Create a PVC with `volumeMode = Block` 2. Create a pod using the PVC to dynamic provision a volume 3. Verify the pod creation 4. Generate `test_data` and write to the block volume directly in the pod 5. Read the data back for validation 6. Delete the pod and create `pod2` to use the same volume 7. Validate the data in `pod2` is consistent with `test_data` """ pod_name = 'csi-block-volume-test' pvc_name = pod_name + "-pvc" device_path = "/dev/longhorn/longhorn-test-blk" storage_class['reclaimPolicy'] = 'Retain' pvc['metadata']['name'] = pvc_name pvc['spec']['volumeMode'] = 'Block' pvc['spec']['storageClassName'] = storage_class['metadata']['name'] pvc['spec']['resources'] = { 'requests': { 'storage': size_to_string(1 * Gi) } } pod_manifest['metadata']['name'] = pod_name pod_manifest['spec']['volumes'] = [{ 'name': 'longhorn-blk', 'persistentVolumeClaim': { 'claimName': pvc_name, }, }] pod_manifest['spec']['containers'][0]['volumeMounts'] = [] pod_manifest['spec']['containers'][0]['volumeDevices'] = [ {'name': 'longhorn-blk', 'devicePath': device_path} ] create_storage_class(storage_class) create_pvc(pvc) pv_name = wait_and_get_pv_for_pvc(core_api, pvc_name).metadata.name create_and_wait_pod(core_api, pod_manifest) test_data = generate_random_data(VOLUME_RWTEST_SIZE) test_offset = random.randint(0, VOLUME_RWTEST_SIZE) write_pod_block_volume_data( core_api, pod_name, test_data, test_offset, device_path) returned_data = read_pod_block_volume_data( core_api, pod_name, len(test_data), test_offset, device_path ) assert test_data == returned_data md5_sum = get_pod_data_md5sum( core_api, pod_name, device_path) delete_and_wait_pod(core_api, pod_name) common.wait_for_volume_detached(client, pv_name) pod_name_2 = 'csi-block-volume-test-reuse' pod_manifest['metadata']['name'] = pod_name_2 create_and_wait_pod(core_api, pod_manifest) returned_data = read_pod_block_volume_data( core_api, pod_name_2, len(test_data), test_offset, device_path ) assert test_data == returned_data md5_sum_2 = get_pod_data_md5sum( core_api, pod_name_2, device_path) assert md5_sum == md5_sum_2 delete_and_wait_pod(core_api, pod_name_2) delete_and_wait_pvc(core_api, pvc_name) delete_and_wait_pv(core_api, pv_name)
def test_allow_volume_creation_with_degraded_availability_csi( client, core_api, apps_api, make_deployment_with_pvc): # NOQA """ Test Allow Volume Creation with Degraded Availability (CSI) Requirement: 1. Set `allow-volume-creation-with-degraded-availability` to true. 2. Set `node-level-soft-anti-affinity` to false. Steps: 1. Disable scheduling for node 3. 2. Create a Deployment Pod with a volume and 3 replicas. 1. After the volume is attached, scheduling error should be seen. 3. Write data to the Pod. 4. Scale down the deployment to 0 to detach the volume. 1. Scheduled condition should become true. 5. Scale up the deployment back to 1 and verify the data. 1. Scheduled condition should become false. 6. Enable the scheduling for node 3. 1. Volume should start rebuilding on the node 3 soon. 2. Once the rebuilding starts, the scheduled condition should become true. 7. Once rebuild finished, scale down and back the deployment to verify the data. """ setting = client.by_id_setting(common.SETTING_DEGRADED_AVAILABILITY) client.update(setting, value="true") setting = client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY) client.update(setting, value="false") nodes = client.list_node() node3 = nodes[2] client.update(node3, allowScheduling=False) vol = common.create_and_check_volume(client, generate_volume_name(), size=str(500 * Mi)) pv_name = vol.name + "-pv" common.create_pv_for_volume(client, core_api, vol, pv_name) pvc_name = vol.name + "-pvc" common.create_pvc_for_volume(client, core_api, vol, pvc_name) deployment_name = vol.name + "-dep" deployment = make_deployment_with_pvc(deployment_name, pvc_name) deployment["spec"]["replicas"] = 3 apps_api.create_namespaced_deployment(body=deployment, namespace='default') common.wait_for_volume_status(client, vol.name, common.VOLUME_FIELD_STATE, common.VOLUME_STATE_ATTACHED) common.wait_scheduling_failure(client, vol.name) data_path = "/data/test" pod = common.wait_and_get_any_deployment_pod(core_api, deployment_name) common.write_pod_volume_random_data(core_api, pod.metadata.name, data_path, common.DATA_SIZE_IN_MB_2) created_md5sum = get_pod_data_md5sum(core_api, pod.metadata.name, data_path) deployment['spec']['replicas'] = 0 apps_api.patch_namespaced_deployment(body=deployment, namespace='default', name=deployment_name) vol = common.wait_for_volume_detached(client, vol.name) assert vol.conditions[VOLUME_CONDITION_SCHEDULED]['status'] == "True" deployment['spec']['replicas'] = 1 apps_api.patch_namespaced_deployment(body=deployment, namespace='default', name=deployment_name) common.wait_for_volume_status(client, vol.name, common.VOLUME_FIELD_STATE, common.VOLUME_STATE_ATTACHED) common.wait_for_volume_condition_scheduled(client, vol.name, "status", common.CONDITION_STATUS_FALSE) pod = common.wait_and_get_any_deployment_pod(core_api, deployment_name) assert created_md5sum == get_pod_data_md5sum(core_api, pod.metadata.name, data_path) client.update(node3, allowScheduling=True) common.wait_for_rebuild_start(client, vol.name) vol = client.by_id_volume(vol.name) assert vol.conditions[VOLUME_CONDITION_SCHEDULED]['status'] == "True" common.wait_for_rebuild_complete(client, vol.name) deployment['spec']['replicas'] = 0 apps_api.patch_namespaced_deployment(body=deployment, namespace='default', name=deployment_name) common.wait_for_volume_detached(client, vol.name) deployment['spec']['replicas'] = 1 apps_api.patch_namespaced_deployment(body=deployment, namespace='default', name=deployment_name) common.wait_for_volume_status(client, vol.name, common.VOLUME_FIELD_STATE, common.VOLUME_STATE_ATTACHED) pod = common.wait_and_get_any_deployment_pod(core_api, deployment_name) assert created_md5sum == get_pod_data_md5sum(core_api, pod.metadata.name, data_path)
def test_restore_rwo_volume_to_rwx(set_random_backupstore, client, core_api, volume_name, pvc, csi_pv, pod_make, make_deployment_with_pvc): # NOQA """ Test restoring a rwo to a rwx volume. 1. Create a volume with 'accessMode' rwo. 2. Create a PV and a PVC with access mode 'readwriteonce' and attach to the volume. 3. Create a pod and attach to the PVC. 4. Write some data into the pod and compute md5sum. 5. Take a backup of the volume. 6. Restore the backup with 'accessMode' rwx. 7. Create PV and PVC and attach to 2 pods. 8. Verify the data. """ data_path = "/data/test" pod_name, pv_name, pvc_name, md5sum = \ prepare_pod_with_data_in_mb(client, core_api, csi_pv, pvc, pod_make, volume_name, data_size_in_mb=DATA_SIZE_IN_MB_1, data_path=data_path) snap = create_snapshot(client, volume_name) volume = client.by_id_volume(volume_name) volume.snapshotBackup(name=snap.name) wait_for_backup_completion(client, volume_name, snap.name) bv, b1 = find_backup(client, volume_name, snap.name) restore_volume_name = 'restored-rwx-volume' restore_pv_name = restore_volume_name + "-pv" restore_pvc_name = restore_volume_name + "-pvc" client.create_volume(name=restore_volume_name, size=str(1 * Gi), numberOfReplicas=3, fromBackup=b1.url, accessMode='rwx') wait_for_volume_creation(client, restore_volume_name) restore_volume = wait_for_volume_detached(client, restore_volume_name) create_pv_for_volume(client, core_api, restore_volume, restore_pv_name) create_pvc_for_volume(client, core_api, restore_volume, restore_pvc_name) deployment = make_deployment_with_pvc('deployment-multi-pods-test', restore_pvc_name, replicas=2) apps_api = get_apps_api_client() create_and_wait_deployment(apps_api, deployment) deployment_label_selector = \ "name=" + deployment["metadata"]["labels"]["name"] deployment_pod_list = \ core_api.list_namespaced_pod(namespace="default", label_selector=deployment_label_selector) pod_name_1 = deployment_pod_list.items[0].metadata.name pod_name_2 = deployment_pod_list.items[1].metadata.name md5sum_pod1 = get_pod_data_md5sum(core_api, pod_name_1, data_path) md5sum_pod2 = get_pod_data_md5sum(core_api, pod_name_2, data_path) assert md5sum == md5sum_pod1 == md5sum_pod2
def test_rwx_parallel_writing(core_api, statefulset, pod): # NOQA """ Test parallel writing of data 1. Create a StatefulSet of 1 pod with VolumeClaimTemplate where accessMode is 'RWX'. 2. Wait for StatefulSet to come up healthy. 3. Create another statefulSet with same pvc which got created with first statefulSet. 4. Wait for statefulSet to come up healthy. 5. Start writing 800 MB data in first statefulSet `file 1` and start writing 500 MB data in second statefulSet `file 2`. 6. Compute md5sum. 7. Check the data md5sum in share manager pod volume """ statefulset_name = 'statefulset-rwx-parallel-writing-test' statefulset['metadata']['name'] = \ statefulset['spec']['selector']['matchLabels']['app'] = \ statefulset['spec']['serviceName'] = \ statefulset['spec']['template']['metadata']['labels']['app'] = \ statefulset_name statefulset['spec']['replicas'] = 1 statefulset['spec']['volumeClaimTemplates'][0]['spec']['storageClassName']\ = 'longhorn' statefulset['spec']['volumeClaimTemplates'][0]['spec']['accessModes'] \ = ['ReadWriteMany'] create_and_wait_statefulset(statefulset) statefulset_pod_name = statefulset_name + '-0' pvc_name = \ statefulset['spec']['volumeClaimTemplates'][0]['metadata']['name'] \ + '-' + statefulset_name + '-0' pv_name = get_volume_name(core_api, pvc_name) share_manager_name = 'share-manager-' + pv_name pod_name = 'pod-parallel-write-test' pod['metadata']['name'] = pod_name pod['spec']['volumes'] = [create_pvc_spec(pvc_name)] create_and_wait_pod(core_api, pod) with Pool(2) as p: p.map( write_data_into_pod, [statefulset_pod_name + ':/data/test1', pod_name + ':/data/test2']) md5sum1 = get_pod_data_md5sum(core_api, statefulset_pod_name, 'data/test1') md5sum2 = get_pod_data_md5sum(core_api, pod_name, 'data/test2') command1 = 'md5sum /export' + '/' + pv_name + '/' + 'test1' + \ " | awk '{print $1}'" share_manager_data1 = exec_command_in_pod(core_api, command1, share_manager_name, LONGHORN_NAMESPACE) assert md5sum1 == share_manager_data1 command2 = 'md5sum /export' + '/' + pv_name + '/' + 'test2' + \ " | awk '{print $1}'" share_manager_data2 = exec_command_in_pod(core_api, command2, share_manager_name, LONGHORN_NAMESPACE) assert md5sum2 == share_manager_data2
def test_csi_volumesnapshot_restore_existing_backup( set_random_backupstore, # NOQA client, # NOQA core_api, # NOQA volume_name, # NOQA csi_pv, # NOQA pvc, # NOQA pod_make, # NOQA volumesnapshotclass, # NOQA volumesnapshotcontent, volumesnapshot, # NOQA volsnapshotclass_delete_policy, # NOQA backup_is_deleted): # NOQA """ Test retention of a backup while deleting the associated `VolumeSnapshot` via the csi snapshotter Context: We want to allow the user to programmatically create/restore/delete longhorn backups via the csi snapshot mechanism ref: https://kubernetes.io/docs/concepts/storage/volume-snapshots/ Setup: 1. Make sure your cluster contains the below crds https://github.com/kubernetes-csi/external-snapshotter /tree/master/client/config/crd 2. Make sure your cluster contains the snapshot controller https://github.com/kubernetes-csi/external-snapshotter /tree/master/deploy/kubernetes/snapshot-controller Steps: 1. create new snapshotClass with deletionPolicy set to Retain 2. call csi_volumesnapshot_creation_test(snapshotClass=custom) 3. call csi_volumesnapshot_restore_test() 4. call csi_volumesnapshot_deletion_test(deletionPolicy='Retain'): 5. cleanup """ csisnapclass = \ volumesnapshotclass(name="snapshotclass", deletepolicy=volsnapshotclass_delete_policy) pod_name, pv_name, pvc_name, md5sum = \ prepare_pod_with_data_in_mb(client, core_api, csi_pv, pvc, pod_make, volume_name, data_path="/data/test") volume = client.by_id_volume(volume_name) snap = create_snapshot(client, volume_name) volume.snapshotBackup(name=snap.name) wait_for_backup_completion(client, volume_name, snap.name) bv, b = find_backup(client, volume_name, snap.name) csivolsnap_name = volume_name + "-volumesnapshot" csivolsnap_namespace = "default" volsnapcontent = \ volumesnapshotcontent("volsnapcontent", csisnapclass["metadata"]["name"], "Delete", "bs://" + volume_name + "/" + b.name, csivolsnap_name, csivolsnap_namespace) csivolsnap = volumesnapshot(csivolsnap_name, csivolsnap_namespace, csisnapclass["metadata"]["name"], "volumeSnapshotContentName", volsnapcontent["metadata"]["name"]) restore_pvc_name = pvc["metadata"]["name"] + "-restore" restore_pvc_size = pvc["spec"]["resources"]["requests"]["storage"] restore_csi_volume_snapshot(core_api, client, csivolsnap, restore_pvc_name, restore_pvc_size) restore_pod = pod_make() restore_pod_name = restore_pod["metadata"]["name"] restore_pod['spec']['volumes'] = [create_pvc_spec(restore_pvc_name)] create_and_wait_pod(core_api, restore_pod) restore_md5sum = \ get_pod_data_md5sum(core_api, restore_pod_name, path="/data/test") assert restore_md5sum == md5sum # Delete volumeSnapshot test delete_volumesnapshot(csivolsnap["metadata"]["name"], "default") if backup_is_deleted is False: find_backup(client, volume_name, b["snapshotName"]) else: wait_for_backup_delete(client, volume_name, b["name"])
def test_csi_volumesnapshot_basic( set_random_backupstore, # NOQA volumesnapshotclass, # NOQA volumesnapshot, # NOQA client, # NOQA core_api, # NOQA volume_name, # NOQA csi_pv, # NOQA pvc, # NOQA pod_make, # NOQA volsnapshotclass_delete_policy, # NOQA backup_is_deleted): # NOQA """ Test creation / restoration / deletion of a backup via the csi snapshotter Context: We want to allow the user to programmatically create/restore/delete longhorn backups via the csi snapshot mechanism ref: https://kubernetes.io/docs/concepts/storage/volume-snapshots/ Setup: 1. Make sure your cluster contains the below crds https://github.com/kubernetes-csi/external-snapshotter /tree/master/client/config/crd 2. Make sure your cluster contains the snapshot controller https://github.com/kubernetes-csi/external-snapshotter /tree/master/deploy/kubernetes/snapshot-controller Steps: def csi_volumesnapshot_creation_test(snapshotClass=longhorn|custom): 1. create volume(1) 2. write data to volume(1) 3. create a kubernetes `VolumeSnapshot` object the `VolumeSnapshot.uuid` will be used to identify a **longhorn snapshot** and the associated `VolumeSnapshotContent` object 4. check creation of a new longhorn snapshot named `snapshot-uuid` 5. check for `VolumeSnapshotContent` named `snapcontent-uuid` 6. wait for `VolumeSnapshotContent.readyToUse` flag to be set to **true** 7. check for backup existance on the backupstore # the csi snapshot restore sets the fromBackup field same as # the StorageClass based restore approach. def csi_volumesnapshot_restore_test(): 8. create a `PersistentVolumeClaim` object where the `dataSource` field references the `VolumeSnapshot` object by name 9. verify creation of a new volume(2) bound to the pvc created in step(8) 10. verify data of new volume(2) equals data from backup (ie old data above) # default longhorn snapshot class is set to Delete # add a second test with a custom snapshot class with deletionPolicy # set to Retain you can reuse these methods for that and other tests def csi_volumesnapshot_deletion_test(deletionPolicy='Delete|Retain'): 11. delete `VolumeSnapshot` object 12. if deletionPolicy == Delete: 13. verify deletion of `VolumeSnapshot` and `VolumeSnapshotContent` objects 14. verify deletion of backup from backupstore 12. if deletionPolicy == Retain: 13. verify deletion of `VolumeSnapshot` 14. verify retention of `VolumeSnapshotContent` and backup on backupstore 15. cleanup """ csisnapclass = \ volumesnapshotclass(name="snapshotclass", deletepolicy=volsnapshotclass_delete_policy) pod_name, pv_name, pvc_name, md5sum = \ prepare_pod_with_data_in_mb(client, core_api, csi_pv, pvc, pod_make, volume_name, data_path="/data/test") # Create volumeSnapshot test csivolsnap = volumesnapshot(volume_name + "-volumesnapshot", "default", csisnapclass["metadata"]["name"], "persistentVolumeClaimName", pvc_name) volume = client.by_id_volume(volume_name) for i in range(RETRY_COUNTS): snapshots = volume.snapshotList() if len(snapshots) == 2: break time.sleep(RETRY_INTERVAL) lh_snapshot = None snapshots = volume.snapshotList() for snapshot in snapshots: if snapshot["name"] == "snapshot-" + csivolsnap["metadata"]["uid"]: lh_snapshot = snapshot assert lh_snapshot is not None wait_for_volumesnapshot_ready(csivolsnap["metadata"]["name"], csivolsnap["metadata"]["namespace"]) bv1, b = find_backup(client, volume_name, lh_snapshot["name"]) assert b["snapshotName"] == lh_snapshot["name"] restore_pvc_name = pvc["metadata"]["name"] + "-restore" restore_pvc_size = pvc["spec"]["resources"]["requests"]["storage"] restore_csi_volume_snapshot(core_api, client, csivolsnap, restore_pvc_name, restore_pvc_size) restore_pod = pod_make() restore_pod_name = restore_pod["metadata"]["name"] restore_pod['spec']['volumes'] = [create_pvc_spec(restore_pvc_name)] create_and_wait_pod(core_api, restore_pod) restore_md5sum = \ get_pod_data_md5sum(core_api, restore_pod_name, path="/data/test") assert restore_md5sum == md5sum # Delete volumeSnapshot test delete_volumesnapshot(csivolsnap["metadata"]["name"], "default") if backup_is_deleted is False: find_backup(client, volume_name, b["snapshotName"]) else: wait_for_backup_delete(client, volume_name, b["name"])
def test_upgrade(upgrade_image_tag, settings_reset, volume_name, pod_make, statefulset, storage_class): # NOQA """ Test Longhorn upgrade Prerequisite: - Disable Auto Salvage Setting 1. Find the upgrade image tag 2. Create a volume, generate and write data into the volume. 3. Create a Pod using a volume, generate and write data 4. Create a StatefulSet with 2 replicas, generate and write data to their volumes 5. Keep all volumes attached 6. Upgrade Longhorn system. 7. Check Pod and StatefulSet didn't restart after upgrade 8. Check All volumes data 9. Write data to StatefulSet pods, and Attached volume 10. Check data written to StatefulSet pods, and attached volume. 11. Detach the volume, and Delete Pod, and StatefulSet to detach theirvolumes 12. Upgrade all volumes engine images. 13. Attach the volume, and recreate Pod, and StatefulSet 14. Check All volumes data """ new_ei_name = "longhornio/longhorn-engine:" + upgrade_image_tag client = get_longhorn_api_client() core_api = get_core_api_client() host_id = get_self_host_id() pod_data_path = "/data/test" pod_volume_name = generate_volume_name() auto_salvage_setting = client.by_id_setting(SETTING_AUTO_SALVAGE) setting = client.update(auto_salvage_setting, value="false") assert setting.name == SETTING_AUTO_SALVAGE assert setting.value == "false" # Create Volume attached to a node. volume1 = create_and_check_volume(client, volume_name, size=SIZE) volume1.attach(hostId=host_id) volume1 = wait_for_volume_healthy(client, volume_name) volume1_data = write_volume_random_data(volume1) # Create Volume used by Pod pod_name, pv_name, pvc_name, pod_md5sum = \ prepare_pod_with_data_in_mb(client, core_api, pod_make, pod_volume_name, data_path=pod_data_path, add_liveness_prope=False) # Create multiple volumes used by StatefulSet statefulset_name = 'statefulset-upgrade-test' update_statefulset_manifests(statefulset, storage_class, statefulset_name) create_storage_class(storage_class) create_and_wait_statefulset(statefulset) statefulset_pod_info = get_statefulset_pod_info(core_api, statefulset) for sspod_info in statefulset_pod_info: sspod_info['data'] = generate_random_data(VOLUME_RWTEST_SIZE) write_pod_volume_data(core_api, sspod_info['pod_name'], sspod_info['data']) # upgrade Longhorn assert longhorn_upgrade(upgrade_image_tag) client = get_longhorn_api_client() # wait for 1 minute before checking pod restarts time.sleep(60) pod = core_api.read_namespaced_pod(name=pod_name, namespace='default') assert pod.status.container_statuses[0].restart_count == 0 for sspod_info in statefulset_pod_info: sspod = core_api.read_namespaced_pod(name=sspod_info['pod_name'], namespace='default') assert \ sspod.status.container_statuses[0].restart_count == 0 for sspod_info in statefulset_pod_info: resp = read_volume_data(core_api, sspod_info['pod_name']) assert resp == sspod_info['data'] res_pod_md5sum = get_pod_data_md5sum(core_api, pod_name, pod_data_path) assert res_pod_md5sum == pod_md5sum check_volume_data(volume1, volume1_data) for sspod_info in statefulset_pod_info: sspod_info['data'] = generate_random_data(VOLUME_RWTEST_SIZE) write_pod_volume_data(core_api, sspod_info['pod_name'], sspod_info['data']) for sspod_info in statefulset_pod_info: resp = read_volume_data(core_api, sspod_info['pod_name']) assert resp == sspod_info['data'] volume1 = client.by_id_volume(volume_name) volume1_data = write_volume_random_data(volume1) check_volume_data(volume1, volume1_data) statefulset['spec']['replicas'] = replicas = 0 apps_api = get_apps_api_client() apps_api.patch_namespaced_stateful_set( name=statefulset_name, namespace='default', body={ 'spec': { 'replicas': replicas } }) delete_and_wait_pod(core_api, pod_name) volume = client.by_id_volume(volume_name) volume.detach() volumes = client.list_volume() for v in volumes: wait_for_volume_detached(client, v.name) engineimages = client.list_engine_image() for ei in engineimages: if ei.image == new_ei_name: new_ei = ei volumes = client.list_volume() for v in volumes: volume = client.by_id_volume(v.name) volume.engineUpgrade(image=new_ei.image) statefulset['spec']['replicas'] = replicas = 2 apps_api = get_apps_api_client() apps_api.patch_namespaced_stateful_set( name=statefulset_name, namespace='default', body={ 'spec': { 'replicas': replicas } }) wait_statefulset(statefulset) pod = pod_make(name=pod_name) pod['spec']['volumes'] = [create_pvc_spec(pvc_name)] create_and_wait_pod(core_api, pod) volume1 = client.by_id_volume(volume_name) volume1.attach(hostId=host_id) volume1 = wait_for_volume_healthy(client, volume_name) for sspod_info in statefulset_pod_info: resp = read_volume_data(core_api, sspod_info['pod_name']) assert resp == sspod_info['data'] res_pod_md5sum = get_pod_data_md5sum(core_api, pod_name, pod_data_path) assert res_pod_md5sum == pod_md5sum check_volume_data(volume1, volume1_data)
def test_engine_live_upgrade_with_intensive_data_writing( client, core_api, volume_name, pod_make): # NOQA """ Test engine live upgrade with intensive data writing 1. Deploy a compatible new engine image 2. Create a volume(with the old default engine image) with /PV/PVC/Pod and wait for pod to be deployed. 3. Write data to a tmp file in the pod and get the md5sum 4. Upgrade the volume to the new engine image without waiting. 5. Keep copying data from the tmp file to the volume during the live upgrade. 6. Wait until the upgrade completed, verify the volume engine image changed 7. Wait for new replica mode update then check the engine status. 8. Verify all engine and replicas' engine image changed 9. Verify the reference count of the new engine image changed 10. Check the existing data. Then write new data to the upgraded volume and get the md5sum. 11. Delete the pod and wait for the volume detached. Then check engine and replicas's engine image again. 12. Recreate the pod. 13. Check if the attached volume is state `healthy` rather than `degraded`. 14. Check the data. """ default_img = common.get_default_engine_image(client) default_img_name = default_img.name default_img = wait_for_engine_image_ref_count(client, default_img_name, 0) cli_v = default_img.cliAPIVersion cli_minv = default_img.cliAPIMinVersion ctl_v = default_img.controllerAPIVersion ctl_minv = default_img.controllerAPIMinVersion data_v = default_img.dataFormatVersion data_minv = default_img.dataFormatMinVersion engine_upgrade_image = common.get_upgrade_test_image( cli_v, cli_minv, ctl_v, ctl_minv, data_v, data_minv) new_img = client.create_engine_image(image=engine_upgrade_image) new_img_name = new_img.name ei_status_value = get_engine_image_status_value(client, new_img_name) new_img = wait_for_engine_image_state(client, new_img_name, ei_status_value) assert new_img.refCount == 0 assert new_img.noRefSince != "" default_img = common.get_default_engine_image(client) default_img_name = default_img.name pod_name = volume_name + "-pod" pv_name = volume_name + "-pv" pvc_name = volume_name + "-pvc" pod = pod_make(name=pod_name) volume = create_and_check_volume(client, volume_name, num_of_replicas=3, size=str(1 * Gi)) original_engine_image = volume.engineImage assert original_engine_image != engine_upgrade_image create_pv_for_volume(client, core_api, volume, pv_name) create_pvc_for_volume(client, core_api, volume, pvc_name) pod['spec']['volumes'] = [create_pvc_spec(pvc_name)] create_and_wait_pod(core_api, pod) volume = client.by_id_volume(volume_name) assert volume.engineImage == original_engine_image assert volume.currentImage == original_engine_image engine = get_volume_engine(volume) assert engine.engineImage == original_engine_image assert engine.currentImage == original_engine_image for replica in volume.replicas: assert replica.engineImage == original_engine_image assert replica.currentImage == original_engine_image data_path0 = "/tmp/test" data_path1 = "/data/test1" write_pod_volume_random_data(core_api, pod_name, data_path0, RANDOM_DATA_SIZE_LARGE) original_md5sum1 = get_pod_data_md5sum(core_api, pod_name, data_path0) volume.engineUpgrade(image=engine_upgrade_image) # Keep writing data to the volume during the live upgrade copy_pod_volume_data(core_api, pod_name, data_path0, data_path1) # Wait for live upgrade complete wait_for_volume_current_image(client, volume_name, engine_upgrade_image) volume = wait_for_volume_replicas_mode(client, volume_name, "RW") engine = get_volume_engine(volume) assert engine.engineImage == engine_upgrade_image check_volume_endpoint(volume) wait_for_engine_image_ref_count(client, default_img_name, 0) wait_for_engine_image_ref_count(client, new_img_name, 1) volume_file_md5sum1 = get_pod_data_md5sum(core_api, pod_name, data_path1) assert volume_file_md5sum1 == original_md5sum1 data_path2 = "/data/test2" write_pod_volume_random_data(core_api, pod_name, data_path2, RANDOM_DATA_SIZE_SMALL) original_md5sum2 = get_pod_data_md5sum(core_api, pod_name, data_path2) delete_and_wait_pod(core_api, pod_name) volume = wait_for_volume_detached(client, volume_name) assert len(volume.replicas) == 3 assert volume.engineImage == engine_upgrade_image engine = get_volume_engine(volume) assert engine.engineImage == engine_upgrade_image for replica in volume.replicas: assert replica.engineImage == engine_upgrade_image create_and_wait_pod(core_api, pod) common.wait_for_volume_healthy(client, volume_name) volume_file_md5sum1 = get_pod_data_md5sum(core_api, pod_name, data_path1) assert volume_file_md5sum1 == original_md5sum1 volume_file_md5sum2 = get_pod_data_md5sum(core_api, pod_name, data_path2) assert volume_file_md5sum2 == original_md5sum2
def test_cloning_basic(client, core_api, pvc, pod, clone_pvc, clone_pod, storage_class_name='longhorn'): # NOQA """ 1. Create a PVC: ```yaml apiVersion: v1 kind: PersistentVolumeClaim metadata: name: source-pvc spec: storageClassName: longhorn accessModes: - ReadWriteOnce resources: requests: storage: 3Gi ``` 2. Specify the `source-pvc` in a pod yaml and start the pod 3. Wait for the pod to be running, write some data to the mount path of the volume 4. Clone a volume by creating the PVC: ```yaml apiVersion: v1 kind: PersistentVolumeClaim metadata: name: cloned-pvc spec: storageClassName: longhorn dataSource: name: source-pvc kind: PersistentVolumeClaim accessModes: - ReadWriteOnce resources: requests: storage: 3Gi ``` 5. Wait for the `CloneStatus.State` in `cloned-pvc` to be `completed` 6. Clone volume should get detached after cloning completion, wait for it. 7. Specify the `cloned-pvc` in a cloned pod yaml and deploy the cloned pod 8. In 3-min retry loop, wait for the cloned pod to be running 9. Verify the data in `cloned-pvc` is the same as in `source-pvc` 10. In 2-min retry loop, verify the volume of the `clone-pvc` eventually becomes healthy """ # Step-1 source_pvc_name = 'source-pvc' + generate_random_suffix() pvc['metadata']['name'] = source_pvc_name pvc['spec']['storageClassName'] = storage_class_name core_api.create_namespaced_persistent_volume_claim( body=pvc, namespace='default') wait_for_pvc_phase(core_api, source_pvc_name, "Bound") # Step-2 pod_name = 'source-pod' + generate_random_suffix() pod['metadata']['name'] = pod_name pod['spec']['volumes'] = [create_pvc_spec(source_pvc_name)] create_and_wait_pod(core_api, pod) # Step-3 write_pod_volume_random_data(core_api, pod_name, '/data/test', DATA_SIZE_IN_MB_2) source_data = get_pod_data_md5sum(core_api, pod_name, '/data/test') # Step-4 clone_pvc_name = 'clone-pvc' + generate_random_suffix() clone_pvc['metadata']['name'] = clone_pvc_name clone_pvc['spec']['storageClassName'] = storage_class_name clone_pvc['spec']['dataSource'] = { 'name': source_pvc_name, 'kind': 'PersistentVolumeClaim' } core_api.create_namespaced_persistent_volume_claim( body=clone_pvc, namespace='default') wait_for_pvc_phase(core_api, clone_pvc_name, "Bound") # Step-5 clone_volume_name = get_volume_name(core_api, clone_pvc_name) wait_for_volume_clone_status(client, clone_volume_name, VOLUME_FIELD_STATE, VOLUME_FIELD_CLONE_COMPLETED) # Step-6 wait_for_volume_detached(client, clone_volume_name) # Step-7,8 clone_pod_name = 'clone-pod' + generate_random_suffix() clone_pod['metadata']['name'] = clone_pod_name clone_pod['spec']['volumes'] = [create_pvc_spec(clone_pvc_name)] create_and_wait_pod(core_api, clone_pod) clone_data = get_pod_data_md5sum(core_api, clone_pod_name, '/data/test') # Step-9 assert source_data == clone_data # Step-10 wait_for_volume_healthy(client, clone_volume_name)
def test_cloning_interrupted(client, core_api, pvc, pod, clone_pvc, clone_pod): # NOQA """ 1. Create a PVC: ```yaml apiVersion: v1 kind: PersistentVolumeClaim metadata: name: source-pvc spec: storageClassName: longhorn accessModes: - ReadWriteOnce resources: requests: storage: 3Gi ``` 2. Specify the `source-pvc` in a pod yaml and start the pod 3. Wait for the pod to be running, write 500MB of data to the mount path of the volume 4. Clone a volume by creating the PVC: ```yaml apiVersion: v1 kind: PersistentVolumeClaim metadata: name: cloned-pvc spec: storageClassName: longhorn dataSource: name: source-pvc kind: PersistentVolumeClaim accessModes: - ReadWriteOnce resources: requests: storage: 3Gi ``` 5. Wait for the `CloneStatus.State` in `cloned-pvc` to be `initiated` 6. Kill all replicas process of the `source-pvc` 7. Wait for the `CloneStatus.State` in `cloned-pvc` to be `failed` 8. Clean up `clone-pvc` 9. Redeploy `cloned-pvc` and clone pod 10. In 3-min retry loop, verify cloned pod become running 11. `cloned-pvc` has the same data as `source-pvc` 12. In 2-min retry loop, verify the volume of the `clone-pvc` eventually becomes healthy. """ # Step-1 source_pvc_name = 'source-pvc' + generate_random_suffix() pvc['metadata']['name'] = source_pvc_name pvc['spec']['storageClassName'] = 'longhorn' core_api.create_namespaced_persistent_volume_claim( body=pvc, namespace='default') wait_for_pvc_phase(core_api, source_pvc_name, "Bound") # Step-2 pod_name = 'source-pod' + generate_random_suffix() pod['metadata']['name'] = pod_name pod['spec']['volumes'] = [create_pvc_spec(source_pvc_name)] create_and_wait_pod(core_api, pod) # Step-3 write_pod_volume_random_data(core_api, pod_name, '/data/test', DATA_SIZE_IN_MB_3) source_data = get_pod_data_md5sum(core_api, pod_name, '/data/test') source_volume_name = get_volume_name(core_api, source_pvc_name) # Step-4 clone_pvc_name = 'clone-pvc' + generate_random_suffix() clone_pvc['metadata']['name'] = clone_pvc_name clone_pvc['spec']['storageClassName'] = 'longhorn' clone_pvc['spec']['dataSource'] = { 'name': source_pvc_name, 'kind': 'PersistentVolumeClaim' } core_api.create_namespaced_persistent_volume_claim( body=clone_pvc, namespace='default') # Step-5 clone_volume_name = get_clone_volume_name(client, source_volume_name) wait_for_volume_clone_status(client, clone_volume_name, VOLUME_FIELD_STATE, 'initiated') # Step-6 crash_replica_processes(client, core_api, source_volume_name) # Step-7 wait_for_volume_faulted(client, source_volume_name) wait_for_volume_clone_status(client, clone_volume_name, VOLUME_FIELD_STATE, 'failed') # Step-8 delete_and_wait_pvc(core_api, clone_pvc_name) # Step-9 clone_pvc_name = 'clone-pvc-2' + generate_random_suffix() clone_pvc['metadata']['name'] = clone_pvc_name clone_pvc['spec']['storageClassName'] = 'longhorn' clone_pvc['spec']['dataSource'] = { 'name': source_pvc_name, 'kind': 'PersistentVolumeClaim' } core_api.create_namespaced_persistent_volume_claim( body=clone_pvc, namespace='default') wait_for_pvc_phase(core_api, clone_pvc_name, "Bound") # Step-9 clone_pod_name = 'clone-pod' + generate_random_suffix() clone_pod['metadata']['name'] = clone_pod_name clone_pod['spec']['volumes'] = [create_pvc_spec(clone_pvc_name)] create_and_wait_pod(core_api, clone_pod) # Step-10 clone_volume_name = get_volume_name(core_api, clone_pvc_name) wait_for_volume_clone_status(client, clone_volume_name, VOLUME_FIELD_STATE, VOLUME_FIELD_CLONE_COMPLETED) # Step-11 clone_data = get_pod_data_md5sum(core_api, clone_pod_name, '/data/test') assert source_data == clone_data # Step-12 wait_for_volume_healthy(client, clone_volume_name)