def csi_io_test(client, core_api, csi_pv, pvc, pod_make, base_image=""): # NOQA pv_name = generate_volume_name() pod_name = 'csi-io-test' create_and_wait_csi_pod_named_pv(pv_name, pod_name, client, core_api, csi_pv, pvc, pod_make, base_image, "") test_data = generate_random_data(VOLUME_RWTEST_SIZE) write_volume_data(core_api, pod_name, test_data) delete_and_wait_pod(core_api, pod_name) common.wait_for_volume_detached(client, csi_pv['metadata']['name']) pod_name = 'csi-io-test-2' pod = pod_make(name=pod_name) pod['spec']['volumes'] = [ create_pvc_spec(pv_name) ] csi_pv['metadata']['name'] = pv_name csi_pv['spec']['csi']['volumeHandle'] = pv_name pvc['metadata']['name'] = pv_name pvc['spec']['volumeName'] = pv_name update_storageclass_references(CSI_PV_TEST_STORAGE_NAME, csi_pv, pvc) create_and_wait_pod(core_api, pod) resp = read_volume_data(core_api, pod_name) assert resp == test_data
def test_csi_io(client, core_api, csi_pv, pvc, pod):  # NOQA
    """
    Test that input and output on a statically defined CSI volume works as
    expected.

    Fixtures are torn down here in the reverse order that they are specified
    as parameters. Take caution when reordering test fixtures.
    """
    pod_name = 'csi-io-test'
    pod['metadata']['name'] = pod_name
    pod['spec']['volumes'] = [create_pvc_spec(pvc['metadata']['name'])]
    pvc['spec']['volumeName'] = csi_pv['metadata']['name']
    test_data = generate_random_data(VOLUME_RWTEST_SIZE)

    create_pv_storage(core_api, client, csi_pv, pvc)
    create_and_wait_pod(core_api, pod)
    write_volume_data(core_api, pod_name, test_data)
    delete_and_wait_pod(core_api, pod_name)
    common.wait_for_volume_detached(client, csi_pv['metadata']['name'])

    pod_name = 'csi-io-test-2'
    pod['metadata']['name'] = pod_name
    create_and_wait_pod(core_api, pod)

    resp = read_volume_data(core_api, pod_name)
    assert resp == test_data

def csi_backup_test(client, core_api, csi_pv, pvc, pod_make, backing_image=""):  # NOQA
    pod_name = 'csi-backup-test'
    vol_name = create_and_wait_csi_pod(
        pod_name, client, core_api, csi_pv, pvc, pod_make, backing_image, "")
    test_data = generate_random_data(VOLUME_RWTEST_SIZE)

    setting = client.by_id_setting(common.SETTING_BACKUP_TARGET)

    # test backupTarget for multiple settings
    backupstores = common.get_backupstore_url()
    i = 1
    for backupstore in backupstores:
        if common.is_backupTarget_s3(backupstore):
            backupsettings = backupstore.split("$")
            setting = client.update(setting, value=backupsettings[0])
            assert setting.value == backupsettings[0]

            credential = client.by_id_setting(
                common.SETTING_BACKUP_TARGET_CREDENTIAL_SECRET)
            credential = client.update(credential, value=backupsettings[1])
            assert credential.value == backupsettings[1]
        else:
            setting = client.update(setting, value=backupstore)
            assert setting.value == backupstore
            credential = client.by_id_setting(
                common.SETTING_BACKUP_TARGET_CREDENTIAL_SECRET)
            credential = client.update(credential, value="")
            assert credential.value == ""

        backupstore_test(client, core_api, csi_pv, pvc, pod_make, pod_name,
                         vol_name, backing_image, test_data, i)
        i += 1

    delete_and_wait_pod(core_api, pod_name)
    delete_and_wait_pvc(core_api, vol_name)
    delete_and_wait_pv(core_api, vol_name)

def test_provisioner_io(client, core_api, storage_class, pvc, pod):  # NOQA
    """
    Test that input and output on a StorageClass provisioned
    PersistentVolumeClaim works as expected.

    Fixtures are torn down here in the reverse order that they are specified
    as parameters. Take caution when reordering test fixtures.
    """
    # Prepare pod and volume specs.
    pod_name = 'provisioner-io-test'
    pod['metadata']['name'] = pod_name
    pod['spec']['volumes'] = [create_pvc_spec(pvc['metadata']['name'])]
    pvc['spec']['storageClassName'] = DEFAULT_STORAGECLASS_NAME
    storage_class['metadata']['name'] = DEFAULT_STORAGECLASS_NAME
    test_data = generate_random_data(VOLUME_RWTEST_SIZE)

    create_storage(core_api, storage_class, pvc)
    create_and_wait_pod(core_api, pod)
    pvc_volume_name = get_volume_name(core_api, pvc['metadata']['name'])
    write_volume_data(core_api, pod_name, test_data)
    delete_and_wait_pod(core_api, pod_name)
    common.wait_for_volume_detached(client, pvc_volume_name)

    pod_name = 'flexvolume-provisioner-io-test-2'
    pod['metadata']['name'] = pod_name
    create_and_wait_pod(core_api, pod)

    resp = read_volume_data(core_api, pod_name)
    assert resp == test_data

def test_flexvolume_io(client, core_api, flexvolume, pod):  # NOQA
    """
    Test that input and output on a statically defined volume works as
    expected.

    Fixtures are torn down here in the reverse order that they are specified
    as parameters. Take caution when reordering test fixtures.
    """
    pod_name = 'flexvolume-io-test'
    pod['metadata']['name'] = pod_name
    pod['spec']['containers'][0]['volumeMounts'][0]['name'] = \
        flexvolume['name']
    pod['spec']['volumes'] = [flexvolume]
    test_data = generate_random_data(VOLUME_RWTEST_SIZE)

    create_and_wait_pod(core_api, pod)
    common.write_volume_data(core_api, pod_name, test_data)
    delete_and_wait_pod(core_api, pod_name)
    wait_for_volume_detached(client, flexvolume["name"])

    pod_name = 'volume-driver-io-test-2'
    pod['metadata']['name'] = pod_name
    create_and_wait_pod(core_api, pod)

    resp = read_volume_data(core_api, pod_name)
    assert resp == test_data

def csi_io_test(client, core_api, csi_pv, pvc, pod_make, base_image=""):  # NOQA
    pv_name = generate_volume_name()
    pod_name = 'csi-io-test'
    create_and_wait_csi_pod_named_pv(pv_name, pod_name, client, core_api,
                                     csi_pv, pvc, pod_make, base_image, "")

    test_data = generate_random_data(VOLUME_RWTEST_SIZE)
    write_pod_volume_data(core_api, pod_name, test_data)
    delete_and_wait_pod(core_api, pod_name)
    common.wait_for_volume_detached(client, csi_pv['metadata']['name'])

    pod_name = 'csi-io-test-2'
    pod = pod_make(name=pod_name)
    pod['spec']['volumes'] = [
        create_pvc_spec(pv_name)
    ]
    csi_pv['metadata']['name'] = pv_name
    csi_pv['spec']['csi']['volumeHandle'] = pv_name
    pvc['metadata']['name'] = pv_name
    pvc['spec']['volumeName'] = pv_name
    update_storageclass_references(CSI_PV_TEST_STORAGE_NAME, csi_pv, pvc)
    create_and_wait_pod(core_api, pod)

    resp = read_volume_data(core_api, pod_name)
    assert resp == test_data

def csi_backup_test(client, core_api, csi_pv, pvc, pod_make, backing_image=""):  # NOQA
    pod_name = 'csi-backup-test'
    vol_name = create_and_wait_csi_pod(
        pod_name, client, core_api, csi_pv, pvc, pod_make, backing_image, "")
    test_data = generate_random_data(VOLUME_RWTEST_SIZE)

    backupstore_test(client, core_api, csi_pv, pvc, pod_make, pod_name,
                     vol_name, backing_image, test_data)

    delete_and_wait_pod(core_api, pod_name)
    delete_and_wait_pvc(core_api, vol_name)
    delete_and_wait_pv(core_api, vol_name)

def csi_mount_test(client, core_api, csi_pv, pvc, pod_make,  # NOQA
                   volume_size, backing_image=""):  # NOQA
    pod_name = 'csi-mount-test'
    create_and_wait_csi_pod(pod_name, client, core_api, csi_pv, pvc, pod_make,
                            backing_image, "")

    volumes = client.list_volume().data
    assert len(volumes) == 1
    assert volumes[0].name == csi_pv['metadata']['name']
    assert volumes[0].size == str(volume_size)
    assert volumes[0].numberOfReplicas == \
        int(csi_pv['spec']['csi']['volumeAttributes']["numberOfReplicas"])
    assert volumes[0].state == "attached"
    assert volumes[0].backingImage == backing_image

    delete_and_wait_pod(core_api, pod_name)
    delete_and_wait_pvc(core_api, pvc['metadata']['name'])
    delete_and_wait_pv(core_api, csi_pv['metadata']['name'])

def flexvolume_io_test(client, core_api, flexvolume, pod):  # NOQA
    pod_name = 'flexvolume-io-test'
    pod['metadata']['name'] = pod_name
    pod['spec']['containers'][0]['volumeMounts'][0]['name'] = \
        flexvolume['name']
    pod['spec']['volumes'] = [flexvolume]
    test_data = generate_random_data(VOLUME_RWTEST_SIZE)

    create_and_wait_pod(core_api, pod)
    common.write_volume_data(core_api, pod_name, test_data)
    delete_and_wait_pod(core_api, pod_name)
    wait_for_volume_detached(client, flexvolume["name"])

    pod_name = 'volume-driver-io-test-2'
    pod['metadata']['name'] = pod_name
    create_and_wait_pod(core_api, pod)

    resp = read_volume_data(core_api, pod_name)
    assert resp == test_data

def test_reset_env():
    k8s_api_client = get_core_api_client()
    k8s_storage_client = get_storage_api_client()
    longhorn_api_client = get_longhorn_api_client()

    pod_list = k8s_api_client.list_namespaced_pod("default")
    for pod in pod_list.items:
        if STRESS_POD_NAME_PREFIX in pod.metadata.name:
            delete_and_wait_pod(k8s_api_client, pod.metadata.name)

    pvc_list = \
        k8s_api_client.list_namespaced_persistent_volume_claim("default")
    for pvc in pvc_list.items:
        if STRESS_PVC_NAME_PREFIX in pvc.metadata.name:
            delete_and_wait_pvc(k8s_api_client, pvc.metadata.name)

    pv_list = k8s_api_client.list_persistent_volume()
    for pv in pv_list.items:
        pv_name = pv.metadata.name
        if STRESS_PV_NAME_PREFIX in pv_name:
            try:
                delete_and_wait_pv(k8s_api_client, pv_name)
            except AssertionError:
                volumeattachment_list = \
                    k8s_storage_client.list_volume_attachment()
                for volumeattachment in volumeattachment_list.items:
                    volume_attachment_name = \
                        volumeattachment.spec.source.persistent_volume_name
                    if volume_attachment_name == pv_name:
                        delete_and_wait_volume_attachment(
                            k8s_storage_client,
                            volume_attachment_name
                        )
                        delete_and_wait_pv(k8s_api_client, pv.metadata.name)

    volume_list = longhorn_api_client.list_volume()
    for volume in volume_list.data:
        if STRESS_VOLUME_NAME_PREFIX in volume.name:
            delete_and_wait_longhorn(longhorn_api_client, volume.name)

def revert_random_snapshot(client, core_api, volume_name, pod_manifest, snapshots_md5sum):  # NOQA
    volume = client.by_id_volume(volume_name)
    host_id = get_self_host_id()
    pod_name = pod_manifest["metadata"]["name"]

    # wait for volume healthy if rebuilding deleted replica
    if volume.robustness != VOLUME_ROBUSTNESS_HEALTHY:
        wait_for_volume_healthy(client, volume_name)

    snapshot = get_random_snapshot(snapshots_md5sum)
    if snapshot is None:
        print("skipped, no snapshot found", end=" ")
        return

    delete_and_wait_pod(core_api, pod_name)
    wait_for_volume_detached(client, volume_name)

    volume = client.by_id_volume(volume_name)
    volume.attach(hostId=host_id, disableFrontend=True)
    volume = wait_for_volume_healthy_no_frontend(client, volume_name)

    volume.snapshotRevert(name=snapshot)

    volume = client.by_id_volume(volume_name)
    volume.detach()
    wait_for_volume_detached(client, volume_name)

    create_and_wait_pod(core_api, pod_manifest)

    current_md5sum = read_data_md5sum(core_api, pod_name)
    assert current_md5sum == snapshots_md5sum[snapshot].data_md5sum

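
# Illustrative sketch only: get_random_snapshot is referenced above but not
# defined in this section. Assuming snapshots_md5sum maps snapshot names to
# records carrying a data_md5sum attribute (as used in the assertion above),
# a minimal stand-in could simply pick a random recorded snapshot name, or
# return None when nothing has been recorded yet. The real helper may differ.
def get_random_snapshot_sketch(snapshots_md5sum):  # hypothetical helper
    import random

    snapshot_names = list(snapshots_md5sum.keys())
    if not snapshot_names:
        # no snapshot has been taken yet, caller is expected to skip
        return None
    return random.choice(snapshot_names)
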
def backupstore_test(client, core_api, csi_pv, pvc, pod_make, pod_name, vol_name, backing_image, test_data):  # NOQA
    write_pod_volume_data(core_api, pod_name, test_data)

    volume = client.by_id_volume(vol_name)
    snap = create_snapshot(client, vol_name)
    volume.snapshotBackup(name=snap.name)
    common.wait_for_backup_completion(client, vol_name, snap.name)
    bv, b = common.find_backup(client, vol_name, snap.name)

    pod2_name = 'csi-backup-test-2'
    vol2_name = create_and_wait_csi_pod(
        pod2_name, client, core_api, csi_pv, pvc, pod_make, backing_image,
        b.url)
    volume2 = client.by_id_volume(vol2_name)

    resp = read_volume_data(core_api, pod2_name)
    assert resp == test_data

    delete_backup(client, bv.name, b.name)
    delete_and_wait_pod(core_api, pod2_name)
    client.delete(volume2)

def test_replica_auto_balance_zone_best_effort_with_data_locality(
        client, core_api, volume_name, pod):  # NOQA
    """
    Background:

    Given set `replica-soft-anti-affinity` to `true`.
    And set `replica-zone-soft-anti-affinity` to `true`.
    And set `default-data-locality` to `best-effort`.
    And set `replicaAutoBalance` to `best-effort`.
    And set node-1 to zone-1.
        set node-2 to zone-1.
        set node-3 to zone-2.
    And create volume with 2 replicas.
    And create pv for volume.
    And create pvc for volume.

    Scenario Outline: replica auto-balance zones with best-effort should not
                      remove pod local replicas when data locality is enabled
                      (best-effort).

    Given create and wait pod on <pod-node>.
    And disable scheduling and evict node-3.
    And count replicas on each node.
    And 1 replica running on <pod-node>.
        1 replica running on <duplicate-node>.
        0 replicas running on node-3.

    When enable scheduling for node-3.

    Then count replicas on each node.
    And 1 replica running on <pod-node>.
        0 replicas running on <duplicate-node>.
        1 replica running on node-3.
    And count replicas in each zone.
    And 1 replica running in zone-1.
        1 replica running in zone-2.
    And loop 3 times, each time waiting 5 seconds, and count replicas on each
        node, to ensure no additional scheduling is happening.
        1 replica running on <pod-node>.
        0 replicas running on <duplicate-node>.
        1 replica running on node-3.
    And delete pod.

    Examples:
        | pod-node | duplicate-node |
        | node-1   | node-2         |
        | node-2   | node-1         |
        | node-1   | node-2         |
    """
    common.update_setting(client,
                          SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY, "true")
    common.update_setting(client,
                          SETTING_REPLICA_ZONE_SOFT_ANTI_AFFINITY, "true")
    common.update_setting(client,
                          SETTING_DEFAULT_DATA_LOCALITY, "best-effort")
    common.update_setting(client,
                          SETTING_REPLICA_AUTO_BALANCE, "best-effort")

    n1, n2, n3 = client.list_node()
    set_k8s_node_zone_label(core_api, n1.name, ZONE1)
    set_k8s_node_zone_label(core_api, n2.name, ZONE1)
    set_k8s_node_zone_label(core_api, n3.name, ZONE2)
    wait_longhorn_node_zone_updated(client)

    n_replicas = 2
    volume = create_and_check_volume(client, volume_name,
                                     num_of_replicas=n_replicas)
    common.create_pv_for_volume(client, core_api, volume, volume_name)
    common.create_pvc_for_volume(client, core_api, volume, volume_name)
    pod['spec']['volumes'] = [{
        "name": "pod-data",
        "persistentVolumeClaim": {
            "claimName": volume_name
        }
    }]

    for i in range(1, 4):
        pod_node_name = n2.name if i % 2 == 0 else n1.name
        pod['spec']['nodeSelector'] = {
            "kubernetes.io/hostname": pod_node_name
        }
        common.create_and_wait_pod(core_api, pod)

        client.update(n3, allowScheduling=False, evictionRequested=True)

        duplicate_node = [n1.name, n2.name]
        duplicate_node.remove(pod_node_name)
        for _ in range(RETRY_COUNTS):
            pod_node_r_count = common.get_host_replica_count(
                client, volume_name, pod_node_name, chk_running=True)
            duplicate_node_r_count = common.get_host_replica_count(
                client, volume_name, duplicate_node[0], chk_running=True)
            balance_node_r_count = common.get_host_replica_count(
                client, volume_name, n3.name, chk_running=False)

            if pod_node_r_count == duplicate_node_r_count == 1 and \
                    balance_node_r_count == 0:
                break

            time.sleep(RETRY_INTERVAL)
        assert pod_node_r_count == 1
        assert duplicate_node_r_count == 1
        assert balance_node_r_count == 0

        client.update(n3, allowScheduling=True)

        for _ in range(RETRY_COUNTS):
            pod_node_r_count = common.get_host_replica_count(
                client, volume_name, pod_node_name, chk_running=True)
            duplicate_node_r_count = common.get_host_replica_count(
                client, volume_name, duplicate_node[0], chk_running=False)
            balance_node_r_count = common.get_host_replica_count(
                client, volume_name, n3.name, chk_running=True)

            if pod_node_r_count == balance_node_r_count == 1 and \
                    duplicate_node_r_count == 0:
                break

            time.sleep(RETRY_INTERVAL)
        assert pod_node_r_count == 1
        assert duplicate_node_r_count == 0
        assert balance_node_r_count == 1

        z1_r_count = get_zone_replica_count(
            client, volume_name, ZONE1, chk_running=True)
        z2_r_count = get_zone_replica_count(
            client, volume_name, ZONE2, chk_running=True)
        assert z1_r_count == z2_r_count == 1

        # loop 3 times and each time wait 5 seconds to ensure there is no
        # re-scheduling happening.
        for _ in range(3):
            time.sleep(5)
            assert pod_node_r_count == common.get_host_replica_count(
                client, volume_name, pod_node_name, chk_running=True)
            assert duplicate_node_r_count == common.get_host_replica_count(
                client, volume_name, duplicate_node[0], chk_running=False)
            assert balance_node_r_count == common.get_host_replica_count(
                client, volume_name, n3.name, chk_running=True)

        common.delete_and_wait_pod(core_api, pod['metadata']['name'])

def test_pvc_creation_with_default_sc_set(client, core_api, storage_class, pod):  # NOQA
    # set default storage class
    storage_class['metadata']['annotations'] = \
        {"storageclass.kubernetes.io/is-default-class": "true"}
    create_storage_class(storage_class)

    static_sc_name = "longhorn-static-test"
    setting = client.by_id_setting(SETTING_DEFAULT_LONGHORN_STATIC_SC)
    setting = client.update(setting, value=static_sc_name)
    assert setting["value"] == static_sc_name

    volume_name = "test-pvc-creation-with-sc"
    pod_name = "pod-" + volume_name
    client.create_volume(name=volume_name, size=SIZE,
                         numberOfReplicas=2)
    volume = wait_for_volume_detached(client, volume_name)

    pv_name = "pv-" + volume_name
    pvc_name = "pvc-" + volume_name
    pvc_name_extra = "pvc-" + volume_name + "-extra"

    create_pv_for_volume(client, core_api, volume, pv_name)
    create_pvc_for_volume(client, core_api, volume, pvc_name)

    ret = core_api.list_namespaced_persistent_volume_claim(
        namespace='default')
    for item in ret.items:
        if item.metadata.name == pvc_name:
            pvc_found = item
            break
    assert pvc_found
    assert pvc_found.spec.storage_class_name == static_sc_name

    pod['metadata']['name'] = pod_name
    pod['spec']['volumes'] = [{
        'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'],
        'persistentVolumeClaim': {
            'claimName': pvc_name,
        },
    }]
    create_and_wait_pod(core_api, pod)

    ks = {
        'pvName': pv_name,
        'pvStatus': 'Bound',
        'namespace': 'default',
        'pvcName': pvc_name,
        'lastPVCRefAt': '',
        'lastPodRefAt': '',
        'workloadsStatus': [
            {
                'podName': pod_name,
                'podStatus': 'Running',
                'workloadName': '',
                'workloadType': '',
            },
        ],
    }
    wait_volume_kubernetes_status(client, volume_name, ks)

    delete_and_wait_pod(core_api, pod_name)
    delete_and_wait_pvc(core_api, pvc_name)

    # try to reuse the pv
    volume = wait_for_volume_detached(client, volume_name)
    create_pvc_for_volume(client, core_api, volume, pvc_name_extra)
    pod['spec']['volumes'][0]['persistentVolumeClaim']['claimName'] = \
        pvc_name_extra
    create_and_wait_pod(core_api, pod)

    ks['pvcName'] = pvc_name_extra
    wait_volume_kubernetes_status(client, volume_name, ks)

    delete_and_wait_pod(core_api, pod_name)
    delete_and_wait_pvc(core_api, pvc_name_extra)
    delete_and_wait_pv(core_api, pv_name)

    # without default storage class
    delete_storage_class(storage_class['metadata']['name'])

    create_pv_for_volume(client, core_api, volume, pv_name)
    create_pvc_for_volume(client, core_api, volume, pvc_name)

    ret = core_api.list_namespaced_persistent_volume_claim(
        namespace='default')
    for item in ret.items:
        if item.metadata.name == pvc_name:
            pvc2 = item
            break
    assert pvc2
    assert pvc2.spec.storage_class_name == static_sc_name

    delete_and_wait_pvc(core_api, pvc_name)
    delete_and_wait_pv(core_api, pv_name)

def test_offline_node_with_attached_volume_and_pod(
        client, core_api, volume_name, make_deployment_with_pvc,
        reset_cluster_ready_status):  # NOQA
    """
    Test offline node with attached volume and pod

    1. Create PV/PVC/Deployment manifest.
    2. Update deployment's tolerations to 20 seconds to speed up test
    3. Update deployment's node affinity rule to avoid the current node
    4. Create volume, PV/PVC and deployment.
    5. Find the pod in the deployment and write `test_data` into it
    6. Shutdown the node pod is running on
    7. Wait for deployment to delete the pod
        1. Deployment cannot delete the pod here because kubelet doesn't
           respond
    8. Force delete the terminating pod
    9. Wait for the new pod to be created and the volume attached
    10. Check `test_data` in the new pod
    """
    toleration_seconds = 20

    apps_api = get_apps_api_client()
    cloudprovider = detect_cloudprovider()

    volume_name = generate_volume_name()
    pv_name = volume_name + "-pv"
    pvc_name = volume_name + "-pvc"
    deployment_name = volume_name + "-dep"

    longhorn_test_node_name = get_self_host_id()

    deployment_manifest = make_deployment_with_pvc(deployment_name, pvc_name)

    unreachable_toleration = {
        "key": "node.kubernetes.io/unreachable",
        "operator": "Exists",
        "effect": "NoExecute",
        "tolerationSeconds": toleration_seconds
    }

    not_ready_toleration = {
        "key": "node.kubernetes.io/not-ready",
        "operator": "Exists",
        "effect": "NoExecute",
        "tolerationSeconds": toleration_seconds
    }

    deployment_manifest["spec"]["template"]["spec"]["tolerations"] = \
        [unreachable_toleration, not_ready_toleration]

    node_affinity_roles = {
        "nodeAffinity": {
            "requiredDuringSchedulingIgnoredDuringExecution": {
                "nodeSelectorTerms": [{
                    "matchExpressions": [{
                        "key": "kubernetes.io/hostname",
                        "operator": "NotIn",
                        "values": [longhorn_test_node_name]
                    }]
                }]
            }
        }
    }

    deployment_manifest["spec"]["template"]["spec"]["affinity"] = \
        node_affinity_roles

    longhorn_volume = create_and_check_volume(client, volume_name, size=SIZE)
    wait_for_volume_detached(client, volume_name)
    create_pv_for_volume(client, core_api, longhorn_volume, pv_name)
    create_pvc_for_volume(client, core_api, longhorn_volume, pvc_name)

    create_and_wait_deployment(apps_api, deployment_manifest)

    deployment_label_selector = \
        "name=" + deployment_manifest["metadata"]["labels"]["name"]

    deployment_pod_list = \
        core_api.list_namespaced_pod(namespace="default",
                                     label_selector=deployment_label_selector)

    assert deployment_pod_list.items.__len__() == 1

    pod_name = deployment_pod_list.items[0].metadata.name

    test_data = generate_random_data(VOLUME_RWTEST_SIZE)
    write_pod_volume_data(core_api, pod_name, test_data)

    node_name = deployment_pod_list.items[0].spec.node_name
    node = cloudprovider.node_id(node_name)

    cloudprovider.node_shutdown(node)

    k8s_node_down = wait_for_node_down_k8s(node_name, core_api)
    assert k8s_node_down

    client = get_longhorn_api_client()

    longhorn_node_down = wait_for_node_down_longhorn(node_name, client)
    assert longhorn_node_down

    time.sleep(toleration_seconds + 5)

    for i in range(TERMINATING_POD_RETRYS):
        deployment_pod_list = \
            core_api.list_namespaced_pod(
                namespace="default",
                label_selector=deployment_label_selector
            )

        terminating_pod_name = None
        for pod in deployment_pod_list.items:
            if pod.metadata.__getattribute__("deletion_timestamp") is not None:
                terminating_pod_name = pod.metadata.name
                break

        if terminating_pod_name is not None:
            break
        else:
            time.sleep(TERMINATING_POD_INTERVAL)

    assert terminating_pod_name is not None

    core_api.delete_namespaced_pod(namespace="default",
                                   name=terminating_pod_name,
                                   grace_period_seconds=0)

    delete_and_wait_pod(core_api, terminating_pod_name)

    deployment_pod_list = \
        core_api.list_namespaced_pod(
            namespace="default",
            label_selector=deployment_label_selector
        )

    assert deployment_pod_list.items.__len__() == 1

    wait_for_volume_detached(client, volume_name)
    wait_for_volume_healthy(client, volume_name)

    deployment_pod_list = \
        core_api.list_namespaced_pod(
            namespace="default",
            label_selector=deployment_label_selector
        )

    assert deployment_pod_list.items.__len__() == 1

    new_pod_name = deployment_pod_list.items[0].metadata.name

    wait_pod(new_pod_name)

    resp_data = read_volume_data(core_api, new_pod_name)

    assert test_data == resp_data

def test_rwx_delete_share_manager_pod(core_api, statefulset):  # NOQA
    """
    Test moving of Share manager pod from one node to another.

    1. Create a StatefulSet of 1 pod with VolumeClaimTemplate where accessMode
       is 'RWX'.
    2. Wait for StatefulSet to come up healthy.
    3. Write data and compute md5sum.
    4. Delete the share manager pod.
    5. Wait for a new pod to be created and volume getting attached.
    6. Check the data md5sum in statefulSet.
    7. Write more data to it and compute md5sum.
    8. Check the data md5sum in share manager volume.
    """
    statefulset_name = 'statefulset-delete-share-manager-pods-test'
    statefulset['metadata']['name'] = \
        statefulset['spec']['selector']['matchLabels']['app'] = \
        statefulset['spec']['serviceName'] = \
        statefulset['spec']['template']['metadata']['labels']['app'] = \
        statefulset_name
    statefulset['spec']['replicas'] = 1
    statefulset['spec']['volumeClaimTemplates'][0]['spec']['storageClassName']\
        = 'longhorn'
    statefulset['spec']['volumeClaimTemplates'][0]['spec']['accessModes'] \
        = ['ReadWriteMany']

    create_and_wait_statefulset(statefulset)

    pod_name = statefulset_name + '-' + '0'
    pvc_name = \
        statefulset['spec']['volumeClaimTemplates'][0]['metadata']['name'] \
        + '-' + statefulset_name + '-0'
    pv_name = get_volume_name(core_api, pvc_name)
    share_manager_name = 'share-manager-' + pv_name

    test_data = generate_random_data(VOLUME_RWTEST_SIZE)
    write_pod_volume_data(core_api, pod_name, test_data, filename='test1')

    delete_and_wait_pod(core_api, share_manager_name,
                        namespace=LONGHORN_NAMESPACE)

    target_pod = core_api.read_namespaced_pod(name=pod_name,
                                              namespace='default')
    wait_delete_pod(core_api, target_pod.metadata.uid)
    wait_for_pod_remount(core_api, pod_name)

    test_data_2 = generate_random_data(VOLUME_RWTEST_SIZE)
    write_pod_volume_data(core_api, pod_name, test_data_2, filename='test2')

    command1 = 'cat /export/' + pv_name + '/test1'
    share_manager_data_1 = exec_command_in_pod(core_api, command1,
                                               share_manager_name,
                                               LONGHORN_NAMESPACE)
    assert test_data == share_manager_data_1

    command2 = 'cat /export/' + pv_name + '/test2'
    share_manager_data_2 = exec_command_in_pod(core_api, command2,
                                               share_manager_name,
                                               LONGHORN_NAMESPACE)
    assert test_data_2 == share_manager_data_2

def test_kubernetes_status(client, core_api, storage_class,  # NOQA
                           statefulset, csi_pv, pvc, pod):  # NOQA
    statefulset_name = 'kubernetes-status-test'
    update_statefulset_manifests(statefulset, storage_class, statefulset_name)

    storage_class['reclaimPolicy'] = 'Retain'
    create_storage_class(storage_class)
    create_and_wait_statefulset(statefulset)

    pod_info = get_statefulset_pod_info(core_api, statefulset)
    volume_info = [p['pv_name'] for p in pod_info]

    extra_pod_name = 'extra-pod-using-' + volume_info[1]
    pod['metadata']['name'] = extra_pod_name
    p2 = core_api.read_namespaced_pod(name=pod_info[1]['pod_name'],
                                      namespace='default')
    pod['spec']['nodeName'] = p2.spec.node_name
    pod['spec']['volumes'] = [{
        'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'],
        'persistentVolumeClaim': {
            'claimName': pod_info[1]['pvc_name'],
        },
    }]
    create_and_wait_pod(core_api, pod)

    for i in range(len(volume_info)):
        p, volume_name = pod_info[i], volume_info[i]
        volume = client.by_id_volume(volume_name)
        k_status = volume["kubernetesStatus"]
        workloads = k_status['workloadsStatus']
        assert k_status['pvName'] == p['pv_name']
        assert k_status['pvStatus'] == 'Bound'
        assert k_status['namespace'] == 'default'
        assert k_status['pvcName'] == p['pvc_name']
        assert not k_status['lastPVCRefAt']
        assert not k_status['lastPodRefAt']
        if i == 0:
            assert len(workloads) == 1
            assert workloads[0]['podName'] == p['pod_name']
            assert workloads[0]['workloadName'] == statefulset_name
            assert workloads[0]['workloadType'] == 'StatefulSet'
            for _ in range(RETRY_COUNTS):
                if workloads[0]['podStatus'] == 'Running':
                    break
                time.sleep(RETRY_INTERVAL)
                volume = client.by_id_volume(volume_name)
                k_status = volume["kubernetesStatus"]
                workloads = k_status['workloadsStatus']
            assert workloads[0]['podStatus'] == 'Running'
        if i == 1:
            assert len(k_status['workloadsStatus']) == 2
            if workloads[0]['podName'] == pod_info[i]['pod_name']:
                assert workloads[1]['podName'] == extra_pod_name
                assert workloads[0]['workloadName'] == statefulset_name
                assert workloads[0]['workloadType'] == 'StatefulSet'
                assert not workloads[1]['workloadName']
                assert not workloads[1]['workloadType']
            else:
                assert workloads[1]['podName'] == pod_info[i]['pod_name']
                assert workloads[0]['podName'] == extra_pod_name
                assert not workloads[0]['workloadName']
                assert not workloads[0]['workloadType']
                assert workloads[1]['workloadName'] == statefulset_name
                assert workloads[1]['workloadType'] == 'StatefulSet'
            for _ in range(RETRY_COUNTS):
                if workloads[0]['podStatus'] == 'Running' and \
                        workloads[1]['podStatus'] == 'Running':
                    break
                time.sleep(RETRY_INTERVAL)
                volume = client.by_id_volume(volume_name)
                k_status = volume["kubernetesStatus"]
                workloads = k_status['workloadsStatus']
                assert len(workloads) == 2
            assert workloads[0]['podStatus'] == 'Running'
            assert workloads[1]['podStatus'] == 'Running'

    ks_list = [{}, {}]
    delete_and_wait_statefulset_only(core_api, statefulset)
    # the extra pod is still using the 2nd volume
    for i in range(len(volume_info)):
        p, volume_name = pod_info[i], volume_info[i]
        ks_list[i]['pvName'] = p['pv_name']
        ks_list[i]['pvStatus'] = 'Bound'
        ks_list[i]['namespace'] = 'default'
        ks_list[i]['pvcName'] = p['pvc_name']
        ks_list[i]['lastPVCRefAt'] = ''
        if i == 0:
            ks_list[i]['lastPodRefAt'] = 'not empty'
            ks_list[i]['workloadsStatus'] = [
                {
                    'podName': p['pod_name'],
                    'podStatus': 'Running',
                    'workloadName': statefulset_name,
                    'workloadType': 'StatefulSet',
                },
            ]
        if i == 1:
            ks_list[i]['lastPodRefAt'] = ''
            ks_list[i]['workloadsStatus'] = [{
                'podName': extra_pod_name,
                'podStatus': 'Running',
                'workloadName': '',
                'workloadType': '',
            }]
        wait_volume_kubernetes_status(client, volume_name, ks_list[i])

    # deleted extra_pod, all volumes have no workload
    delete_and_wait_pod(core_api, pod['metadata']['name'])
    for i in range(len(volume_info)):
        p, volume_name = pod_info[i], volume_info[i]
        ks_list[i]['lastPodRefAt'] = 'not empty'
        wait_volume_kubernetes_status(client, volume_name, ks_list[i])

    # deleted pvc only.
    for i in range(len(volume_info)):
        p, volume_name = pod_info[i], volume_info[i]
        delete_and_wait_pvc(core_api, p['pvc_name'])
        ks_list[i]['pvStatus'] = 'Released'
        ks_list[i]['lastPVCRefAt'] = 'not empty'
        wait_volume_kubernetes_status(client, volume_name, ks_list[i])

    # deleted pv only.
    for i in range(len(volume_info)):
        p, volume_name = pod_info[i], volume_info[i]
        delete_and_wait_pv(core_api, p['pv_name'])
        ks_list[i]['pvName'] = ''
        ks_list[i]['pvStatus'] = ''
        wait_volume_kubernetes_status(client, volume_name, ks_list[i])

    # reuse that volume
    for p, volume_name in zip(pod_info, volume_info):
        p['pod_name'] = p['pod_name'].replace('kubernetes-status-test',
                                              'kubernetes-status-test-reuse')
        p['pvc_name'] = p['pvc_name'].replace('kubernetes-status-test',
                                              'kubernetes-status-test-reuse')
        p['pv_name'] = p['pvc_name']

        csi_pv['metadata']['name'] = p['pv_name']
        csi_pv['spec']['csi']['volumeHandle'] = volume_name
        core_api.create_persistent_volume(csi_pv)

        pvc['metadata']['name'] = p['pvc_name']
        pvc['spec']['volumeName'] = p['pv_name']
        core_api.create_namespaced_persistent_volume_claim(
            body=pvc, namespace='default')

        pod['metadata']['name'] = p['pod_name']
        pod['spec']['volumes'] = [{
            'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'],
            'persistentVolumeClaim': {
                'claimName': p['pvc_name'],
            },
        }]
        create_and_wait_pod(core_api, pod)

        ks = {
            'pvName': p['pv_name'],
            'pvStatus': 'Bound',
            'namespace': 'default',
            'pvcName': p['pvc_name'],
            'lastPVCRefAt': '',
            'lastPodRefAt': '',
            'workloadsStatus': [
                {
                    'podName': p['pod_name'],
                    'podStatus': 'Running',
                    'workloadName': '',
                    'workloadType': '',
                },
            ],
        }
        wait_volume_kubernetes_status(client, volume_name, ks)

        delete_and_wait_pod(core_api, p['pod_name'])
        # Since persistentVolumeReclaimPolicy of csi_pv is `Delete`,
        # we don't need to delete bounded pv manually
        delete_and_wait_pvc(core_api, p['pvc_name'])
        wait_delete_pv(core_api, p['pv_name'])

def test_csi_block_volume(client, core_api, storage_class, pvc, pod_manifest):  # NOQA
    """
    Test CSI feature: raw block volume

    1. Create a PVC with `volumeMode = Block`
    2. Create a pod using the PVC to dynamic provision a volume
    3. Verify the pod creation
    4. Generate `test_data` and write to the block volume directly in the pod
    5. Read the data back for validation
    6. Delete the pod and create `pod2` to use the same volume
    7. Validate the data in `pod2` is consistent with `test_data`
    """
    pod_name = 'csi-block-volume-test'
    pvc_name = pod_name + "-pvc"
    device_path = "/dev/longhorn/longhorn-test-blk"

    storage_class['reclaimPolicy'] = 'Retain'
    pvc['metadata']['name'] = pvc_name
    pvc['spec']['volumeMode'] = 'Block'
    pvc['spec']['storageClassName'] = storage_class['metadata']['name']
    pvc['spec']['resources'] = {
        'requests': {
            'storage': size_to_string(1 * Gi)
        }
    }
    pod_manifest['metadata']['name'] = pod_name
    pod_manifest['spec']['volumes'] = [{
        'name': 'longhorn-blk',
        'persistentVolumeClaim': {
            'claimName': pvc_name,
        },
    }]
    pod_manifest['spec']['containers'][0]['volumeMounts'] = []
    pod_manifest['spec']['containers'][0]['volumeDevices'] = [
        {'name': 'longhorn-blk', 'devicePath': device_path}
    ]

    create_storage_class(storage_class)
    create_pvc(pvc)
    pv_name = wait_and_get_pv_for_pvc(core_api, pvc_name).metadata.name
    create_and_wait_pod(core_api, pod_manifest)

    test_data = generate_random_data(VOLUME_RWTEST_SIZE)
    test_offset = random.randint(0, VOLUME_RWTEST_SIZE)
    write_pod_block_volume_data(
        core_api, pod_name, test_data, test_offset, device_path)
    returned_data = read_pod_block_volume_data(
        core_api, pod_name, len(test_data), test_offset, device_path
    )
    assert test_data == returned_data
    md5_sum = get_pod_data_md5sum(
        core_api, pod_name, device_path)

    delete_and_wait_pod(core_api, pod_name)
    common.wait_for_volume_detached(client, pv_name)

    pod_name_2 = 'csi-block-volume-test-reuse'
    pod_manifest['metadata']['name'] = pod_name_2
    create_and_wait_pod(core_api, pod_manifest)

    returned_data = read_pod_block_volume_data(
        core_api, pod_name_2, len(test_data), test_offset, device_path
    )
    assert test_data == returned_data
    md5_sum_2 = get_pod_data_md5sum(
        core_api, pod_name_2, device_path)
    assert md5_sum == md5_sum_2

    delete_and_wait_pod(core_api, pod_name_2)
    delete_and_wait_pvc(core_api, pvc_name)
    delete_and_wait_pv(core_api, pv_name)

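
# Illustrative sketch only (an assumption, not the actual common.py helper):
# one way a helper like write_pod_block_volume_data could push raw bytes to the
# block device exposed inside the pod is to exec `dd` through the Kubernetes
# stream API. The real helper used above may be implemented differently.
from kubernetes.stream import stream  # real kubernetes-client API


def write_block_volume_data_sketch(api, pod_name, data, offset, device_path):
    # bs=1 makes `seek` count in bytes; the payload is piped into dd.
    write_cmd = [
        '/bin/sh', '-c',
        "echo -n '%s' | dd of=%s bs=1 seek=%s" % (data, device_path, offset)
    ]
    return stream(api.connect_get_namespaced_pod_exec, pod_name, 'default',
                  command=write_cmd, stderr=True, stdin=False,
                  stdout=True, tty=False)
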
def test_kubernetes_status(client, core_api, storage_class,  # NOQA
                           statefulset, csi_pv, pvc, pod):  # NOQA
    statefulset_name = 'kubernetes-status-test'
    update_statefulset_manifests(statefulset, storage_class, statefulset_name)

    storage_class['reclaimPolicy'] = 'Retain'
    create_storage_class(storage_class)
    create_and_wait_statefulset(statefulset)

    pod_info = get_statefulset_pod_info(core_api, statefulset)
    volume_info = [p['pv_name'] for p in pod_info]

    extra_pod_name = 'extra-pod-using-' + volume_info[1]
    pod['metadata']['name'] = extra_pod_name
    p2 = core_api.read_namespaced_pod(name=pod_info[1]['pod_name'],
                                      namespace='default')
    pod['spec']['nodeName'] = p2.spec.node_name
    pod['spec']['volumes'] = [{
        'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'],
        'persistentVolumeClaim': {
            'claimName': pod_info[1]['pvc_name'],
        },
    }]
    create_and_wait_pod(core_api, pod)

    for i in range(len(volume_info)):
        p, volume_name = pod_info[i], volume_info[i]
        volume = client.by_id_volume(volume_name)
        k_status = volume["kubernetesStatus"]
        workloads = k_status['workloadsStatus']
        assert k_status['pvName'] == p['pv_name']
        assert k_status['pvStatus'] == 'Bound'
        assert k_status['namespace'] == 'default'
        assert k_status['pvcName'] == p['pvc_name']
        assert not k_status['lastPVCRefAt']
        assert not k_status['lastPodRefAt']
        if i == 0:
            assert len(workloads) == 1
            assert workloads[0]['podName'] == p['pod_name']
            assert workloads[0]['workloadName'] == statefulset_name
            assert workloads[0]['workloadType'] == 'StatefulSet'
            for _ in range(RETRY_COUNTS):
                if workloads[0]['podStatus'] == 'Running':
                    break
                time.sleep(RETRY_INTERVAL)
                volume = client.by_id_volume(volume_name)
                k_status = volume["kubernetesStatus"]
                workloads = k_status['workloadsStatus']
            assert workloads[0]['podStatus'] == 'Running'
        if i == 1:
            assert len(k_status['workloadsStatus']) == 2
            if workloads[0]['podName'] == pod_info[i]['pod_name']:
                assert workloads[1]['podName'] == extra_pod_name
                assert workloads[0]['workloadName'] == statefulset_name
                assert workloads[0]['workloadType'] == 'StatefulSet'
                assert not workloads[1]['workloadName']
                assert not workloads[1]['workloadType']
            else:
                assert workloads[1]['podName'] == pod_info[i]['pod_name']
                assert workloads[0]['podName'] == extra_pod_name
                assert not workloads[0]['workloadName']
                assert not workloads[0]['workloadType']
                assert workloads[1]['workloadName'] == statefulset_name
                assert workloads[1]['workloadType'] == 'StatefulSet'
            for _ in range(RETRY_COUNTS):
                if workloads[0]['podStatus'] == 'Running' and \
                        workloads[1]['podStatus'] == 'Running':
                    break
                time.sleep(RETRY_INTERVAL)
                volume = client.by_id_volume(volume_name)
                k_status = volume["kubernetesStatus"]
                workloads = k_status['workloadsStatus']
                assert len(workloads) == 2
            assert workloads[0]['podStatus'] == 'Running'
            assert workloads[1]['podStatus'] == 'Running'

    # the extra pod is still using the 2nd volume
    delete_and_wait_statefulset_only(core_api, statefulset)
    for i in range(len(volume_info)):
        p, volume_name = pod_info[i], volume_info[i]
        volume = client.by_id_volume(volume_name)
        k_status = volume["kubernetesStatus"]
        workloads = k_status['workloadsStatus']
        assert k_status['pvName'] == p['pv_name']
        assert k_status['pvStatus'] == 'Bound'
        assert k_status['namespace'] == 'default'
        assert k_status['pvcName'] == p['pvc_name']
        assert not k_status['lastPVCRefAt']
        assert len(workloads) == 1
        if i == 0:
            assert workloads[0]['podName'] == p['pod_name']
            assert workloads[0]['workloadName'] == statefulset_name
            assert workloads[0]['workloadType'] == 'StatefulSet'
            assert k_status['lastPodRefAt']
        if i == 1:
            assert workloads[0]['podName'] == extra_pod_name
            assert not workloads[0]['workloadName']
            assert not workloads[0]['workloadType']
            assert not k_status['lastPodRefAt']

    # deleted extra_pod, all volumes have no workload
    delete_and_wait_pod(core_api, pod['metadata']['name'])
    for i in range(len(volume_info)):
        p, volume_name = pod_info[i], volume_info[i]
        volume = client.by_id_volume(volume_name)
        k_status = volume["kubernetesStatus"]
        workloads = k_status['workloadsStatus']
        assert k_status['pvName'] == p['pv_name']
        assert k_status['pvStatus'] == 'Bound'
        assert k_status['namespace'] == 'default'
        assert k_status['pvcName'] == p['pvc_name']
        assert not k_status['lastPVCRefAt']
        assert k_status['lastPodRefAt']
        assert len(workloads) == 1
        if i == 0:
            assert workloads[0]['podName'] == p['pod_name']
            assert workloads[0]['workloadName'] == statefulset_name
            assert workloads[0]['workloadType'] == 'StatefulSet'
        if i == 1:
            assert workloads[0]['podName'] == extra_pod_name
            assert not workloads[0]['workloadName']
            assert not workloads[0]['workloadType']

    # deleted pvc only.
    for i in range(len(volume_info)):
        p, volume_name = pod_info[i], volume_info[i]
        delete_and_wait_pvc(core_api, p['pvc_name'])
        volume = client.by_id_volume(volume_name)
        k_status = volume["kubernetesStatus"]
        workloads = k_status['workloadsStatus']
        for _ in range(RETRY_COUNTS):
            if k_status['pvStatus'] == 'Released':
                break
            time.sleep(RETRY_INTERVAL)
            volume = client.by_id_volume(volume_name)
            k_status = volume["kubernetesStatus"]
            workloads = k_status['workloadsStatus']
        assert k_status['pvName'] == p['pv_name']
        assert k_status['pvStatus'] == 'Released'
        assert k_status['namespace'] == 'default'
        assert k_status['pvcName'] == p['pvc_name']
        assert k_status['lastPVCRefAt']
        assert k_status['lastPodRefAt']
        assert len(workloads) == 1
        if i == 0:
            assert workloads[0]['podName'] == p['pod_name']
            assert workloads[0]['workloadName'] == statefulset_name
            assert workloads[0]['workloadType'] == 'StatefulSet'
        if i == 1:
            assert workloads[0]['podName'] == extra_pod_name
            assert not workloads[0]['workloadName']
            assert not workloads[0]['workloadType']

    # deleted pv only.
    for i in range(len(volume_info)):
        p, volume_name = pod_info[i], volume_info[i]
        delete_and_wait_pv(core_api, p['pv_name'])
        volume = client.by_id_volume(volume_name)
        k_status = volume["kubernetesStatus"]
        workloads = k_status['workloadsStatus']
        assert k_status['pvName'] == ''
        assert k_status['pvStatus'] == ''
        assert k_status['namespace'] == 'default'
        assert k_status['pvcName'] == p['pvc_name']
        assert k_status['lastPVCRefAt']
        assert k_status['lastPodRefAt']
        assert len(workloads) == 1
        if i == 0:
            assert workloads[0]['podName'] == p['pod_name']
            assert workloads[0]['workloadName'] == statefulset_name
            assert workloads[0]['workloadType'] == 'StatefulSet'
        if i == 1:
            assert workloads[0]['podName'] == extra_pod_name
            assert not workloads[0]['workloadName']
            assert not workloads[0]['workloadType']

    # reuse that volume
    for p, volume_name in zip(pod_info, volume_info):
        p['pod_name'] = p['pod_name'].replace('kubernetes-status-test',
                                              'kubernetes-status-test-reuse')
        p['pvc_name'] = p['pvc_name'].replace('kubernetes-status-test',
                                              'kubernetes-status-test-reuse')
        p['pv_name'] = p['pvc_name']

        csi_pv['metadata']['name'] = p['pv_name']
        csi_pv['spec']['csi']['volumeHandle'] = volume_name
        core_api.create_persistent_volume(csi_pv)

        pvc['metadata']['name'] = p['pvc_name']
        pvc['spec']['volumeName'] = p['pv_name']
        core_api.create_namespaced_persistent_volume_claim(
            body=pvc, namespace='default')

        pod['metadata']['name'] = p['pod_name']
        pod['spec']['volumes'] = [{
            'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'],
            'persistentVolumeClaim': {
                'claimName': p['pvc_name'],
            },
        }]
        create_and_wait_pod(core_api, pod)

        volume = client.by_id_volume(volume_name)
        k_status = volume["kubernetesStatus"]
        workloads = k_status['workloadsStatus']
        assert len(workloads) == 1
        assert k_status['pvName'] == p['pv_name']
        for _ in range(RETRY_COUNTS):
            if k_status['pvStatus'] == 'Bound':
                break
            time.sleep(RETRY_INTERVAL)
            volume = client.by_id_volume(volume_name)
            k_status = volume["kubernetesStatus"]
            workloads = k_status['workloadsStatus']
            assert len(workloads) == 1
        assert k_status['pvStatus'] == 'Bound'
        for _ in range(RETRY_COUNTS):
            if workloads[0]['podStatus'] == 'Running':
                break
            time.sleep(RETRY_INTERVAL)
            volume = client.by_id_volume(volume_name)
            k_status = volume["kubernetesStatus"]
            workloads = k_status['workloadsStatus']
            assert len(workloads) == 1
        assert workloads[0]['podStatus'] == 'Running'
        assert workloads[0]['podName'] == p['pod_name']
        assert not workloads[0]['workloadName']
        assert not workloads[0]['workloadType']
        assert k_status['namespace'] == 'default'
        assert k_status['pvcName'] == p['pvc_name']
        assert not k_status['lastPVCRefAt']
        assert not k_status['lastPodRefAt']

        delete_and_wait_pod(core_api, p['pod_name'])
        # Since persistentVolumeReclaimPolicy of csi_pv is `Delete`,
        # we don't need to delete bounded pv manually
        delete_and_wait_pvc(core_api, p['pvc_name'])
        wait_delete_pv(core_api, p['pv_name'])

def test_pvc_creation(client, core_api, pod):  # NOQA
    volume_name = "test-pvc-creation"
    client.create_volume(name=volume_name, size=SIZE,
                         numberOfReplicas=2)
    volume = wait_for_volume_detached(client, volume_name)

    pv_name = "pv-" + volume_name
    pvc_name = "pvc-" + volume_name
    pod_name = "pod-" + volume_name

    # try to create pvc without pv for the volume
    with pytest.raises(Exception) as e:
        volume.pvcCreate(namespace="default", pvcName=pvc_name)
    assert "connot find existing PV for volume" in str(e.value)

    volume.pvCreate(pvName=pv_name)
    for i in range(RETRY_COUNTS):
        if check_pv_existence(core_api, pv_name):
            break
        time.sleep(RETRY_INTERVAL)
    assert check_pv_existence(core_api, pv_name)

    volume = client.by_id_volume(volume_name)
    k_status = volume["kubernetesStatus"]
    for i in range(RETRY_COUNTS):
        if k_status['pvName'] and k_status['pvStatus'] == 'Available':
            break
        time.sleep(RETRY_INTERVAL)
        volume = client.by_id_volume(volume_name)
        k_status = volume["kubernetesStatus"]
    assert k_status['pvName'] == pv_name
    assert k_status['pvStatus'] == 'Available'
    assert not k_status['namespace']
    assert not k_status['pvcName']
    assert not k_status['workloadsStatus']
    assert not k_status['lastPVCRefAt']
    assert not k_status['lastPodRefAt']

    volume.pvcCreate(namespace="default", pvcName=pvc_name)
    for i in range(RETRY_COUNTS):
        if check_pvc_existence(core_api, pvc_name):
            break
        time.sleep(RETRY_INTERVAL)
    assert check_pvc_existence(core_api, pvc_name)

    volume = client.by_id_volume(volume_name)
    k_status = volume["kubernetesStatus"]
    for i in range(RETRY_COUNTS):
        if k_status['pvcName'] and k_status['namespace']:
            break
        time.sleep(RETRY_INTERVAL)
        volume = client.by_id_volume(volume_name)
        k_status = volume["kubernetesStatus"]
    assert k_status['pvName'] == pv_name
    assert k_status['pvStatus'] == 'Bound'
    assert k_status['namespace'] == "default"
    assert k_status['pvcName'] == pvc_name
    assert not k_status['workloadsStatus']
    assert not k_status['lastPVCRefAt']
    assert not k_status['lastPodRefAt']

    pod['metadata']['name'] = pod_name
    pod['spec']['volumes'] = [{
        'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'],
        'persistentVolumeClaim': {
            'claimName': pvc_name,
        },
    }]
    create_and_wait_pod(core_api, pod)

    volume = client.by_id_volume(volume_name)
    k_status = volume["kubernetesStatus"]
    workloads = k_status['workloadsStatus']
    assert k_status['pvName'] == pv_name
    assert k_status['pvStatus'] == 'Bound'
    assert len(workloads) == 1
    for i in range(RETRY_COUNTS):
        if workloads[0]['podStatus'] == 'Running':
            break
        time.sleep(RETRY_INTERVAL)
        volume = client.by_id_volume(volume_name)
        k_status = volume["kubernetesStatus"]
        workloads = k_status['workloadsStatus']
    assert len(workloads) == 1
    assert workloads[0]['podName'] == pod_name
    assert workloads[0]['podStatus'] == 'Running'
    assert not workloads[0]['workloadName']
    assert not workloads[0]['workloadType']
    assert k_status['namespace'] == 'default'
    assert k_status['pvcName'] == pvc_name
    assert not k_status['lastPVCRefAt']
    assert not k_status['lastPodRefAt']

    delete_and_wait_pod(core_api, pod_name)
    delete_and_wait_pvc(core_api, pvc_name)
    wait_delete_pv(core_api, pv_name)

def finalizer():
    api = get_core_api_client()
    delete_and_wait_pod(api, pod_manifest['metadata']['name'])

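
# Context sketch (an assumption, not taken from the source): the finalizer
# above references pod_manifest from an enclosing scope, so it is normally
# defined inside a pytest fixture and registered with request.addfinalizer so
# the pod is cleaned up after the test. A minimal illustrative fixture, with a
# hypothetical name and placeholder manifest, could look like this (pytest and
# the common helpers are assumed to be imported at module level):
@pytest.fixture
def example_pod_fixture(request):  # hypothetical fixture name
    pod_manifest = {'metadata': {'name': 'example-pod'}}  # placeholder spec

    def finalizer():
        api = get_core_api_client()
        delete_and_wait_pod(api, pod_manifest['metadata']['name'])

    request.addfinalizer(finalizer)
    return pod_manifest
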
def restore_inc_test(client, core_api, volume_name, pod):  # NOQA
    std_volume = create_and_check_volume(client, volume_name, 2, SIZE)
    lht_host_id = get_self_host_id()
    std_volume.attach(hostId=lht_host_id)
    std_volume = common.wait_for_volume_healthy(client, volume_name)

    with pytest.raises(Exception) as e:
        std_volume.activate(frontend="blockdev")
    assert "already in active mode" in str(e.value)

    data0 = {'len': 4 * 1024, 'pos': 0}
    data0['content'] = common.generate_random_data(data0['len'])
    bv, backup0, _, data0 = create_backup(client, volume_name, data0)

    sb_volume0_name = "sb-0-" + volume_name
    sb_volume1_name = "sb-1-" + volume_name
    sb_volume2_name = "sb-2-" + volume_name
    client.create_volume(name=sb_volume0_name, size=SIZE,
                         numberOfReplicas=2, fromBackup=backup0['url'],
                         frontend="", standby=True)
    client.create_volume(name=sb_volume1_name, size=SIZE,
                         numberOfReplicas=2, fromBackup=backup0['url'],
                         frontend="", standby=True)
    client.create_volume(name=sb_volume2_name, size=SIZE,
                         numberOfReplicas=2, fromBackup=backup0['url'],
                         frontend="", standby=True)
    common.wait_for_volume_restoration_completed(client, sb_volume0_name)
    common.wait_for_volume_restoration_completed(client, sb_volume1_name)
    common.wait_for_volume_restoration_completed(client, sb_volume2_name)
    sb_volume0 = common.wait_for_volume_healthy(client, sb_volume0_name)
    sb_volume1 = common.wait_for_volume_healthy(client, sb_volume1_name)
    sb_volume2 = common.wait_for_volume_healthy(client, sb_volume2_name)

    for i in range(RETRY_COUNTS):
        sb_volume0 = client.by_id_volume(sb_volume0_name)
        sb_volume1 = client.by_id_volume(sb_volume1_name)
        sb_volume2 = client.by_id_volume(sb_volume2_name)
        sb_engine0 = get_volume_engine(sb_volume0)
        sb_engine1 = get_volume_engine(sb_volume1)
        sb_engine2 = get_volume_engine(sb_volume2)
        if sb_volume0["lastBackup"] != backup0["name"] or \
                sb_volume1["lastBackup"] != backup0["name"] or \
                sb_volume2["lastBackup"] != backup0["name"] or \
                sb_engine0["lastRestoredBackup"] != backup0["name"] or \
                sb_engine1["lastRestoredBackup"] != backup0["name"] or \
                sb_engine2["lastRestoredBackup"] != backup0["name"]:
            time.sleep(RETRY_INTERVAL)
        else:
            break

    assert sb_volume0["standby"] is True
    assert sb_volume0["lastBackup"] == backup0["name"]
    assert sb_volume0["frontend"] == ""
    assert sb_volume0["disableFrontend"] is True
    assert sb_volume0["initialRestorationRequired"] is False
    sb_engine0 = get_volume_engine(sb_volume0)
    assert sb_engine0["lastRestoredBackup"] == backup0["name"]
    assert sb_engine0["requestedBackupRestore"] == backup0["name"]

    assert sb_volume1["standby"] is True
    assert sb_volume1["lastBackup"] == backup0["name"]
    assert sb_volume1["frontend"] == ""
    assert sb_volume1["disableFrontend"] is True
    assert sb_volume1["initialRestorationRequired"] is False
    sb_engine1 = get_volume_engine(sb_volume1)
    assert sb_engine1["lastRestoredBackup"] == backup0["name"]
    assert sb_engine1["requestedBackupRestore"] == backup0["name"]

    assert sb_volume2["standby"] is True
    assert sb_volume2["lastBackup"] == backup0["name"]
    assert sb_volume2["frontend"] == ""
    assert sb_volume2["disableFrontend"] is True
    assert sb_volume2["initialRestorationRequired"] is False
    sb_engine2 = get_volume_engine(sb_volume2)
    assert sb_engine2["lastRestoredBackup"] == backup0["name"]
    assert sb_engine2["requestedBackupRestore"] == backup0["name"]

    sb0_snaps = sb_volume0.snapshotList()
    assert len(sb0_snaps) == 2
    for s in sb0_snaps:
        if s['name'] != "volume-head":
            sb0_snap = s
    assert sb0_snaps
    with pytest.raises(Exception) as e:
        sb_volume0.snapshotCreate()
    assert "cannot create snapshot for standby volume" in str(e.value)
    with pytest.raises(Exception) as e:
        sb_volume0.snapshotRevert(name=sb0_snap["name"])
    assert "cannot revert snapshot for standby volume" in str(e.value)
    with pytest.raises(Exception) as e:
        sb_volume0.snapshotDelete(name=sb0_snap["name"])
    assert "cannot delete snapshot for standby volume" in str(e.value)
    with pytest.raises(Exception) as e:
        sb_volume0.snapshotBackup(name=sb0_snap["name"])
    assert "cannot create backup for standby volume" in str(e.value)
    with pytest.raises(Exception) as e:
        sb_volume0.pvCreate(pvName=sb_volume0_name)
    assert "cannot create PV for standby volume" in str(e.value)
    with pytest.raises(Exception) as e:
        sb_volume0.pvcCreate(pvcName=sb_volume0_name)
    assert "cannot create PVC for standby volume" in str(e.value)

    setting = client.by_id_setting(common.SETTING_BACKUP_TARGET)
    with pytest.raises(Exception) as e:
        client.update(setting, value="random.backup.target")
    assert "cannot modify BackupTarget " \
           "since there are existing standby volumes" in str(e.value)

    with pytest.raises(Exception) as e:
        sb_volume0.activate(frontend="wrong_frontend")
    assert "invalid frontend" in str(e.value)

    activate_standby_volume(client, sb_volume0_name)
    sb_volume0 = client.by_id_volume(sb_volume0_name)
    sb_volume0.attach(hostId=lht_host_id)
    sb_volume0 = common.wait_for_volume_healthy(client, sb_volume0_name)
    check_volume_data(sb_volume0, data0, False)

    zero_string = b'\x00'.decode('utf-8')
    _, backup1, _, data1 = create_backup(client, volume_name, {
        'len': 2 * 1024, 'pos': 0,
        'content': zero_string * 2 * 1024})
    # use this api to update field `last backup`
    client.list_backupVolume()
    check_volume_last_backup(client, sb_volume1_name, backup1['name'])
    activate_standby_volume(client, sb_volume1_name)
    sb_volume1 = client.by_id_volume(sb_volume1_name)
    sb_volume1.attach(hostId=lht_host_id)
    sb_volume1 = common.wait_for_volume_healthy(client, sb_volume1_name)

    data0_modified = {
        'len': data0['len'] - data1['len'],
        'pos': data1['len'],
        'content': data0['content'][data1['len']:],
    }
    check_volume_data(sb_volume1, data0_modified, False)
    check_volume_data(sb_volume1, data1)

    data2 = {'len': 1 * 1024 * 1024, 'pos': 0}
    data2['content'] = common.generate_random_data(data2['len'])
    _, backup2, _, data2 = create_backup(client, volume_name, data2)

    client.list_backupVolume()
    check_volume_last_backup(client, sb_volume2_name, backup2['name'])
    activate_standby_volume(client, sb_volume2_name)
    sb_volume2 = client.by_id_volume(sb_volume2_name)
    sb_volume2.attach(hostId=lht_host_id)
    sb_volume2 = common.wait_for_volume_healthy(client, sb_volume2_name)
    check_volume_data(sb_volume2, data2)

    # allocated this active volume to a pod
    sb_volume2.detach()
    sb_volume2 = common.wait_for_volume_detached(client, sb_volume2_name)

    create_pv_for_volume(client, core_api, sb_volume2, sb_volume2_name)
    create_pvc_for_volume(client, core_api, sb_volume2, sb_volume2_name)
    sb_volume2_pod_name = "pod-" + sb_volume2_name

    pod['metadata']['name'] = sb_volume2_pod_name
    pod['spec']['volumes'] = [{
        'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'],
        'persistentVolumeClaim': {
            'claimName': sb_volume2_name,
        },
    }]
    create_and_wait_pod(core_api, pod)

    sb_volume2 = client.by_id_volume(sb_volume2_name)
    k_status = sb_volume2["kubernetesStatus"]
    workloads = k_status['workloadsStatus']
    assert k_status['pvName'] == sb_volume2_name
    assert k_status['pvStatus'] == 'Bound'
    assert len(workloads) == 1
    for i in range(RETRY_COUNTS):
        if workloads[0]['podStatus'] == 'Running':
            break
        time.sleep(RETRY_INTERVAL)
        sb_volume2 = client.by_id_volume(sb_volume2_name)
        k_status = sb_volume2["kubernetesStatus"]
        workloads = k_status['workloadsStatus']
    assert len(workloads) == 1
    assert workloads[0]['podName'] == sb_volume2_pod_name
    assert workloads[0]['podStatus'] == 'Running'
    assert not workloads[0]['workloadName']
    assert not workloads[0]['workloadType']
    assert k_status['namespace'] == 'default'
    assert k_status['pvcName'] == sb_volume2_name
    assert not k_status['lastPVCRefAt']
    assert not k_status['lastPodRefAt']

    delete_and_wait_pod(core_api, sb_volume2_pod_name)
    delete_and_wait_pvc(core_api, sb_volume2_name)
    delete_and_wait_pv(core_api, sb_volume2_name)

    # cleanup
    std_volume.detach()
    sb_volume0.detach()
    sb_volume1.detach()
    std_volume = common.wait_for_volume_detached(client, volume_name)
    sb_volume0 = common.wait_for_volume_detached(client, sb_volume0_name)
    sb_volume1 = common.wait_for_volume_detached(client, sb_volume1_name)
    sb_volume2 = common.wait_for_volume_detached(client, sb_volume2_name)

    bv.backupDelete(name=backup2["name"])
    bv.backupDelete(name=backup1["name"])
    bv.backupDelete(name=backup0["name"])

    client.delete(std_volume)
    client.delete(sb_volume0)
    client.delete(sb_volume1)
    client.delete(sb_volume2)

    wait_for_volume_delete(client, volume_name)
    wait_for_volume_delete(client, sb_volume0_name)
    wait_for_volume_delete(client, sb_volume1_name)
    wait_for_volume_delete(client, sb_volume2_name)

    volumes = client.list_volume()
    assert len(volumes) == 0

def test_engine_live_upgrade_with_intensive_data_writing( client, core_api, volume_name, pod_make): # NOQA """ Test engine live upgrade with intensive data writing 1. Deploy a compatible new engine image 2. Create a volume(with the old default engine image) with /PV/PVC/Pod and wait for pod to be deployed. 3. Write data to a tmp file in the pod and get the md5sum 4. Upgrade the volume to the new engine image without waiting. 5. Keep copying data from the tmp file to the volume during the live upgrade. 6. Wait until the upgrade completed, verify the volume engine image changed 7. Wait for new replica mode update then check the engine status. 8. Verify all engine and replicas' engine image changed 9. Verify the reference count of the new engine image changed 10. Check the existing data. Then write new data to the upgraded volume and get the md5sum. 11. Delete the pod and wait for the volume detached. Then check engine and replicas's engine image again. 12. Recreate the pod. 13. Check if the attached volume is state `healthy` rather than `degraded`. 14. Check the data. """ default_img = common.get_default_engine_image(client) default_img_name = default_img.name default_img = wait_for_engine_image_ref_count(client, default_img_name, 0) cli_v = default_img.cliAPIVersion cli_minv = default_img.cliAPIMinVersion ctl_v = default_img.controllerAPIVersion ctl_minv = default_img.controllerAPIMinVersion data_v = default_img.dataFormatVersion data_minv = default_img.dataFormatMinVersion engine_upgrade_image = common.get_upgrade_test_image( cli_v, cli_minv, ctl_v, ctl_minv, data_v, data_minv) new_img = client.create_engine_image(image=engine_upgrade_image) new_img_name = new_img.name ei_status_value = get_engine_image_status_value(client, new_img_name) new_img = wait_for_engine_image_state(client, new_img_name, ei_status_value) assert new_img.refCount == 0 assert new_img.noRefSince != "" default_img = common.get_default_engine_image(client) default_img_name = default_img.name pod_name = volume_name + "-pod" pv_name = volume_name + "-pv" pvc_name = volume_name + "-pvc" pod = pod_make(name=pod_name) volume = create_and_check_volume(client, volume_name, num_of_replicas=3, size=str(1 * Gi)) original_engine_image = volume.engineImage assert original_engine_image != engine_upgrade_image create_pv_for_volume(client, core_api, volume, pv_name) create_pvc_for_volume(client, core_api, volume, pvc_name) pod['spec']['volumes'] = [create_pvc_spec(pvc_name)] create_and_wait_pod(core_api, pod) volume = client.by_id_volume(volume_name) assert volume.engineImage == original_engine_image assert volume.currentImage == original_engine_image engine = get_volume_engine(volume) assert engine.engineImage == original_engine_image assert engine.currentImage == original_engine_image for replica in volume.replicas: assert replica.engineImage == original_engine_image assert replica.currentImage == original_engine_image data_path0 = "/tmp/test" data_path1 = "/data/test1" write_pod_volume_random_data(core_api, pod_name, data_path0, RANDOM_DATA_SIZE_LARGE) original_md5sum1 = get_pod_data_md5sum(core_api, pod_name, data_path0) volume.engineUpgrade(image=engine_upgrade_image) # Keep writing data to the volume during the live upgrade copy_pod_volume_data(core_api, pod_name, data_path0, data_path1) # Wait for live upgrade complete wait_for_volume_current_image(client, volume_name, engine_upgrade_image) volume = wait_for_volume_replicas_mode(client, volume_name, "RW") engine = get_volume_engine(volume) assert engine.engineImage == 
        engine_upgrade_image
    check_volume_endpoint(volume)

    wait_for_engine_image_ref_count(client, default_img_name, 0)
    wait_for_engine_image_ref_count(client, new_img_name, 1)

    volume_file_md5sum1 = get_pod_data_md5sum(core_api, pod_name, data_path1)
    assert volume_file_md5sum1 == original_md5sum1

    data_path2 = "/data/test2"
    write_pod_volume_random_data(core_api, pod_name,
                                 data_path2, RANDOM_DATA_SIZE_SMALL)
    original_md5sum2 = get_pod_data_md5sum(core_api, pod_name, data_path2)

    delete_and_wait_pod(core_api, pod_name)
    volume = wait_for_volume_detached(client, volume_name)
    assert len(volume.replicas) == 3
    assert volume.engineImage == engine_upgrade_image
    engine = get_volume_engine(volume)
    assert engine.engineImage == engine_upgrade_image
    for replica in volume.replicas:
        assert replica.engineImage == engine_upgrade_image

    create_and_wait_pod(core_api, pod)
    common.wait_for_volume_healthy(client, volume_name)

    volume_file_md5sum1 = get_pod_data_md5sum(core_api, pod_name, data_path1)
    assert volume_file_md5sum1 == original_md5sum1
    volume_file_md5sum2 = get_pod_data_md5sum(core_api, pod_name, data_path2)
    assert volume_file_md5sum2 == original_md5sum2
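# A minimal, self-contained sketch of the kind of checksum used above: the
# test compares md5sums of files written inside the pod before and after the
# live upgrade. The helper below is illustrative only (not an existing helper
# in this suite) and computes the same digest for a local file in chunks.
import hashlib  # NOQA


def md5sum_of_file(path, chunk_size=1024 * 1024):
    """Return the hex md5 digest of the file at `path`, read in chunks."""
    digest = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()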
def test_csi_expansion_with_replica_failure(client, core_api, storage_class, pvc, pod_manifest): # NOQA """ Test expansion success but with one replica expansion failure 1. Create a new `storage_class` with `allowVolumeExpansion` set 2. Create PVC and Pod with dynamic provisioned volume from the StorageClass 3. Create an empty directory with expansion snapshot tmp meta file path for one replica so that the replica expansion will fail 4. Generate `test_data` and write to the pod 5. Delete the pod and wait for volume detachment 6. Update pvc.spec.resources to expand the volume 7. Check expansion result using Longhorn API. There will be expansion error caused by the failed replica but overall the expansion should succeed. 8. Create a new pod and check if the volume will rebuild the failed replica 9. Validate the volume content, then check if data writing looks fine """ create_storage_class(storage_class) pod_name = 'csi-expansion-with-replica-failure-test' pvc_name = pod_name + "-pvc" pvc['metadata']['name'] = pvc_name pvc['spec']['storageClassName'] = storage_class['metadata']['name'] create_pvc(pvc) pod_manifest['metadata']['name'] = pod_name pod_manifest['spec']['volumes'] = [{ 'name': pod_manifest['spec']['containers'][0]['volumeMounts'][0]['name'], 'persistentVolumeClaim': {'claimName': pvc_name}, }] create_and_wait_pod(core_api, pod_manifest) expand_size = str(EXPANDED_VOLUME_SIZE*Gi) pv = wait_and_get_pv_for_pvc(core_api, pvc_name) assert pv.status.phase == "Bound" volume_name = pv.spec.csi.volume_handle volume = client.by_id_volume(volume_name) failed_replica = volume.replicas[0] fail_replica_expansion(client, core_api, volume_name, expand_size, [failed_replica]) test_data = generate_random_data(VOLUME_RWTEST_SIZE) write_pod_volume_data(core_api, pod_name, test_data) delete_and_wait_pod(core_api, pod_name) wait_for_volume_detached(client, volume_name) # There will be replica expansion error info # but the expansion should succeed. pvc['spec']['resources'] = { 'requests': { 'storage': size_to_string(EXPANDED_VOLUME_SIZE*Gi) } } expand_and_wait_for_pvc(core_api, pvc) wait_for_expansion_failure(client, volume_name) wait_for_volume_expansion(client, volume_name) volume = client.by_id_volume(volume_name) assert volume.state == "detached" assert volume.size == expand_size for r in volume.replicas: if r.name == failed_replica.name: assert r.failedAt != "" else: assert r.failedAt == "" # Check if the replica will be rebuilded # and if the volume still works fine. create_and_wait_pod(core_api, pod_manifest) volume = wait_for_volume_healthy(client, volume_name) for r in volume.replicas: if r.name == failed_replica.name: assert r.mode == "" else: assert r.mode == "RW" resp = read_volume_data(core_api, pod_name) assert resp == test_data test_data = generate_random_data(VOLUME_RWTEST_SIZE) write_pod_volume_data(core_api, pod_name, test_data) resp = read_volume_data(core_api, pod_name) assert resp == test_data
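# The expansion above is driven entirely by updating
# pvc.spec.resources.requests.storage (see expand_and_wait_for_pvc). A minimal
# sketch of the patch body involved is shown below; the helper name is
# illustrative only, and the commented-out call is just one way such a patch
# could be applied with the Kubernetes Python client.
def build_pvc_expand_patch(size_bytes):
    """Return a patch body that resizes a PVC to `size_bytes` bytes."""
    return {
        'spec': {
            'resources': {
                'requests': {
                    'storage': str(size_bytes),
                },
            },
        },
    }


# Example (sketch only):
# core_api.patch_namespaced_persistent_volume_claim(
#     name=pvc_name, namespace='default',
#     body=build_pvc_expand_patch(EXPANDED_VOLUME_SIZE * Gi))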
def test_pvc_creation_with_default_sc_set(client, core_api, storage_class, pod): # NOQA """ Test creating PVC with default StorageClass set The target is to make sure the newly create PV/PVC won't use default StorageClass, and if there is no default StorageClass, PV/PVC can still be created. 1. Create a StorageClass and set it to be the default StorageClass 2. Update static StorageClass to `longhorn-static-test` 3. Create volume then PV/PVC. 4. Make sure the newly created PV/PVC using StorageClass `longhorn-static-test` 5. Create pod with PVC. 6. Verify volume's Kubernetes Status 7. Remove PVC and Pod. 8. Verify volume's Kubernetes Status only contains current PV and history 9. Wait for volume to detach (since pod is deleted) 10. Reuse the volume on a new pod. Wait for the pod to start 11. Verify volume's Kubernetes Status reflect the new pod. 12. Delete PV/PVC/Pod. 13. Verify volume's Kubernetes Status only contains history 14. Delete the default StorageClass. 15. Create PV/PVC for the volume. 16. Make sure the PV's StorageClass is static StorageClass """ # set default storage class storage_class['metadata']['annotations'] = \ {"storageclass.kubernetes.io/is-default-class": "true"} create_storage_class(storage_class) static_sc_name = "longhorn-static-test" setting = client.by_id_setting(SETTING_DEFAULT_LONGHORN_STATIC_SC) setting = client.update(setting, value=static_sc_name) assert setting.value == static_sc_name volume_name = "test-pvc-creation-with-sc" # NOQA pod_name = "pod-" + volume_name client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=2) volume = wait_for_volume_detached(client, volume_name) pv_name = "pv-" + volume_name pvc_name = "pvc-" + volume_name pvc_name_extra = "pvc-" + volume_name + "-extra" create_pv_for_volume(client, core_api, volume, pv_name) create_pvc_for_volume(client, core_api, volume, pvc_name) ret = core_api.list_namespaced_persistent_volume_claim(namespace='default') for item in ret.items: if item.metadata.name == pvc_name: pvc_found = item break assert pvc_found assert pvc_found.spec.storage_class_name == static_sc_name pod['metadata']['name'] = pod_name pod['spec']['volumes'] = [{ 'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'], 'persistentVolumeClaim': { 'claimName': pvc_name, }, }] create_and_wait_pod(core_api, pod) ks = { 'pvName': pv_name, 'pvStatus': 'Bound', 'namespace': 'default', 'pvcName': pvc_name, 'lastPVCRefAt': '', 'lastPodRefAt': '', 'workloadsStatus': [ { 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '', }, ], } wait_volume_kubernetes_status(client, volume_name, ks) delete_and_wait_pod(core_api, pod_name) delete_and_wait_pvc(core_api, pvc_name) ks = { 'pvName': pv_name, 'pvStatus': 'Released', 'namespace': 'default', 'pvcName': pvc_name, 'lastPVCRefAt': 'not empty', 'lastPodRefAt': 'not empty', } wait_volume_kubernetes_status(client, volume_name, ks) # try to reuse the pv volume = wait_for_volume_detached(client, volume_name) create_pvc_for_volume(client, core_api, volume, pvc_name_extra) pod['spec']['volumes'][0]['persistentVolumeClaim']['claimName'] = \ pvc_name_extra create_and_wait_pod(core_api, pod) ks = { 'pvName': pv_name, 'pvStatus': 'Bound', 'namespace': 'default', 'pvcName': pvc_name_extra, 'lastPVCRefAt': '', 'lastPodRefAt': '', 'workloadsStatus': [ { 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '', }, ], } wait_volume_kubernetes_status(client, volume_name, ks) delete_and_wait_pod(core_api, pod_name) delete_and_wait_pvc(core_api, 
                        pvc_name_extra)
    delete_and_wait_pv(core_api, pv_name)
    ks = {
        'pvName': '',
        'pvStatus': '',
        'namespace': 'default',
        'pvcName': pvc_name_extra,
        'lastPVCRefAt': 'not empty',
        'lastPodRefAt': 'not empty',
    }
    wait_volume_kubernetes_status(client, volume_name, ks)

    # without default storage class
    delete_storage_class(storage_class['metadata']['name'])

    create_pv_for_volume(client, core_api, volume, pv_name)
    create_pvc_for_volume(client, core_api, volume, pvc_name)
    ret = core_api.list_namespaced_persistent_volume_claim(
        namespace='default')
    for item in ret.items:
        if item.metadata.name == pvc_name:
            pvc2 = item
            break
    assert pvc2
    assert pvc2.spec.storage_class_name == static_sc_name

    delete_and_wait_pvc(core_api, pvc_name)
    delete_and_wait_pv(core_api, pv_name)
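# The default-StorageClass behavior tested above hinges on the well-known
# "storageclass.kubernetes.io/is-default-class" annotation. Below is a minimal
# sketch of toggling it on a StorageClass manifest dict; the helper name is
# illustrative only and not part of this suite.
def set_default_storage_class_annotation(storage_class_manifest, default=True):
    """Mark (or unmark) a StorageClass manifest dict as the cluster default."""
    annotations = storage_class_manifest.setdefault(
        'metadata', {}).setdefault('annotations', {})
    annotations['storageclass.kubernetes.io/is-default-class'] = \
        'true' if default else 'false'
    return storage_class_manifest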
def test_csi_offline_expansion(client, core_api, storage_class, pvc, pod_manifest): # NOQA """ Test CSI feature: offline expansion 1. Create a new `storage_class` with `allowVolumeExpansion` set 2. Create PVC and Pod with dynamic provisioned volume from the StorageClass 3. Generate `test_data` and write to the pod 4. Delete the pod 5. Update pvc.spec.resources to expand the volume 6. Verify the volume expansion done using Longhorn API 7. Create a new pod and validate the volume content """ create_storage_class(storage_class) pod_name = 'csi-offline-expand-volume-test' pvc_name = pod_name + "-pvc" pvc['metadata']['name'] = pvc_name pvc['spec']['storageClassName'] = storage_class['metadata']['name'] create_pvc(pvc) pod_manifest['metadata']['name'] = pod_name pod_manifest['spec']['volumes'] = [{ 'name': pod_manifest['spec']['containers'][0]['volumeMounts'][0]['name'], 'persistentVolumeClaim': {'claimName': pvc_name}, }] create_and_wait_pod(core_api, pod_manifest) test_data = generate_random_data(VOLUME_RWTEST_SIZE) write_pod_volume_data(core_api, pod_name, test_data) delete_and_wait_pod(core_api, pod_name) pv = wait_and_get_pv_for_pvc(core_api, pvc_name) assert pv.status.phase == "Bound" volume_name = pv.spec.csi.volume_handle wait_for_volume_detached(client, volume_name) pvc['spec']['resources'] = { 'requests': { 'storage': size_to_string(EXPANDED_VOLUME_SIZE*Gi) } } expand_and_wait_for_pvc(core_api, pvc) wait_for_volume_expansion(client, volume_name) volume = client.by_id_volume(volume_name) assert volume.state == "detached" assert volume.size == str(EXPANDED_VOLUME_SIZE*Gi) pod_manifest['metadata']['name'] = pod_name pod_manifest['spec']['volumes'] = [{ 'name': pod_manifest['spec']['containers'][0]['volumeMounts'][0]['name'], 'persistentVolumeClaim': {'claimName': pvc_name}, }] create_and_wait_pod(core_api, pod_manifest) resp = read_volume_data(core_api, pod_name) assert resp == test_data volume = client.by_id_volume(volume_name) engine = get_volume_engine(volume) assert volume.size == str(EXPANDED_VOLUME_SIZE*Gi) assert volume.size == engine.size
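# The size assertions above rely on Longhorn reporting volume.size as a string
# of bytes, which is why they compare against str(EXPANDED_VOLUME_SIZE * Gi).
# A minimal sketch of the conversion, with illustrative helper names that are
# not part of this suite:
def gib_to_byte_string(gib):
    """Convert a GiB count to the byte-count string used in the assertions."""
    return str(gib * 1024 ** 3)


def byte_string_to_gib(size_str):
    """Convert a byte-count string back to GiB as a float."""
    return int(size_str) / float(1024 ** 3)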
def test_backup_kubernetes_status(client, core_api, pod): # NOQA """ Test that Backups have KubernetesStatus stored properly when there is an associated PersistentVolumeClaim and Pod. """ host_id = get_self_host_id() static_sc_name = "longhorn-static-test" setting = client.by_id_setting(SETTING_DEFAULT_LONGHORN_STATIC_SC) setting = client.update(setting, value=static_sc_name) assert setting["value"] == static_sc_name volume_name = "test-backup-kubernetes-status-pod" client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=2) volume = wait_for_volume_detached(client, volume_name) pod_name = "pod-" + volume_name pv_name = "pv-" + volume_name pvc_name = "pvc-" + volume_name create_pv_for_volume(client, core_api, volume, pv_name) create_pvc_for_volume(client, core_api, volume, pvc_name) ret = core_api.list_namespaced_persistent_volume_claim(namespace='default') pvc_found = False for item in ret.items: if item.metadata.name == pvc_name: pvc_found = item break assert pvc_found assert pvc_found.spec.storage_class_name == static_sc_name pod['metadata']['name'] = pod_name pod['spec']['volumes'] = [{ 'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'], 'persistentVolumeClaim': { 'claimName': pvc_name, }, }] create_and_wait_pod(core_api, pod) ks = { 'lastPodRefAt': '', 'lastPVCRefAt': '', 'namespace': 'default', 'pvcName': pvc_name, 'pvName': pv_name, 'pvStatus': 'Bound', 'workloadsStatus': [{ 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '' }] } wait_volume_kubernetes_status(client, volume_name, ks) volume = wait_for_volume_healthy(client, volume_name) # Create Backup manually instead of calling create_backup since Kubernetes # is not guaranteed to mount our Volume to the test host. snap = volume.snapshotCreate() volume.snapshotBackup(name=snap["name"]) bv, b = find_backup(client, volume_name, snap["name"]) new_b = bv.backupGet(name=b["name"]) status = loads(new_b["labels"].get(KUBERNETES_STATUS_LABEL)) assert status == ks restore_name = generate_volume_name() client.create_volume(name=restore_name, size=SIZE, numberOfReplicas=2, fromBackup=b["url"]) wait_for_volume_restoration_completed(client, restore_name) wait_for_volume_detached(client, restore_name) snapshot_created = b["snapshotCreated"] ks = { 'lastPodRefAt': b["snapshotCreated"], 'lastPVCRefAt': b["snapshotCreated"], 'namespace': 'default', 'pvcName': pvc_name, # Restoration should not apply PersistentVolume data. 'pvName': '', 'pvStatus': '', 'workloadsStatus': [{ 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '' }] } wait_volume_kubernetes_status(client, restore_name, ks) restore = client.by_id_volume(restore_name) # We need to compare LastPodRefAt and LastPVCRefAt manually since # wait_volume_kubernetes_status only checks for empty or non-empty state. assert restore["kubernetesStatus"]["lastPodRefAt"] == ks["lastPodRefAt"] assert restore["kubernetesStatus"]["lastPVCRefAt"] == ks["lastPVCRefAt"] bv.backupDelete(name=b["name"]) client.delete(restore) wait_for_volume_delete(client, restore_name) delete_and_wait_pod(core_api, pod_name) delete_and_wait_pvc(core_api, pvc_name) delete_and_wait_pv(core_api, pv_name) # With the Pod, PVC, and PV deleted, the Volume should have both Ref # fields set. Check that a new Backup and Restore will use this instead of # manually populating the Ref fields. 
ks = { 'lastPodRefAt': 'NOT NULL', 'lastPVCRefAt': 'NOT NULL', 'namespace': 'default', 'pvcName': pvc_name, 'pvName': '', 'pvStatus': '', 'workloadsStatus': [{ 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '' }] } wait_volume_kubernetes_status(client, volume_name, ks) volume = wait_for_volume_detached(client, volume_name) volume.attach(hostId=host_id) volume = wait_for_volume_healthy(client, volume_name) snap = volume.snapshotCreate() volume.snapshotBackup(name=snap["name"]) bv, b = find_backup(client, volume_name, snap["name"]) new_b = bv.backupGet(name=b["name"]) status = loads(new_b["labels"].get(KUBERNETES_STATUS_LABEL)) # Check each field manually, we have no idea what the LastPodRefAt or the # LastPVCRefAt will be. We just know it shouldn't be SnapshotCreated. assert status["lastPodRefAt"] != snapshot_created assert status["lastPVCRefAt"] != snapshot_created assert status["namespace"] == "default" assert status["pvcName"] == pvc_name assert status["pvName"] == "" assert status["pvStatus"] == "" assert status["workloadsStatus"] == [{ 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '' }] restore_name = generate_volume_name() client.create_volume(name=restore_name, size=SIZE, numberOfReplicas=2, fromBackup=b["url"]) wait_for_volume_restoration_completed(client, restore_name) wait_for_volume_detached(client, restore_name) ks = { 'lastPodRefAt': status["lastPodRefAt"], 'lastPVCRefAt': status["lastPVCRefAt"], 'namespace': 'default', 'pvcName': pvc_name, 'pvName': '', 'pvStatus': '', 'workloadsStatus': [{ 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '' }] } wait_volume_kubernetes_status(client, restore_name, ks) restore = client.by_id_volume(restore_name) assert restore["kubernetesStatus"]["lastPodRefAt"] == ks["lastPodRefAt"] assert restore["kubernetesStatus"]["lastPVCRefAt"] == ks["lastPVCRefAt"] bv.backupDelete(name=b["name"]) client.delete(restore) cleanup_volume(client, volume)
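# The backup checked above carries its KubernetesStatus as a JSON-encoded
# label (the test decodes it with loads(...)). Below is a minimal,
# self-contained sketch of that decoding with a safe default; the helper name
# is illustrative only.
import json  # NOQA


def decode_kubernetes_status_label(labels, label_key):
    """Decode a JSON KubernetesStatus label into a dict ({} if absent)."""
    raw = (labels or {}).get(label_key)
    return json.loads(raw) if raw else {}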
def test_kubernetes_status( client, core_api, storage_class, # NOQA statefulset, csi_pv, pvc, pod): # NOQA """ Test Volume feature: Kubernetes Status 1. Create StorageClass with `reclaimPolicy = Retain` 2. Create a statefulset `kubernetes-status-test` with the StorageClass 1. The statefulset has scale of 2. 3. Get the volume name from the SECOND pod of the StateufulSet pod and create an `extra_pod` with the same volume on the same node 4. Check the volumes that used by the StatefulSet 1. The volume used by the FIRST StatefulSet pod will have one workload 2. The volume used by the SECOND StatefulSet pod will have two workloads 3. Validate related status, e.g. pv/pod name/state, workload name/type 5. Check the volumes again 1. PV/PVC should still be bound 2. The volume used by the FIRST pod should have history data 3. The volume used by the SECOND and extra pod should have current data point to the extra pod 6. Delete the extra pod 1. Now all the volume's should only have history data(`lastPodRefAt` set) 7. Delete the PVC 1. PVC should be updated with status `Released` and become history data 8. Delete PV 1. All the Kubernetes status information should be cleaned up. 9. Reuse the two Longhorn volumes to create new pods 1. Since the `reclaimPolicy == Retain`, volume won't be deleted by Longhorn 2. Check the Kubernetes status now updated, with pod info but empty workload 3. Default Longhorn Static StorageClass will remove the PV with PVC, but leave Longhorn volume """ statefulset_name = 'kubernetes-status-test' update_statefulset_manifests(statefulset, storage_class, statefulset_name) storage_class['reclaimPolicy'] = 'Retain' create_storage_class(storage_class) create_and_wait_statefulset(statefulset) pod_info = get_statefulset_pod_info(core_api, statefulset) volume_info = [p['pv_name'] for p in pod_info] extra_pod_name = 'extra-pod-using-' + volume_info[1] pod['metadata']['name'] = extra_pod_name p2 = core_api.read_namespaced_pod(name=pod_info[1]['pod_name'], namespace='default') pod['spec']['nodeName'] = p2.spec.node_name pod['spec']['volumes'] = [{ 'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'], 'persistentVolumeClaim': { 'claimName': pod_info[1]['pvc_name'], }, }] create_and_wait_pod(core_api, pod) for i in range(len(volume_info)): p, volume_name = pod_info[i], volume_info[i] # NOQA volume = client.by_id_volume(volume_name) k_status = volume.kubernetesStatus workloads = k_status.workloadsStatus assert k_status.pvName == p['pv_name'] assert k_status.pvStatus == 'Bound' assert k_status.namespace == 'default' assert k_status.pvcName == p['pvc_name'] assert not k_status.lastPVCRefAt assert not k_status.lastPodRefAt if i == 0: assert len(workloads) == 1 assert workloads[0].podName == p['pod_name'] assert workloads[0].workloadName == statefulset_name assert workloads[0].workloadType == 'StatefulSet' for _ in range(RETRY_COUNTS): if workloads[0].podStatus == 'Running': break time.sleep(RETRY_INTERVAL) volume = client.by_id_volume(volume_name) k_status = volume.kubernetesStatus workloads = k_status.workloadsStatus assert workloads[0].podStatus == 'Running' if i == 1: assert len(k_status.workloadsStatus) == 2 if workloads[0].podName == pod_info[i]['pod_name']: assert workloads[1].podName == extra_pod_name assert workloads[0].workloadName == statefulset_name assert workloads[0].workloadType == 'StatefulSet' assert not workloads[1].workloadName assert not workloads[1].workloadType else: assert workloads[1].podName == pod_info[i]['pod_name'] assert workloads[0].podName == 
extra_pod_name assert not workloads[0].workloadName assert not workloads[0].workloadType assert workloads[1].workloadName == statefulset_name assert workloads[1].workloadType == 'StatefulSet' for _ in range(RETRY_COUNTS): if workloads[0].podStatus == 'Running' and \ workloads[1].podStatus == 'Running': break time.sleep(RETRY_INTERVAL) volume = client.by_id_volume(volume_name) k_status = volume.kubernetesStatus workloads = k_status.workloadsStatus assert len(workloads) == 2 assert workloads[0].podStatus == 'Running' assert workloads[1].podStatus == 'Running' ks_list = [{}, {}] delete_and_wait_statefulset_only(core_api, statefulset) # the extra pod is still using the 2nd volume for i in range(len(volume_info)): p, volume_name = pod_info[i], volume_info[i] ks_list[i]['pvName'] = p['pv_name'] ks_list[i]['pvStatus'] = 'Bound' ks_list[i]['namespace'] = 'default' ks_list[i]['pvcName'] = p['pvc_name'] ks_list[i]['lastPVCRefAt'] = '' if i == 0: ks_list[i]['lastPodRefAt'] = 'not empty' ks_list[i]['workloadsStatus'] = [ { 'podName': p['pod_name'], 'podStatus': 'Running', 'workloadName': statefulset_name, 'workloadType': 'StatefulSet', }, ] if i == 1: ks_list[i]['lastPodRefAt'] = '' ks_list[i]['workloadsStatus'] = [{ 'podName': extra_pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '', }] wait_volume_kubernetes_status(client, volume_name, ks_list[i]) # deleted extra_pod, all volumes have no workload delete_and_wait_pod(core_api, pod['metadata']['name']) for i in range(len(volume_info)): p, volume_name = pod_info[i], volume_info[i] ks_list[i]['lastPodRefAt'] = 'not empty' wait_volume_kubernetes_status(client, volume_name, ks_list[i]) # deleted pvc only. for i in range(len(volume_info)): p, volume_name = pod_info[i], volume_info[i] delete_and_wait_pvc(core_api, p['pvc_name']) ks_list[i]['pvStatus'] = 'Released' ks_list[i]['lastPVCRefAt'] = 'not empty' wait_volume_kubernetes_status(client, volume_name, ks_list[i]) # deleted pv only. 
for i in range(len(volume_info)): p, volume_name = pod_info[i], volume_info[i] delete_and_wait_pv(core_api, p['pv_name']) ks_list[i]['pvName'] = '' ks_list[i]['pvStatus'] = '' wait_volume_kubernetes_status(client, volume_name, ks_list[i]) # reuse that volume for p, volume_name in zip(pod_info, volume_info): p['pod_name'] = p['pod_name'].replace('kubernetes-status-test', 'kubernetes-status-test-reuse') p['pvc_name'] = p['pvc_name'].replace('kubernetes-status-test', 'kubernetes-status-test-reuse') p['pv_name'] = p['pvc_name'] csi_pv['metadata']['name'] = p['pv_name'] csi_pv['spec']['csi']['volumeHandle'] = volume_name csi_pv['spec']['storageClassName'] = \ DEFAULT_LONGHORN_STATIC_STORAGECLASS_NAME core_api.create_persistent_volume(csi_pv) pvc['metadata']['name'] = p['pvc_name'] pvc['spec']['volumeName'] = p['pv_name'] pvc['spec']['storageClassName'] = \ DEFAULT_LONGHORN_STATIC_STORAGECLASS_NAME core_api.create_namespaced_persistent_volume_claim(body=pvc, namespace='default') pod['metadata']['name'] = p['pod_name'] pod['spec']['volumes'] = [{ 'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'], 'persistentVolumeClaim': { 'claimName': p['pvc_name'], }, }] create_and_wait_pod(core_api, pod) ks = { 'pvName': p['pv_name'], 'pvStatus': 'Bound', 'namespace': 'default', 'pvcName': p['pvc_name'], 'lastPVCRefAt': '', 'lastPodRefAt': '', 'workloadsStatus': [ { 'podName': p['pod_name'], 'podStatus': 'Running', 'workloadName': '', 'workloadType': '', }, ], } wait_volume_kubernetes_status(client, volume_name, ks) delete_and_wait_pod(core_api, p['pod_name']) # Since persistentVolumeReclaimPolicy of csi_pv is `Delete`, # we don't need to delete bounded pv manually delete_and_wait_pvc(core_api, p['pvc_name']) wait_delete_pv(core_api, p['pv_name'])
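# The expected-status dicts used above follow a convention: '' means the field
# must be empty, while 'not empty' (or 'NOT NULL') is a wildcard for any
# non-empty value. Below is a minimal sketch of a comparison with those
# semantics over plain dicts; the helper name is illustrative only and does
# not replace wait_volume_kubernetes_status.
def matches_expected_kubernetes_status(actual, expected):
    """Return True if `actual` satisfies `expected` under the '' /
    'not empty' wildcard convention used by these tests."""
    for key, want in expected.items():
        got = actual.get(key)
        if key == 'workloadsStatus':
            got = sorted(got or [], key=lambda w: w['podName'])
            want = sorted(want or [], key=lambda w: w['podName'])
            if got != want:
                return False
        elif want in ('not empty', 'NOT NULL'):
            if not got:
                return False
        elif (got or '') != want:
            return False
    return True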
def test_upgrade(upgrade_image_tag, settings_reset, volume_name, pod_make, statefulset, storage_class): # NOQA """ Test Longhorn upgrade Prerequisite: - Disable Auto Salvage Setting 1. Find the upgrade image tag 2. Create a volume, generate and write data into the volume. 3. Create a Pod using a volume, generate and write data 4. Create a StatefulSet with 2 replicas, generate and write data to their volumes 5. Keep all volumes attached 6. Upgrade Longhorn system. 7. Check Pod and StatefulSet didn't restart after upgrade 8. Check All volumes data 9. Write data to StatefulSet pods, and Attached volume 10. Check data written to StatefulSet pods, and attached volume. 11. Detach the volume, and Delete Pod, and StatefulSet to detach theirvolumes 12. Upgrade all volumes engine images. 13. Attach the volume, and recreate Pod, and StatefulSet 14. Check All volumes data """ new_ei_name = "longhornio/longhorn-engine:" + upgrade_image_tag client = get_longhorn_api_client() core_api = get_core_api_client() host_id = get_self_host_id() pod_data_path = "/data/test" pod_volume_name = generate_volume_name() auto_salvage_setting = client.by_id_setting(SETTING_AUTO_SALVAGE) setting = client.update(auto_salvage_setting, value="false") assert setting.name == SETTING_AUTO_SALVAGE assert setting.value == "false" # Create Volume attached to a node. volume1 = create_and_check_volume(client, volume_name, size=SIZE) volume1.attach(hostId=host_id) volume1 = wait_for_volume_healthy(client, volume_name) volume1_data = write_volume_random_data(volume1) # Create Volume used by Pod pod_name, pv_name, pvc_name, pod_md5sum = \ prepare_pod_with_data_in_mb(client, core_api, pod_make, pod_volume_name, data_path=pod_data_path, add_liveness_prope=False) # Create multiple volumes used by StatefulSet statefulset_name = 'statefulset-upgrade-test' update_statefulset_manifests(statefulset, storage_class, statefulset_name) create_storage_class(storage_class) create_and_wait_statefulset(statefulset) statefulset_pod_info = get_statefulset_pod_info(core_api, statefulset) for sspod_info in statefulset_pod_info: sspod_info['data'] = generate_random_data(VOLUME_RWTEST_SIZE) write_pod_volume_data(core_api, sspod_info['pod_name'], sspod_info['data']) # upgrade Longhorn assert longhorn_upgrade(upgrade_image_tag) client = get_longhorn_api_client() # wait for 1 minute before checking pod restarts time.sleep(60) pod = core_api.read_namespaced_pod(name=pod_name, namespace='default') assert pod.status.container_statuses[0].restart_count == 0 for sspod_info in statefulset_pod_info: sspod = core_api.read_namespaced_pod(name=sspod_info['pod_name'], namespace='default') assert \ sspod.status.container_statuses[0].restart_count == 0 for sspod_info in statefulset_pod_info: resp = read_volume_data(core_api, sspod_info['pod_name']) assert resp == sspod_info['data'] res_pod_md5sum = get_pod_data_md5sum(core_api, pod_name, pod_data_path) assert res_pod_md5sum == pod_md5sum check_volume_data(volume1, volume1_data) for sspod_info in statefulset_pod_info: sspod_info['data'] = generate_random_data(VOLUME_RWTEST_SIZE) write_pod_volume_data(core_api, sspod_info['pod_name'], sspod_info['data']) for sspod_info in statefulset_pod_info: resp = read_volume_data(core_api, sspod_info['pod_name']) assert resp == sspod_info['data'] volume1 = client.by_id_volume(volume_name) volume1_data = write_volume_random_data(volume1) check_volume_data(volume1, volume1_data) statefulset['spec']['replicas'] = replicas = 0 apps_api = get_apps_api_client() 
apps_api.patch_namespaced_stateful_set( name=statefulset_name, namespace='default', body={ 'spec': { 'replicas': replicas } }) delete_and_wait_pod(core_api, pod_name) volume = client.by_id_volume(volume_name) volume.detach() volumes = client.list_volume() for v in volumes: wait_for_volume_detached(client, v.name) engineimages = client.list_engine_image() for ei in engineimages: if ei.image == new_ei_name: new_ei = ei volumes = client.list_volume() for v in volumes: volume = client.by_id_volume(v.name) volume.engineUpgrade(image=new_ei.image) statefulset['spec']['replicas'] = replicas = 2 apps_api = get_apps_api_client() apps_api.patch_namespaced_stateful_set( name=statefulset_name, namespace='default', body={ 'spec': { 'replicas': replicas } }) wait_statefulset(statefulset) pod = pod_make(name=pod_name) pod['spec']['volumes'] = [create_pvc_spec(pvc_name)] create_and_wait_pod(core_api, pod) volume1 = client.by_id_volume(volume_name) volume1.attach(hostId=host_id) volume1 = wait_for_volume_healthy(client, volume_name) for sspod_info in statefulset_pod_info: resp = read_volume_data(core_api, sspod_info['pod_name']) assert resp == sspod_info['data'] res_pod_md5sum = get_pod_data_md5sum(core_api, pod_name, pod_data_path) assert res_pod_md5sum == pod_md5sum check_volume_data(volume1, volume1_data)
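# Scaling the StatefulSet up and down during the upgrade test is done with a
# plain spec.replicas patch via AppsV1Api.patch_namespaced_stateful_set (see
# above). A minimal sketch of the patch body; the helper name is illustrative
# only.
def build_statefulset_scale_patch(replicas):
    """Return the patch body used to scale a StatefulSet to `replicas`."""
    return {'spec': {'replicas': replicas}}


# Example (sketch only):
# apps_api.patch_namespaced_stateful_set(
#     name=statefulset_name, namespace='default',
#     body=build_statefulset_scale_patch(2))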
def test_backup_kubernetes_status(set_random_backupstore, client, core_api, pod): # NOQA """ Test that Backups have KubernetesStatus stored properly when there is an associated PersistentVolumeClaim and Pod. 1. Setup a random backupstore 2. Set settings Longhorn Static StorageClass to `longhorn-static-test` 3. Create a volume and PV/PVC. Verify the StorageClass of PVC 4. Create a Pod using the PVC. 5. Check volume's Kubernetes status to reflect PV/PVC/Pod correctly. 6. Create a backup for the volume. 7. Verify the labels of created backup reflect PV/PVC/Pod status. 8. Restore the backup to a volume. Wait for restoration to complete. 9. Check the volume's Kubernetes Status 1. Make sure the `lastPodRefAt` and `lastPVCRefAt` is snapshot created time 10. Delete the backup and restored volume. 11. Delete PV/PVC/Pod. 12. Verify volume's Kubernetes Status updated to reflect history data. 13. Attach the volume and create another backup. Verify the labels 14. Verify the volume's Kubernetes status. 15. Restore the previous backup to a new volume. Wait for restoration. 16. Verify the restored volume's Kubernetes status. 1. Make sure `lastPodRefAt` and `lastPVCRefAt` matched volume on step 12 """ host_id = get_self_host_id() static_sc_name = "longhorn-static-test" setting = client.by_id_setting(SETTING_DEFAULT_LONGHORN_STATIC_SC) setting = client.update(setting, value=static_sc_name) assert setting.value == static_sc_name volume_name = "test-backup-kubernetes-status-pod" # NOQA client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=2) volume = wait_for_volume_detached(client, volume_name) pod_name = "pod-" + volume_name pv_name = "pv-" + volume_name pvc_name = "pvc-" + volume_name create_pv_for_volume(client, core_api, volume, pv_name) create_pvc_for_volume(client, core_api, volume, pvc_name) ret = core_api.list_namespaced_persistent_volume_claim(namespace='default') pvc_found = False for item in ret.items: if item.metadata.name == pvc_name: pvc_found = item break assert pvc_found assert pvc_found.spec.storage_class_name == static_sc_name pod['metadata']['name'] = pod_name pod['spec']['volumes'] = [{ 'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'], 'persistentVolumeClaim': { 'claimName': pvc_name, }, }] create_and_wait_pod(core_api, pod) ks = { 'lastPodRefAt': '', 'lastPVCRefAt': '', 'namespace': 'default', 'pvcName': pvc_name, 'pvName': pv_name, 'pvStatus': 'Bound', 'workloadsStatus': [{ 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '' }] } wait_volume_kubernetes_status(client, volume_name, ks) volume = wait_for_volume_healthy(client, volume_name) # Create Backup manually instead of calling create_backup since Kubernetes # is not guaranteed to mount our Volume to the test host. 
snap = create_snapshot(client, volume_name) volume.snapshotBackup(name=snap.name) wait_for_backup_completion(client, volume_name, snap.name) _, b = find_backup(client, volume_name, snap.name) # Check backup label status = loads(b.labels.get(KUBERNETES_STATUS_LABEL)) assert status == ks # Check backup volume label for _ in range(RETRY_COUNTS): bv = client.by_id_backupVolume(volume_name) if bv is not None and bv.labels is not None: break time.sleep(RETRY_INTERVAL) assert bv is not None and bv.labels is not None status = loads(bv.labels.get(KUBERNETES_STATUS_LABEL)) assert status == ks restore_name = generate_volume_name() client.create_volume(name=restore_name, size=SIZE, numberOfReplicas=2, fromBackup=b.url) wait_for_volume_restoration_completed(client, restore_name) wait_for_volume_detached(client, restore_name) snapshot_created = b.snapshotCreated ks = { 'lastPodRefAt': b.snapshotCreated, 'lastPVCRefAt': b.snapshotCreated, 'namespace': 'default', 'pvcName': pvc_name, # Restoration should not apply PersistentVolume data. 'pvName': '', 'pvStatus': '', 'workloadsStatus': [{ 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '' }] } wait_volume_kubernetes_status(client, restore_name, ks) restore = client.by_id_volume(restore_name) # We need to compare LastPodRefAt and LastPVCRefAt manually since # wait_volume_kubernetes_status only checks for empty or non-empty state. assert restore.kubernetesStatus.lastPodRefAt == ks["lastPodRefAt"] assert restore.kubernetesStatus.lastPVCRefAt == ks["lastPVCRefAt"] delete_backup(client, bv.name, b.name) client.delete(restore) wait_for_volume_delete(client, restore_name) delete_and_wait_pod(core_api, pod_name) delete_and_wait_pvc(core_api, pvc_name) delete_and_wait_pv(core_api, pv_name) # With the Pod, PVC, and PV deleted, the Volume should have both Ref # fields set. Check that a new Backup and Restore will use this instead of # manually populating the Ref fields. ks = { 'lastPodRefAt': 'NOT NULL', 'lastPVCRefAt': 'NOT NULL', 'namespace': 'default', 'pvcName': pvc_name, 'pvName': '', 'pvStatus': '', 'workloadsStatus': [{ 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '' }] } wait_volume_kubernetes_status(client, volume_name, ks) volume = wait_for_volume_detached(client, volume_name) volume.attach(hostId=host_id) volume = wait_for_volume_healthy(client, volume_name) snap = create_snapshot(client, volume_name) volume.snapshotBackup(name=snap.name) volume = wait_for_backup_completion(client, volume_name, snap.name) bv, b = find_backup(client, volume_name, snap.name) new_b = bv.backupGet(name=b.name) status = loads(new_b.labels.get(KUBERNETES_STATUS_LABEL)) # Check each field manually, we have no idea what the LastPodRefAt or the # LastPVCRefAt will be. We just know it shouldn't be SnapshotCreated. 
assert status['lastPodRefAt'] != snapshot_created assert status['lastPVCRefAt'] != snapshot_created assert status['namespace'] == "default" assert status['pvcName'] == pvc_name assert status['pvName'] == "" assert status['pvStatus'] == "" assert status['workloadsStatus'] == [{ 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '' }] restore_name = generate_volume_name() client.create_volume(name=restore_name, size=SIZE, numberOfReplicas=2, fromBackup=b.url) wait_for_volume_restoration_completed(client, restore_name) wait_for_volume_detached(client, restore_name) ks = { 'lastPodRefAt': status['lastPodRefAt'], 'lastPVCRefAt': status['lastPVCRefAt'], 'namespace': 'default', 'pvcName': pvc_name, 'pvName': '', 'pvStatus': '', 'workloadsStatus': [{ 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '' }] } wait_volume_kubernetes_status(client, restore_name, ks) restore = client.by_id_volume(restore_name) assert restore.kubernetesStatus.lastPodRefAt == ks["lastPodRefAt"] assert restore.kubernetesStatus.lastPVCRefAt == ks["lastPVCRefAt"] # cleanup backupstore_cleanup(client) client.delete(restore) cleanup_volume(client, volume)
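# Several checks above (for example, waiting for the backup volume labels) use
# the RETRY_COUNTS / RETRY_INTERVAL polling loop. Below is a minimal,
# self-contained sketch of that pattern as a reusable helper; the name and
# signature are illustrative only.
import time  # NOQA


def poll_until(predicate, retries, interval):
    """Call `predicate` up to `retries` times, sleeping `interval` seconds
    between attempts; return True as soon as it returns a truthy value."""
    for _ in range(retries):
        if predicate():
            return True
        time.sleep(interval)
    return False


# Example (sketch only):
# assert poll_until(lambda: client.by_id_backupVolume(volume_name) is not None,
#                   RETRY_COUNTS, RETRY_INTERVAL)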
def test_data_locality_basic(client, core_api, volume_name, pod, settings_reset): # NOQA
    """
    Test data locality basic feature

    Context:

    The data locality feature gives users the option to keep a local replica
    on the same node as the consuming pod. Longhorn currently supports 2
    modes:
    - disabled: Longhorn does not try to keep a local replica
    - best-effort: Longhorn tries to keep a local replica

    See manual tests at:
    https://github.com/longhorn/longhorn/issues/1045#issuecomment-680706283

    Steps:

    Case 1: Test that Longhorn builds a local replica on the engine node

    1. Create a volume(1) with 1 replica and dataLocality set to disabled
    2. Find the node where the replica is located. Let's call it replica-node
    3. Attach the volume to a node different from replica-node. Let's call it
       engine-node
    4. Write 200MB data to volume(1)
    5. Use a retry loop to verify that Longhorn does not create a replica on
       the engine-node
    6. Update dataLocality to best-effort for volume(1)
    7. Use a retry loop to verify that Longhorn creates and rebuilds a
       replica on the engine-node and removes the other replica
    8. Detach volume(1) and attach it to a different node. Let's call the new
       node new-engine-node and the old node old-engine-node
    9. Wait for volume(1) to finish attaching
    10. Use a retry loop to verify that Longhorn creates and rebuilds a
        replica on the new-engine-node and removes the replica on
        old-engine-node

    Case 2: Test that Longhorn prioritizes deleting replicas on the same node

    1. Add the tag AVAIL to node-1 and node-2
    2. Set node soft anti-affinity to `true`.
    3. Create a volume(2) with 3 replicas and dataLocality set to best-effort
    4. Use a retry loop to verify that all 3 replicas are on node-1 and
       node-2, and no replica is on node-3
    5. Attach volume(2) to node-3
    6. Use a retry loop to verify that there is no replica on node-3 and we
       can still read/write to volume(2)
    7. Find the node which contains 2 replicas. Let's call it
       most-replica-node
    8. Set the replica count to 2 for volume(2)
    9. Verify that Longhorn removes one replica from most-replica-node

    Case 3: Test that the volume is not corrupted if there is an unexpected
    detachment during building of the local replica

    1. Remove the tag AVAIL from node-1 and node-2.
       Set node soft anti-affinity to `false`.
    2. Create a volume(3) with 1 replica and dataLocality set to best-effort
    3. Attach volume(3) to node-3.
    4. Use a retry loop to verify that volume(3) has only 1 replica on node-3
    5. Write 800MB data to volume(3)
    6. Detach volume(3)
    7. Attach volume(3) to node-1
    8. Use a retry loop to:
       Wait until volume(3) finishes attaching.
       Wait until Longhorn starts rebuilding a replica on node-1.
       Immediately detach volume(3)
    9. Verify that the replica on node-1 is in ERR state.
    10. Attach volume(3) to node-1
    11. Wait until volume(3) finishes attaching.
    12. Use a retry loop to verify that Longhorn cleans up the ERR replica,
        rebuilds a new replica on node-1, and removes the replica on node-3

    Case 4: Make sure a failed-to-schedule local replica doesn't block the
    creation of other replicas.

    1. Disable scheduling for node-3
    2. Create a vol with 1 replica, `dataLocality = best-effort`.
       The replica is scheduled on a node (say node-1)
    3. Attach vol to node-3. There is a fail-to-schedule replica with
       Spec.HardNodeAffinity=node-3
    4. Increase numberOfReplica to 3. Verify that the replica set contains:
       one on node-1, one on node-2, one failed replica with
       Spec.HardNodeAffinity=node-3.
    5. Decrease numberOfReplica to 2.
Verify that the replica set contains: one on node-1, one on node-2, one failed replica with Spec.HardNodeAffinity=node-3. 6. Decrease numberOfReplica to 1. Verify that the replica set contains: one on node-1 or node-2, one failed replica with Spec.HardNodeAffinity=node-3. 7. Decrease numberOfReplica to 2. Verify that the replica set contains: one on node-1, one on node-2, one failed replica with Spec.HardNodeAffinity=node-3. 8. Turn off data locality by set `dataLocality=disabled` for the vol. Verify that the replica set contains: one on node-1, one on node-2 9. clean up """ # Case 1: Test that Longhorn builds a local replica on the engine node nodes = client.list_node() default_data_locality_setting = \ client.by_id_setting(SETTING_DEFAULT_DATA_LOCALITY) try: client.update(default_data_locality_setting, value="disabled") except Exception as e: print("Exception when update Default Data Locality setting", default_data_locality_setting, e) volume1_name = volume_name + "-1" volume1_size = str(500 * Mi) volume1_data_path = "/data/test" pv1_name = volume1_name + "-pv" pvc1_name = volume1_name + "-pvc" pod1_name = volume1_name + "-pod" pod1 = pod pod1['metadata']['name'] = pod1_name volume1 = create_and_check_volume(client, volume1_name, num_of_replicas=1, size=volume1_size) volume1 = client.by_id_volume(volume1_name) create_pv_for_volume(client, core_api, volume1, pv1_name) create_pvc_for_volume(client, core_api, volume1, pvc1_name) volume1 = client.by_id_volume(volume1_name) volume1_replica_node = volume1.replicas[0]['hostId'] volume1_attached_node = None for node in nodes: if node.name != volume1_replica_node: volume1_attached_node = node.name break assert volume1_attached_node is not None pod1['spec']['volumes'] = [{ "name": "pod-data", "persistentVolumeClaim": { "claimName": pvc1_name } }] pod1['spec']['nodeSelector'] = \ {"kubernetes.io/hostname": volume1_attached_node} create_and_wait_pod(core_api, pod1) write_pod_volume_random_data(core_api, pod1_name, volume1_data_path, DATA_SIZE_IN_MB_2) for i in range(10): volume1 = client.by_id_volume(volume1_name) assert len(volume1.replicas) == 1 assert volume1.replicas[0]['hostId'] != volume1_attached_node time.sleep(1) volume1 = client.by_id_volume(volume1_name) volume1.updateDataLocality(dataLocality="best-effort") for _ in range(RETRY_COUNTS): volume1 = client.by_id_volume(volume1_name) assert volume1[VOLUME_FIELD_ROBUSTNESS] == VOLUME_ROBUSTNESS_HEALTHY if len(volume1.replicas) == 1 and \ volume1.replicas[0]['hostId'] == volume1_attached_node: break time.sleep(RETRY_INTERVAL) assert len(volume1.replicas) == 1 assert volume1.replicas[0]['hostId'] == volume1_attached_node delete_and_wait_pod(core_api, pod1_name) volume1 = wait_for_volume_detached(client, volume1_name) volume1_replica_node = volume1.replicas[0]['hostId'] volume1_attached_node = None for node in nodes: if node.name != volume1_replica_node: volume1_attached_node = node.name break assert volume1_attached_node is not None pod1['spec']['nodeSelector'] = \ {"kubernetes.io/hostname": volume1_attached_node} create_and_wait_pod(core_api, pod1) for _ in range(RETRY_COUNTS): volume1 = client.by_id_volume(volume1_name) assert volume1[VOLUME_FIELD_ROBUSTNESS] == VOLUME_ROBUSTNESS_HEALTHY if len(volume1.replicas) == 1 and \ volume1.replicas[0]['hostId'] == volume1_attached_node: break time.sleep(RETRY_INTERVAL) assert len(volume1.replicas) == 1 assert volume1.replicas[0]['hostId'] == volume1_attached_node delete_and_wait_pod(core_api, pod1_name) wait_for_volume_detached(client, volume1_name) # 
Case 2: Test that Longhorn prioritizes deleting replicas on the same node node1 = nodes[0] node2 = nodes[1] node3 = nodes[2] client.update(node1, allowScheduling=True, tags=["AVAIL"]) client.update(node2, allowScheduling=True, tags=["AVAIL"]) replica_node_soft_anti_affinity_setting = \ client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY) try: client.update(replica_node_soft_anti_affinity_setting, value="true") except Exception as e: print( "Exception when update " "Replica Node Level Soft Anti-Affinity setting", replica_node_soft_anti_affinity_setting, e) volume2_name = volume_name + "-2" volume2_size = str(500 * Mi) pv2_name = volume2_name + "-pv" pvc2_name = volume2_name + "-pvc" pod2_name = volume2_name + "-pod" pod2 = pod pod2['metadata']['name'] = pod2_name volume2 = client.create_volume(name=volume2_name, size=volume2_size, numberOfReplicas=3, nodeSelector=["AVAIL"], dataLocality="best-effort") volume2 = wait_for_volume_detached(client, volume2_name) volume2 = client.by_id_volume(volume2_name) create_pv_for_volume(client, core_api, volume2, pv2_name) create_pvc_for_volume(client, core_api, volume2, pvc2_name) volume2 = client.by_id_volume(volume2_name) pod2['spec']['volumes'] = [{ "name": "pod-data", "persistentVolumeClaim": { "claimName": pvc2_name } }] pod2['spec']['nodeSelector'] = {"kubernetes.io/hostname": node3.name} create_and_wait_pod(core_api, pod2) volume2 = wait_for_volume_healthy(client, volume2_name) for replica in volume2.replicas: assert replica["hostId"] != node3.name volume2.updateReplicaCount(replicaCount=2) # 2 Healthy replicas and 1 replica failed to schedule # The failed to schedule replica is the local replica on node3 volume2 = wait_for_volume_replica_count(client, volume2_name, 3) volume2 = client.by_id_volume(volume2_name) volume2_healthy_replicas = [] for replica in volume2.replicas: if replica.running is True: volume2_healthy_replicas.append(replica) assert len(volume2_healthy_replicas) == 2 volume2_rep1 = volume2_healthy_replicas[0] volume2_rep2 = volume2_healthy_replicas[1] assert volume2_rep1["hostId"] != volume2_rep2["hostId"] delete_and_wait_pod(core_api, pod2_name) wait_for_volume_detached(client, volume2_name) # Case 3: Test that the volume is not corrupted if there is an unexpected # detachment during building local replica client.update(node1, allowScheduling=True, tags=[]) client.update(node2, allowScheduling=True, tags=[]) replica_node_soft_anti_affinity_setting = \ client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY) try: client.update(replica_node_soft_anti_affinity_setting, value="false") except Exception as e: print( "Exception when update " "Replica Node Level Soft Anti-Affinity setting", replica_node_soft_anti_affinity_setting, e) volume3_name = volume_name + "-3" volume3_size = str(1 * Gi) volume3_data_path = "/data/test" pv3_name = volume3_name + "-pv" pvc3_name = volume3_name + "-pvc" pod3_name = volume3_name + "-pod" pod3 = pod pod3['metadata']['name'] = pod3_name volume3 = client.create_volume(name=volume3_name, size=volume3_size, numberOfReplicas=1) volume3 = wait_for_volume_detached(client, volume3_name) volume3 = client.by_id_volume(volume3_name) create_pv_for_volume(client, core_api, volume3, pv3_name) create_pvc_for_volume(client, core_api, volume3, pvc3_name) volume3 = client.by_id_volume(volume3_name) pod3['spec']['volumes'] = [{ "name": "pod-data", "persistentVolumeClaim": { "claimName": pvc3_name } }] pod3['spec']['nodeSelector'] = {"kubernetes.io/hostname": node3.name} create_and_wait_pod(core_api, pod3) 
volume3 = wait_for_volume_healthy(client, volume3_name) write_pod_volume_random_data(core_api, pod3_name, volume3_data_path, DATA_SIZE_IN_MB_4) volume3.updateDataLocality(dataLocality="best-effort") volume3 = client.by_id_volume(volume3_name) if volume3.replicas[0]['hostId'] != node3.name: wait_for_rebuild_start(client, volume3_name) volume3 = client.by_id_volume(volume3_name) assert len(volume3.replicas) == 2 wait_for_rebuild_complete(client, volume3_name) volume3 = wait_for_volume_replica_count(client, volume3_name, 1) assert volume3.replicas[0]["hostId"] == node3.name delete_and_wait_pod(core_api, pod3_name) pod3['spec']['nodeSelector'] = {"kubernetes.io/hostname": node1.name} create_and_wait_pod(core_api, pod3) wait_for_rebuild_start(client, volume3_name) crash_engine_process_with_sigkill(client, core_api, volume3_name) delete_and_wait_pod(core_api, pod3_name) wait_for_volume_detached(client, volume3_name) volume3 = client.by_id_volume(volume3_name) assert len(volume3.replicas) == 1 assert volume3.replicas[0]["hostId"] == node3.name create_and_wait_pod(core_api, pod3) wait_for_rebuild_start(client, volume3_name) volume3 = client.by_id_volume(volume3_name) assert len(volume3.replicas) == 2 wait_for_rebuild_complete(client, volume3_name) # Wait for deletion of extra replica volume3 = wait_for_volume_replica_count(client, volume3_name, 1) assert volume3.replicas[0]["hostId"] == node1.name assert volume3.replicas[0]["mode"] == "RW" assert volume3.replicas[0]["running"] is True delete_and_wait_pod(core_api, pod3_name) wait_for_volume_detached(client, volume3_name) # Case 4: Make sure failed to schedule local replica doesn't block the # the creation of other replicas. replica_node_soft_anti_affinity_setting = \ client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY) try: client.update(replica_node_soft_anti_affinity_setting, value="false") except Exception as e: print( "Exception when update " "Replica Node Level Soft Anti-Affinity setting", replica_node_soft_anti_affinity_setting, e) client.update(node3, allowScheduling=False) volume4_name = volume_name + "-4" volume4_size = str(1 * Gi) volume4 = client.create_volume(name=volume4_name, size=volume4_size, numberOfReplicas=1, dataLocality="best-effort") volume4 = wait_for_volume_detached(client, volume4_name) volume4 = client.by_id_volume(volume4_name) volume4_replica_name = volume4.replicas[0]["name"] volume4.attach(hostId=node3.name) wait_for_volume_healthy(client, volume4_name) volume4 = client.by_id_volume(volume4_name) assert len(volume4.replicas) == 2 for replica in volume4.replicas: if replica["name"] == volume4_replica_name: assert replica["running"] is True assert replica["mode"] == "RW" else: assert replica["running"] is False assert replica["mode"] == "" assert volume4.conditions.scheduled.reason == \ "LocalReplicaSchedulingFailure" volume4 = volume4.updateReplicaCount(replicaCount=3) volume4 = wait_for_volume_degraded(client, volume4_name) v4_node1_replica_count = 0 v4_node2_replica_count = 0 v4_failed_replica_count = 0 for replica in volume4.replicas: if replica["hostId"] == node1.name: v4_node1_replica_count += 1 elif replica["hostId"] == node2.name: v4_node2_replica_count += 1 elif replica["hostId"] == "": v4_failed_replica_count += 1 assert v4_node1_replica_count == 1 assert v4_node2_replica_count == 1 assert v4_failed_replica_count > 0 volume4 = volume4.updateReplicaCount(replicaCount=2) volume4 = wait_for_volume_replica_count(client, volume4_name, 3) v4_node1_replica_count = 0 v4_node2_replica_count = 0 
v4_failed_replica_count = 0 for replica in volume4.replicas: if replica["hostId"] == node1.name: v4_node1_replica_count += 1 elif replica["hostId"] == node2.name: v4_node2_replica_count += 1 elif replica["hostId"] == "": v4_failed_replica_count += 1 assert v4_node1_replica_count == 1 assert v4_node2_replica_count == 1 assert v4_failed_replica_count > 0 volume4 = volume4.updateReplicaCount(replicaCount=1) volume4 = wait_for_volume_replica_count(client, volume4_name, 2) v4_node1_replica_count = 0 v4_node2_replica_count = 0 v4_failed_replica_count = 0 for replica in volume4.replicas: if replica["hostId"] == node1.name: v4_node1_replica_count += 1 elif replica["hostId"] == node2.name: v4_node2_replica_count += 1 elif replica["hostId"] == "": v4_failed_replica_count += 1 assert v4_node1_replica_count + v4_node2_replica_count == 1 assert v4_failed_replica_count == 1 volume4 = volume4.updateDataLocality(dataLocality="disabled") volume4 = volume4.updateReplicaCount(replicaCount=2) running_replica_count = 0 for _ in range(RETRY_COUNTS): volume4 = client.by_id_volume(volume4_name) running_replica_count = 0 for r in volume4.replicas: if r.failedAt == "" and r.running is True: running_replica_count += 1 if running_replica_count == 2: break time.sleep(RETRY_INTERVAL) assert running_replica_count == 2 v4_node1_replica_count = 0 v4_node2_replica_count = 0 v4_node3_replica_count = 0 for replica in volume4.replicas: wait_for_replica_running(client, volume4_name, replica["name"]) if replica["hostId"] == node1.name: v4_node1_replica_count += 1 elif replica["hostId"] == node2.name: v4_node2_replica_count += 1 elif replica["hostId"] == node3.name: v4_node3_replica_count += 1 assert v4_node1_replica_count == 1 assert v4_node2_replica_count == 1 assert v4_node3_replica_count == 0
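# Case 4 above tallies replicas per node by hand. Below is a minimal sketch of
# the same tally over plain replica dicts using collections.Counter; the
# helper name is illustrative only and not part of this suite.
from collections import Counter  # NOQA


def count_replicas_per_node(replicas):
    """Return a Counter mapping hostId ('' for unscheduled) to replica count."""
    return Counter(r.get('hostId', '') for r in replicas)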