def test_replica_auto_balance_zone_best_effort_with_uneven_node_in_zones(
        client, core_api, volume_name, pod):  # NOQA
    """
    Given set `replica-soft-anti-affinity` to `true`.
    And set `replica-zone-soft-anti-affinity` to `true`.
    And set `default-data-locality` to `best-effort`.
    And set `replicaAutoBalance` to `best-effort`.
    And set node-1 to zone-1.
        set node-2 to zone-1.
        set node-3 to zone-1.
        set node-4 to zone-2.
        set node-5 to zone-2.
    And disable scheduling for node-2.
        disable scheduling for node-3.
        disable scheduling for node-4.
        disable scheduling for node-5.
    And create a volume with 4 replicas.
    And attach the volume to node-1.

    Scenario: replica auto-balance zones with best-effort should balance
              replicas in zones.

    Given 4 replicas running on node-1.
          0 replicas running on node-2.
          0 replicas running on node-3.
          0 replicas running on node-4.
          0 replicas running on node-5.

    When enable scheduling for node-4.
    Then count replicas in each zone.
    And 2 replicas running in zone-1.
        2 replicas running in zone-2.

    When enable scheduling for node-2.
         enable scheduling for node-3.
    Then count replicas on each node.
    And 1 replica running on node-1.
        1 replica running on node-2.
        1 replica running on node-3.
        1 replica running on node-4.
        0 replicas running on node-5.

    When enable scheduling for node-5.
    Then count replicas in each zone.
    And 2 replicas running in zone-1.
        2 replicas running in zone-2.
    """
    common.update_setting(client,
                          SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY, "true")
    common.update_setting(client,
                          SETTING_REPLICA_ZONE_SOFT_ANTI_AFFINITY, "true")
    common.update_setting(client,
                          SETTING_DEFAULT_DATA_LOCALITY, "best-effort")
    common.update_setting(client,
                          SETTING_REPLICA_AUTO_BALANCE, "best-effort")

    n1, n2, n3, n4, n5 = client.list_node()

    set_k8s_node_zone_label(core_api, n1.name, ZONE1)
    set_k8s_node_zone_label(core_api, n2.name, ZONE1)
    set_k8s_node_zone_label(core_api, n3.name, ZONE1)
    set_k8s_node_zone_label(core_api, n4.name, ZONE2)
    set_k8s_node_zone_label(core_api, n5.name, ZONE2)
    wait_longhorn_node_zone_updated(client)

    client.update(n2, allowScheduling=False)
    client.update(n3, allowScheduling=False)
    client.update(n4, allowScheduling=False)
    client.update(n5, allowScheduling=False)

    n_replicas = 4
    volume = create_and_check_volume(client, volume_name,
                                     num_of_replicas=n_replicas)
    volume.attach(hostId=n1.name)

    for _ in range(RETRY_COUNTS):
        n1_r_count = common.get_host_replica_count(
            client, volume_name, n1.name, chk_running=True)
        n2_r_count = common.get_host_replica_count(
            client, volume_name, n2.name, chk_running=False)
        n3_r_count = common.get_host_replica_count(
            client, volume_name, n3.name, chk_running=False)
        n4_r_count = common.get_host_replica_count(
            client, volume_name, n4.name, chk_running=False)
        n5_r_count = common.get_host_replica_count(
            client, volume_name, n5.name, chk_running=False)

        if n1_r_count == 4 and \
                n2_r_count == n3_r_count == n4_r_count == n5_r_count == 0:
            break
        time.sleep(RETRY_INTERVAL)
    assert n1_r_count == 4
    assert n2_r_count == 0
    assert n3_r_count == 0
    assert n4_r_count == 0
    assert n5_r_count == 0

    client.update(n4, allowScheduling=True)

    for _ in range(RETRY_COUNTS):
        z1_r_count = get_zone_replica_count(
            client, volume_name, ZONE1, chk_running=True)
        z2_r_count = get_zone_replica_count(
            client, volume_name, ZONE2, chk_running=True)

        if z1_r_count == z2_r_count == 2:
            break
        time.sleep(RETRY_INTERVAL)
    assert z1_r_count == 2
    assert z2_r_count == 2

    client.update(n2, allowScheduling=True)
    client.update(n3, allowScheduling=True)

    for _ in range(RETRY_COUNTS):
        n1_r_count = common.get_host_replica_count(
            client, volume_name, n1.name, chk_running=True)
        n2_r_count = common.get_host_replica_count(
            client, volume_name, n2.name, chk_running=True)
        n3_r_count = common.get_host_replica_count(
            client, volume_name, n3.name, chk_running=True)
        n4_r_count = common.get_host_replica_count(
            client, volume_name, n4.name, chk_running=True)
        n5_r_count = common.get_host_replica_count(
            client, volume_name, n5.name, chk_running=False)

        if n1_r_count == n2_r_count == n3_r_count == n4_r_count == 1 and \
                n5_r_count == 0:
            break
        time.sleep(RETRY_INTERVAL)
    assert n1_r_count == 1
    assert n2_r_count == 1
    assert n3_r_count == 1
    assert n4_r_count == 1
    assert n5_r_count == 0

    client.update(n5, allowScheduling=True)

    for _ in range(RETRY_COUNTS):
        z1_r_count = get_zone_replica_count(
            client, volume_name, ZONE1, chk_running=True)
        z2_r_count = get_zone_replica_count(
            client, volume_name, ZONE2, chk_running=True)

        if z1_r_count == z2_r_count == 2:
            break
        time.sleep(RETRY_INTERVAL)
    assert z1_r_count == 2
    assert z2_r_count == 2

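# `get_zone_replica_count` used above is a helper defined elsewhere in this
# suite. As a reading aid, here is a minimal sketch of the idea, assuming
# Longhorn node objects expose a `zone` field and replicas expose
# `hostId`/`running`; the name is prefixed to avoid shadowing the real
# helper, and this is not the canonical implementation:
def _sketch_get_zone_replica_count(client, volume_name, zone_name,
                                   chk_running=False):
    # Map each node name to the zone Longhorn currently reports for it.
    node_zone_map = {node.name: node.zone for node in client.list_node()}

    count = 0
    for replica in client.by_id_volume(volume_name).replicas:
        if chk_running and not replica.running:
            continue  # only count replicas whose process is running
        if node_zone_map.get(replica.hostId) == zone_name:
            count += 1
    return count
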
def test_replica_auto_balance_node_duplicates_in_multiple_zones(
        client, core_api, volume_name):  # NOQA
    """
    Scenario: replica auto-balance to nodes with duplicated replicas in the
              zone.

    Given set `replica-soft-anti-affinity` to `true`.
    And set `replica-zone-soft-anti-affinity` to `true`.
    And set volume spec `replicaAutoBalance` to `least-effort`.
    And set node-1 to zone-1.
        set node-2 to zone-2.
    And disable scheduling for node-3.
    And create a volume with 3 replicas.
    And attach the volume to self-node.
    And zone-1 and zone-2 should contain 3 replicas in total.

    When set node-3 to the zone with duplicated replicas.
    And enable scheduling for node-3.
    Then count replicas running on each node.
    And 1 replica running on node-1.
        1 replica running on node-2.
        1 replica running on node-3.
    And count replicas running in each zone.
    And total of 3 replicas running in zone-1 and zone-2.
    """
    common.update_setting(client,
                          SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY, "true")
    common.update_setting(client,
                          SETTING_REPLICA_ZONE_SOFT_ANTI_AFFINITY, "true")
    common.update_setting(client,
                          SETTING_REPLICA_AUTO_BALANCE, "least-effort")

    n1, n2, n3 = client.list_node()

    set_k8s_node_zone_label(core_api, n1.name, ZONE1)
    set_k8s_node_zone_label(core_api, n2.name, ZONE2)
    set_k8s_node_zone_label(core_api, n3.name, "temp")
    wait_longhorn_node_zone_updated(client)

    client.update(n3, allowScheduling=False)

    n_replicas = 3
    volume = create_and_check_volume(client, volume_name,
                                     num_of_replicas=n_replicas)
    volume.attach(hostId=get_self_host_id())

    z1_r_count = get_zone_replica_count(client, volume_name, ZONE1)
    z2_r_count = get_zone_replica_count(client, volume_name, ZONE2)
    assert z1_r_count + z2_r_count == n_replicas

    # Move node-3 into whichever zone currently holds the duplicated
    # replicas, then let the auto-balancer spread them out.
    if z1_r_count == 2:
        set_k8s_node_zone_label(core_api, n3.name, ZONE1)
    else:
        set_k8s_node_zone_label(core_api, n3.name, ZONE2)

    client.update(n3, allowScheduling=True)

    for _ in range(RETRY_COUNTS):
        n1_r_count = common.get_host_replica_count(
            client, volume_name, n1.name, chk_running=True)
        n2_r_count = common.get_host_replica_count(
            client, volume_name, n2.name, chk_running=True)
        n3_r_count = common.get_host_replica_count(
            client, volume_name, n3.name, chk_running=True)

        if n1_r_count == n2_r_count == n3_r_count == 1:
            break
        time.sleep(RETRY_INTERVAL)
    assert n1_r_count == 1
    assert n2_r_count == 1
    assert n3_r_count == 1

    z1_r_count = get_zone_replica_count(client, volume_name,
                                        ZONE1, chk_running=True)
    z2_r_count = get_zone_replica_count(client, volume_name,
                                        ZONE2, chk_running=True)
    assert z1_r_count + z2_r_count == n_replicas

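# `set_k8s_node_zone_label` patches the zone label on a Kubernetes node so
# Longhorn can pick it up. A minimal sketch, assuming the suite uses the
# standard `topology.kubernetes.io/zone` label (the real helper may also
# handle the deprecated `failure-domain.beta.kubernetes.io/zone`):
def _sketch_set_k8s_node_zone_label(core_api, node_name, zone_name):
    # CoreV1Api.patch_node applies a strategic-merge patch to the node.
    payload = {
        "metadata": {
            "labels": {"topology.kubernetes.io/zone": zone_name}
        }
    }
    core_api.patch_node(node_name, payload)
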
def test_replica_auto_balance_zone_best_effort(client, core_api, volume_name):  # NOQA
    """
    Scenario: replica auto-balance zones with best-effort.

    Given set `replica-soft-anti-affinity` to `true`.
    And set `replica-zone-soft-anti-affinity` to `true`.
    And set volume spec `replicaAutoBalance` to `best-effort`.
    And set node-1 to zone-1.
        set node-2 to zone-2.
        set node-3 to zone-3.
    And disable scheduling for node-2.
        disable scheduling for node-3.
    And create a volume with 6 replicas.
    And attach the volume to self-node.
    And 6 replicas running in zone-1.
        0 replicas running in zone-2.
        0 replicas running in zone-3.

    When enable scheduling for node-2.
    Then count replicas running in each zone.
    And 3 replicas running in zone-1.
        3 replicas running in zone-2.
        0 replicas running in zone-3.

    When enable scheduling for node-3.
    Then count replicas running in each zone.
    And 2 replicas running in zone-1.
        2 replicas running in zone-2.
        2 replicas running in zone-3.
    """
    common.update_setting(client,
                          SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY, "true")
    common.update_setting(client,
                          SETTING_REPLICA_ZONE_SOFT_ANTI_AFFINITY, "true")
    common.update_setting(client,
                          SETTING_REPLICA_AUTO_BALANCE, "best-effort")

    n1, n2, n3 = client.list_node()

    set_k8s_node_zone_label(core_api, n1.name, ZONE1)
    set_k8s_node_zone_label(core_api, n2.name, ZONE2)
    set_k8s_node_zone_label(core_api, n3.name, ZONE3)
    wait_longhorn_node_zone_updated(client)

    client.update(n2, allowScheduling=False)
    client.update(n3, allowScheduling=False)

    n_replicas = 6
    volume = create_and_check_volume(client, volume_name,
                                     num_of_replicas=n_replicas)
    volume.attach(hostId=get_self_host_id())

    for _ in range(RETRY_COUNTS):
        z1_r_count = get_zone_replica_count(
            client, volume_name, ZONE1, chk_running=True)
        z2_r_count = get_zone_replica_count(
            client, volume_name, ZONE2, chk_running=True)
        z3_r_count = get_zone_replica_count(
            client, volume_name, ZONE3, chk_running=True)

        if z1_r_count == 6 and z2_r_count == z3_r_count == 0:
            break
        time.sleep(RETRY_INTERVAL)
    assert z1_r_count == 6
    assert z2_r_count == 0
    assert z3_r_count == 0

    client.update(n2, allowScheduling=True)

    for _ in range(RETRY_COUNTS):
        z1_r_count = get_zone_replica_count(
            client, volume_name, ZONE1, chk_running=True)
        z2_r_count = get_zone_replica_count(
            client, volume_name, ZONE2, chk_running=True)
        z3_r_count = get_zone_replica_count(
            client, volume_name, ZONE3, chk_running=True)

        if z1_r_count == z2_r_count == 3 and z3_r_count == 0:
            break
        time.sleep(RETRY_INTERVAL_LONG)
    assert z1_r_count == 3
    assert z2_r_count == 3
    assert z3_r_count == 0

    client.update(n3, allowScheduling=True)

    for _ in range(RETRY_COUNTS):
        z1_r_count = get_zone_replica_count(
            client, volume_name, ZONE1, chk_running=True)
        z2_r_count = get_zone_replica_count(
            client, volume_name, ZONE2, chk_running=True)
        z3_r_count = get_zone_replica_count(
            client, volume_name, ZONE3, chk_running=True)

        if z1_r_count == z2_r_count == z3_r_count == 2:
            break
        time.sleep(RETRY_INTERVAL_LONG)
    assert z1_r_count == 2
    assert z2_r_count == 2
    assert z3_r_count == 2

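# The tests above depend on Longhorn syncing the Kubernetes zone labels into
# its own node objects before any scheduling assertions are made. A minimal
# sketch of what `wait_longhorn_node_zone_updated` might poll for, reusing
# RETRY_COUNTS/RETRY_INTERVAL from this module (the `zone` field name is an
# assumption, and this is not the canonical helper):
def _sketch_wait_longhorn_node_zone_updated(client):
    for _ in range(RETRY_COUNTS):
        # Every Longhorn node should report a non-empty zone once the
        # label sync has happened.
        if all(node.zone != "" for node in client.list_node()):
            return
        time.sleep(RETRY_INTERVAL)
    assert False, "Longhorn node zones were not updated in time"
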
def test_replica_auto_balance_zone_best_effort_with_data_locality(
        client, core_api, volume_name, pod):  # NOQA
    """
    Background:

    Given set `replica-soft-anti-affinity` to `true`.
    And set `replica-zone-soft-anti-affinity` to `true`.
    And set `default-data-locality` to `best-effort`.
    And set `replicaAutoBalance` to `best-effort`.
    And set node-1 to zone-1.
        set node-2 to zone-1.
        set node-3 to zone-2.
    And create a volume with 2 replicas.
    And create pv for volume.
    And create pvc for volume.

    Scenario Outline: replica auto-balance zones with best-effort should
                      not remove pod local replicas when data locality is
                      enabled (best-effort).

    Given create and wait pod on <pod-node>.
    And disable scheduling and evict node-3.
    And count replicas on each node.
    And 1 replica running on <pod-node>.
        1 replica running on <duplicate-node>.
        0 replicas running on node-3.

    When enable scheduling for node-3.
    Then count replicas on each node.
    And 1 replica running on <pod-node>.
        0 replicas running on <duplicate-node>.
        1 replica running on node-3.
    And count replicas in each zone.
    And 1 replica running in zone-1.
        1 replica running in zone-2.
    And loop 3 times, waiting 5 seconds each, and count replicas on each
        node, to ensure no additional scheduling happens.
        1 replica running on <pod-node>.
        0 replicas running on <duplicate-node>.
        1 replica running on node-3.
    And delete pod.

    Examples:
        | pod-node | duplicate-node |
        | node-1   | node-2         |
        | node-2   | node-1         |
        | node-1   | node-2         |
    """
    common.update_setting(client,
                          SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY, "true")
    common.update_setting(client,
                          SETTING_REPLICA_ZONE_SOFT_ANTI_AFFINITY, "true")
    common.update_setting(client,
                          SETTING_DEFAULT_DATA_LOCALITY, "best-effort")
    common.update_setting(client,
                          SETTING_REPLICA_AUTO_BALANCE, "best-effort")

    n1, n2, n3 = client.list_node()

    set_k8s_node_zone_label(core_api, n1.name, ZONE1)
    set_k8s_node_zone_label(core_api, n2.name, ZONE1)
    set_k8s_node_zone_label(core_api, n3.name, ZONE2)
    wait_longhorn_node_zone_updated(client)

    n_replicas = 2
    volume = create_and_check_volume(client, volume_name,
                                     num_of_replicas=n_replicas)
    common.create_pv_for_volume(client, core_api, volume, volume_name)
    common.create_pvc_for_volume(client, core_api, volume, volume_name)

    pod['spec']['volumes'] = [{
        "name": "pod-data",
        "persistentVolumeClaim": {
            "claimName": volume_name
        }
    }]

    for i in range(1, 4):
        # Alternate the pod between node-1 and node-2 to cover the rows
        # of the scenario outline.
        pod_node_name = n2.name if i % 2 == 0 else n1.name
        pod['spec']['nodeSelector'] = {
            "kubernetes.io/hostname": pod_node_name
        }
        common.create_and_wait_pod(core_api, pod)

        client.update(n3, allowScheduling=False, evictionRequested=True)

        duplicate_node = [n1.name, n2.name]
        duplicate_node.remove(pod_node_name)

        for _ in range(RETRY_COUNTS):
            pod_node_r_count = common.get_host_replica_count(
                client, volume_name, pod_node_name, chk_running=True)
            duplicate_node_r_count = common.get_host_replica_count(
                client, volume_name, duplicate_node[0], chk_running=True)
            balance_node_r_count = common.get_host_replica_count(
                client, volume_name, n3.name, chk_running=False)

            if pod_node_r_count == duplicate_node_r_count == 1 and \
                    balance_node_r_count == 0:
                break
            time.sleep(RETRY_INTERVAL)
        assert pod_node_r_count == 1
        assert duplicate_node_r_count == 1
        assert balance_node_r_count == 0

        client.update(n3, allowScheduling=True)

        for _ in range(RETRY_COUNTS):
            pod_node_r_count = common.get_host_replica_count(
                client, volume_name, pod_node_name, chk_running=True)
            duplicate_node_r_count = common.get_host_replica_count(
                client, volume_name, duplicate_node[0], chk_running=False)
            balance_node_r_count = common.get_host_replica_count(
                client, volume_name, n3.name, chk_running=True)

            if pod_node_r_count == balance_node_r_count == 1 and \
                    duplicate_node_r_count == 0:
                break
            time.sleep(RETRY_INTERVAL)
        assert pod_node_r_count == 1
        assert duplicate_node_r_count == 0
        assert balance_node_r_count == 1

        z1_r_count = get_zone_replica_count(client, volume_name,
                                            ZONE1, chk_running=True)
        z2_r_count = get_zone_replica_count(client, volume_name,
                                            ZONE2, chk_running=True)
        assert z1_r_count == z2_r_count == 1

        # Loop 3 times, waiting 5 seconds each, to ensure there is no
        # re-scheduling happening.
        for _ in range(3):
            time.sleep(5)
            assert pod_node_r_count == common.get_host_replica_count(
                client, volume_name, pod_node_name, chk_running=True)
            assert duplicate_node_r_count == common.get_host_replica_count(
                client, volume_name, duplicate_node[0], chk_running=False)
            assert balance_node_r_count == common.get_host_replica_count(
                client, volume_name, n3.name, chk_running=True)

        common.delete_and_wait_pod(core_api, pod['metadata']['name'])

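# `common.get_host_replica_count` is the per-node counterpart of the zone
# helper sketched earlier: it counts one volume's replicas on a given host,
# with `chk_running` deciding whether stopped replicas count. A plausible
# sketch (replica field names are assumptions drawn from the usage above,
# not the implementation in common):
def _sketch_get_host_replica_count(client, volume_name, host_name,
                                   chk_running=True):
    count = 0
    for replica in client.by_id_volume(volume_name).replicas:
        if chk_running and not replica.running:
            continue  # skip replicas that exist but are not running
        if replica.hostId == host_name:
            count += 1
    return count
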
def test_setting_backing_image_auto_cleanup(client, core_api, volume_name):  # NOQA
    """
    Test that the Backing Image Cleanup Wait Interval setting works correctly.

    The default value of setting `BackingImageCleanupWaitInterval` is 60.

    1. Create a backing image.
    2. Create multiple volumes using the backing image.
    3. Attach all volumes. Then:
        1. Wait until all volumes become running.
        2. Verify the backing image is correct in all volumes.
        3. Verify the backing image disk status map.
        4. Verify the only backing image file in each disk is reused by
           multiple replicas. The backing image file path is
           `<Data path>/<The backing image name>/backing`.
    4. Unschedule the test node to guarantee that when a replica is removed
       from the test node, no new replica can be rebuilt there.
    5. Remove all replicas in one disk. Wait for 50 seconds.
       Then verify nothing changes in the backing image disk state map
       (before the cleanup wait interval has passed).
    6. Modify `BackingImageCleanupWaitInterval` to a small value. Then verify:
        1. The download state of the disk containing no replica becomes
           terminating first, and the entry is removed from the map later.
        2. The related backing image file is removed.
        3. The download states of the other disks stay unchanged. All
           volumes still work fine.
    7. Delete all volumes. Verify that only 1 entry remains in the backing
       image disk map.
    8. Delete the backing image.
    """
    # Step 1
    create_backing_image_with_matching_url(
        client, BACKING_IMAGE_NAME, BACKING_IMAGE_QCOW2_URL)

    # Step 2
    volume_names = [
        volume_name + "-1",
        volume_name + "-2",
        volume_name + "-3"
    ]
    for volume_name in volume_names:
        volume = create_and_check_volume(client, volume_name, 3,
                                         str(BACKING_IMAGE_EXT4_SIZE),
                                         BACKING_IMAGE_NAME)

    # Step 3
    lht_host_id = get_self_host_id()
    for volume_name in volume_names:
        volume = client.by_id_volume(volume_name)
        volume.attach(hostId=lht_host_id)
        wait_for_volume_healthy(client, volume_name)
        assert volume.backingImage == BACKING_IMAGE_NAME

    backing_image = client.by_id_backing_image(BACKING_IMAGE_NAME)
    assert len(backing_image.diskFileStatusMap) == 3
    for disk_id, status in iter(backing_image.diskFileStatusMap.items()):
        assert status.state == "ready"

    backing_images_in_disk = os.listdir("/var/lib/longhorn/backing-images")
    assert len(backing_images_in_disk) == 1
    assert os.path.exists(
        "/var/lib/longhorn/backing-images/{}/backing".format(
            backing_images_in_disk[0]))
    assert os.path.exists(
        "/var/lib/longhorn/backing-images/{}/backing.cfg".format(
            backing_images_in_disk[0]))

    # Step 4
    current_host = client.by_id_node(id=lht_host_id)
    client.update(current_host, allowScheduling=False)
    wait_for_node_update(client, lht_host_id, "allowScheduling", False)

    # Step 5
    for volume_name in volume_names:
        volume = client.by_id_volume(volume_name)
        for replica in volume.replicas:
            if replica.hostId == lht_host_id:
                replica_name = replica.name
        volume.replicaRemove(name=replica_name)

    # This wait interval should be smaller than the setting value.
    # Otherwise, the backing image files may be cleaned up.
    time.sleep(int(BACKING_IMAGE_CLEANUP_WAIT_INTERVAL))
    check_backing_image_disk_map_status(client, BACKING_IMAGE_NAME, 3, "ready")

    # Step 6
    update_setting(client, "backing-image-cleanup-wait-interval", "1")
    check_backing_image_disk_map_status(client, BACKING_IMAGE_NAME, 2, "ready")

    backing_images_in_disk = os.listdir("/var/lib/longhorn/backing-images")
    assert len(backing_images_in_disk) == 0

    # Step 7
    for volume_name in volume_names:
        volume = client.by_id_volume(volume_name)
        client.delete(volume)
        wait_for_volume_delete(client, volume_name)

    check_backing_image_disk_map_status(client, BACKING_IMAGE_NAME, 1, "ready")
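

# `check_backing_image_disk_map_status` waits for the backing image's disk
# file status map to settle at the expected entry count and state. A minimal
# sketch built on the same polling pattern as the tests above (the real
# helper lives elsewhere in the suite and may differ):
def _sketch_check_backing_image_disk_map_status(client, bi_name,
                                                expected_count,
                                                expected_state):
    for _ in range(RETRY_COUNTS):
        backing_image = client.by_id_backing_image(bi_name)
        states = [status.state
                  for status in backing_image.diskFileStatusMap.values()]
        if len(states) == expected_count and \
                all(state == expected_state for state in states):
            return
        time.sleep(RETRY_INTERVAL)
    assert False, "backing image disk file status map did not converge"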