def test_statefulset_recurring_backup(set_random_backupstore, client, core_api, storage_class, statefulset):  # NOQA
    """
    Scenario: recurring backups on StatefulSet volumes

    Given a recurring backup job in the "default" group that is scheduled
          every minute with retain=2.

    When a StatefulSet is created.
    And data is written to every StatefulSet pod.
    And the test waits 150 seconds.
    And the same data is written to every pod again.
    And the test waits another 150 seconds.

    Then the volume of every pod lists exactly 2 snapshot entries that are
         not marked as removed (one backup snapshot + volume-head).
    """

    # backup every minute
    every_minute_backup = {
        "task": "backup",
        "groups": ["default"],
        "cron": "* * * * *",
        "retain": 2,
        "concurrency": 2,
        "labels": {},
    }
    recurring_jobs = {"backup": every_minute_backup}
    create_recurring_jobs(client, recurring_jobs)
    check_recurring_jobs(client, recurring_jobs)

    statefulset_name = 'statefulset-backup-test'
    update_statefulset_manifests(statefulset, storage_class, statefulset_name)

    create_storage_class(storage_class)
    create_and_wait_statefulset(statefulset)

    pod_data = get_statefulset_pod_info(core_api, statefulset)
    for entry in pod_data:
        entry['data'] = generate_random_data(VOLUME_RWTEST_SIZE)
        entry['backup_snapshot'] = ''

    # Two identical rounds: write each pod's data, then wait 150 seconds
    # (5 minutes in total).
    for _ in range(2):
        for entry in pod_data:
            # The looked-up volume is not used here; the call is kept as is.
            client.by_id_volume(entry['pv_name'])
            write_pod_volume_data(core_api, entry['pod_name'], entry['data'])
        time.sleep(150)

    for entry in pod_data:
        snapshots = client.by_id_volume(entry['pv_name']).snapshotList()
        remaining = sum(1 for snap in snapshots if snap.removed is False)

        # one backup + volume-head
        assert remaining == 2
Пример #2
0
def test_statefulset_recurring_backup(
        client,
        core_api,
        storage_class,  # NOQA
        statefulset):  # NOQA
    """
    Test that recurring backups on StatefulSets work properly.

    1. Create a StatefulSet with VolumeClaimTemplate and Longhorn.
    2. Wait for the StatefulSet to be created.
    3. Write some data to every pod and set a per-minute backup job
       (retain=2) on the pod's volume through `recurringUpdate`.
    4. Wait for 150 seconds.
    5. Repeat steps 3 and 4 once more (5 minutes of waiting in total).
    6. Verify every volume lists exactly 2 snapshot entries that are not
       marked as removed.
    """

    statefulset_name = 'statefulset-backup-test'
    update_statefulset_manifests(statefulset, storage_class, statefulset_name)

    create_storage_class(storage_class)
    create_and_wait_statefulset(statefulset)

    # backup every minute
    job_backup = dict(name="backup", cron="* * * * *", task="backup",
                      retain=2)

    pod_data = get_statefulset_pod_info(core_api, statefulset)
    for entry in pod_data:
        entry['data'] = generate_random_data(VOLUME_RWTEST_SIZE)
        entry['backup_snapshot'] = ''

    # Two identical rounds of "write + (re)apply the job", each followed by
    # a 150 second wait.
    for _ in range(2):
        for entry in pod_data:
            target = client.by_id_volume(entry['pv_name'])
            write_pod_volume_data(core_api, entry['pod_name'], entry['data'])
            target.recurringUpdate(jobs=[job_backup])
        time.sleep(150)

    for entry in pod_data:
        snapshots = client.by_id_volume(entry['pv_name']).snapshotList()
        remaining = sum(1 for snap in snapshots if snap.removed is False)

        # one backup + volume-head
        assert remaining == 2
Пример #3
0
def test_pv_creation(client, core_api):  # NOQA
    """
    Test creating a PersistentVolume for an existing Longhorn volume.

    1. Create a volume with 2 replicas and wait for it to be detached.
    2. Create a PV for it through `volume.pvCreate` and wait until
       `check_pv_existence` reports it.
    3. Poll the volume until `kubernetesStatus` reports the PV name with
       status `Available`, then verify that namespace, PVC name, workloads
       and the last-reference timestamps are all empty.
    4. Try to create a second PV for the same volume; this must raise an
       exception whose message contains "already exist".
    5. Verify `kubernetesStatus` still matches the expectations of step 3.
    6. Delete the PV.
    """
    volume_name = "test-pv-creation"
    client.create_volume(name=volume_name, size=SIZE,
                         numberOfReplicas=2)
    volume = wait_for_volume_detached(client, volume_name)

    pv_name = "pv-" + volume_name

    volume.pvCreate(pvName=pv_name)
    for _ in range(RETRY_COUNTS):
        if check_pv_existence(core_api, pv_name):
            break
        time.sleep(RETRY_INTERVAL)
    assert check_pv_existence(core_api, pv_name)

    volume = client.by_id_volume(volume_name)
    k_status = volume["kubernetesStatus"]
    workloads = k_status['workloadsStatus']
    for _ in range(RETRY_COUNTS):
        if k_status['pvName'] and k_status['pvStatus'] == 'Available':
            break
        time.sleep(RETRY_INTERVAL)
        volume = client.by_id_volume(volume_name)
        k_status = volume["kubernetesStatus"]
        workloads = k_status['workloadsStatus']
    assert k_status['pvName'] == pv_name
    assert k_status['pvStatus'] == 'Available'
    assert not k_status['namespace']
    assert not k_status['pvcName']
    assert not workloads
    assert not k_status['lastPVCRefAt']
    assert not k_status['lastPodRefAt']

    # try to create one more pv for the volume
    pv_name_2 = "pv2-" + volume_name
    with pytest.raises(Exception) as e:
        volume.pvCreate(pvName=pv_name_2)
    # The message check has to live outside the `with` block: any statement
    # placed after the raising call inside the block is never executed.
    assert "already exist" in str(e.value)

    volume = client.by_id_volume(volume_name)
    k_status = volume["kubernetesStatus"]
    workloads = k_status['workloadsStatus']
    assert k_status['pvName'] == pv_name
    assert k_status['pvStatus'] == 'Available'
    assert not k_status['namespace']
    assert not k_status['pvcName']
    assert not workloads
    assert not k_status['lastPVCRefAt']
    assert not k_status['lastPodRefAt']

    delete_and_wait_pv(core_api, pv_name)
Пример #4
0
def test_csi_expansion_with_size_round_up(client, core_api):  # NOQA
    """
    Expand a Longhorn volume twice and verify the resulting sizes.

    1. Create a Longhorn volume with 2 replicas and size '1Gi'.
    2. Attach, write random data, and detach.
    3. Expand the volume to '2000000000' and check that the reported size
       is rounded up to '2000683008'.
    4. Attach, check the previous data, write new random data, and detach.
    5. Expand the volume to '2Gi' and check that the size is '2147483648'.
    6. Attach, check the data written in step 4, and detach.
    7. Delete the volume.
    """

    vol_name = generate_volume_name()
    initial_size = str(1 * Gi)
    volume = create_and_check_volume(client, vol_name, 2, initial_size)

    # Round 1: write the first data set on the original 1Gi volume.
    host_id = get_self_host_id()
    volume.attach(hostId=host_id, disableFrontend=False)
    volume = wait_for_volume_healthy(client, vol_name)
    written = write_volume_random_data(volume)
    volume.detach(hostId="")
    volume = wait_for_volume_detached(client, vol_name)

    # First expansion: the requested size is expected to be rounded up.
    volume.expand(size="2000000000")
    wait_for_volume_expansion(client, vol_name)
    volume = client.by_id_volume(vol_name)
    assert volume.size == "2000683008"

    # Round 2: verify the old data, then replace it with a new data set.
    host_id = get_self_host_id()
    volume.attach(hostId=host_id, disableFrontend=False)
    volume = wait_for_volume_healthy(client, vol_name)
    check_volume_data(volume, written, False)
    written = write_volume_random_data(volume)
    volume.detach(hostId="")
    volume = wait_for_volume_detached(client, vol_name)

    # Second expansion: an exact 2Gi request.
    final_size = str(2 * Gi)
    volume.expand(size=final_size)
    wait_for_volume_expansion(client, vol_name)
    volume = client.by_id_volume(vol_name)
    assert volume.size == "2147483648"

    # Round 3: only verify the data written in round 2.
    host_id = get_self_host_id()
    volume.attach(hostId=host_id, disableFrontend=False)
    volume = wait_for_volume_healthy(client, vol_name)
    check_volume_data(volume, written, False)
    volume.detach(hostId="")
    volume = wait_for_volume_detached(client, vol_name)

    client.delete(volume)
    wait_for_volume_delete(client, vol_name)
Пример #5
0
def test_replica_zone_anti_affinity(client, core_api, volume_name,
                                    k8s_node_zone_tags):  # NOQA
    """
    Test replica scheduling with zone anti-affinity

    1. Wait for the Longhorn node zones to be updated.
    2. Set both node soft anti-affinity and zone soft anti-affinity to
       "false".
    3. Create a volume.
    4. Label every node with its own zone: "lh-zone1", "lh-zone2", ...
    5. Wait for the volume condition `scheduled` to have status true.
    6. Set zone soft anti-affinity to "true".
    7. Delete the volume.
    8. Label every node with the same zone "lh-zone1".
    9. Create the volume again.
    10. Wait for the volume condition `scheduled` to have status true.
    """

    wait_longhorn_node_zone_updated(client)

    node_soft_setting = \
        client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(node_soft_setting, value="false")

    zone_soft_setting = \
        client.by_id_setting(SETTING_REPLICA_ZONE_SOFT_ANTI_AFFINITY)
    client.update(zone_soft_setting, value="false")

    create_and_check_volume(client, volume_name)

    lh_nodes = client.list_node()

    # Give every node a distinct zone label, numbered from 1.
    for index, lh_node in enumerate(lh_nodes, start=1):
        zone_label = "lh-zone" + str(index)
        set_k8s_node_zone_label(core_api, lh_node.name, zone_label)

    wait_longhorn_node_zone_updated(client)

    wait_for_volume_condition_scheduled(client, volume_name, "status",
                                        CONDITION_STATUS_TRUE)

    zone_soft_setting = \
        client.by_id_setting(SETTING_REPLICA_ZONE_SOFT_ANTI_AFFINITY)
    client.update(zone_soft_setting, value="true")

    client.delete(client.by_id_volume(volume_name))
    wait_for_volume_delete(client, volume_name)

    # Collapse all nodes into one shared zone.
    for lh_node in lh_nodes:
        set_k8s_node_zone_label(core_api, lh_node.name, "lh-zone1")

    wait_longhorn_node_zone_updated(client)

    create_and_check_volume(client, volume_name)
    wait_for_volume_condition_scheduled(client, volume_name, "status",
                                        CONDITION_STATUS_TRUE)
Пример #6
0
def test_recurring_job_in_storageclass(client, core_api, storage_class,
                                       statefulset):  # NOQA
    """
    Test creating volumes from a StorageClass that contains recurring jobs

    1. Call `set_random_backupstore` with the client.
    2. Put the JSON-encoded result of `create_jobs1()` into the StorageClass
       parameter `recurringJobs` and create the StorageClass.
    3. Create a StatefulSet with PVC template and StorageClass.
    4. Wait 5 minutes, then run `check_jobs1_result` on every pod's volume.
    """
    set_random_backupstore(client)
    statefulset_name = 'recurring-job-in-storageclass-test'
    update_statefulset_manifests(statefulset, storage_class, statefulset_name)
    jobs_json = json.dumps(create_jobs1())
    storage_class["parameters"]["recurringJobs"] = jobs_json

    create_storage_class(storage_class)
    create_and_wait_statefulset(statefulset)

    pv_names = []
    for info in get_statefulset_pod_info(core_api, statefulset):
        pv_names.append(info['pv_name'])

    # 5 minutes
    time.sleep(300)
    for pv_name in pv_names:
        check_jobs1_result(client.by_id_volume(pv_name))
Пример #7
0
def backupstore_test(client, core_api, csi_pv, pvc, pod_make, pod_name,
                     base_image, test_data, i):  # NOQA
    """
    Back up a CSI volume, restore it into a second pod, and delete the backup.

    Writes `test_data` through pod `pod_name`, snapshots and backs up the
    volume named in `csi_pv['metadata']['name']`, creates the pod
    `csi-backup-test-<i>` from the backup URL, checks that it reads back
    `test_data`, deletes the backup, and asserts that no remaining backup
    refers to the snapshot.
    """
    vol_name = csi_pv['metadata']['name']
    write_pod_volume_data(core_api, pod_name, test_data)

    source_volume = client.by_id_volume(vol_name)
    snapshot = source_volume.snapshotCreate()
    source_volume.snapshotBackup(name=snapshot["name"])

    backup_volume, backup = common.find_backup(client, vol_name,
                                               snapshot["name"])

    restore_pod_name = 'csi-backup-test-' + str(i)
    create_and_wait_csi_pod(restore_pod_name, client, core_api, csi_pv, pvc,
                            pod_make, base_image, backup["url"])

    restored = read_volume_data(core_api, restore_pod_name)
    assert restored == test_data

    backup_volume.backupDelete(name=backup["name"])

    # The deleted backup must no longer show up in the backup list.
    still_listed = any(
        item["snapshotName"] == snapshot["name"]
        for item in backup_volume.backupList()
    )
    assert not still_listed
Пример #8
0
def backupstore_test(client, core_api, csi_pv, pvc, pod_make, pod_name, base_image, test_data, i):  # NOQA
    """
    Round-trip `test_data` through a backup of a CSI volume.

    The data is written via pod `pod_name`, the volume named in
    `csi_pv['metadata']['name']` is snapshotted and backed up, and a second
    pod `csi-backup-test-<i>` is created from the backup URL. After the
    restored data is verified the backup is deleted, and the backup list is
    checked to no longer contain an entry for the snapshot.
    """
    vol_name = csi_pv['metadata']['name']
    write_volume_data(core_api, pod_name, test_data)

    source_volume = client.by_id_volume(vol_name)
    snapshot = source_volume.snapshotCreate()
    source_volume.snapshotBackup(name=snapshot["name"])

    backup_volume, backup = common.find_backup(client, vol_name,
                                               snapshot["name"])

    restore_pod_name = 'csi-backup-test-' + str(i)
    create_and_wait_csi_pod(restore_pod_name, client, core_api, csi_pv, pvc,
                            pod_make, base_image, backup["url"])

    assert read_volume_data(core_api, restore_pod_name) == test_data

    backup_volume.backupDelete(name=backup["name"])

    # Look for any leftover backup that still points at our snapshot.
    leftovers = [
        item for item in backup_volume.backupList()
        if item["snapshotName"] == snapshot["name"]
    ]
    assert not leftovers
Пример #9
0
def wait_for_recurring_backup_to_start(client,
                                       core_api,
                                       volume_name,
                                       expected_snapshot_count,
                                       minimum_progress=0):  # NOQA
    """
    Wait until a recurring backup of `volume_name` has started.

    Checks the recurring job pod `<volume_name>-backup-c` through
    `check_pod_existence`, then polls the volume's snapshot list up to
    RETRY_BACKUP_COUNTS times until it holds `expected_snapshot_count + 1`
    entries and one of them has `volume-head` as a child. Finally calls
    `common.wait_for_backup_to_start` for that snapshot.

    :param client: The Longhorn client to use in the requests.
    :param core_api: Passed to `check_pod_existence` for the job pod check.
    :param volume_name: The name of the volume being backed up.
    :param expected_snapshot_count: The snapshot list must contain exactly
        this many entries plus one (presumably the volume-head entry).
    :param minimum_progress: Passed as `chk_progress` to
        `common.wait_for_backup_to_start`.
    :return: The name of the snapshot whose child is `volume-head`.
    """
    job_pod_name = volume_name + '-backup-c'
    snapshot_name = ''
    check_pod_existence(core_api, job_pod_name, namespace=LONGHORN_NAMESPACE)

    # Find the snapshot which is being backed up
    for _ in range(RETRY_BACKUP_COUNTS):
        volume = client.by_id_volume(volume_name)
        try:
            snapshots = volume.snapshotList()

            assert len(snapshots) == expected_snapshot_count + 1
            for snapshot in snapshots:
                if snapshot.children['volume-head']:
                    snapshot_name = snapshot.name
                    break
        except (AttributeError, ApiException, AssertionError):
            # The snapshot list is not in the expected state yet; fall
            # through to the shared sleep-and-retry path below.
            pass
        if snapshot_name:
            break
        # Sleep after EVERY unsuccessful attempt. Previously the sleep only
        # happened when an exception was raised, so a list with the right
        # length but no matching snapshot burned through all retries
        # without waiting.
        time.sleep(RETRY_BACKUP_INTERVAL)
    assert len(snapshot_name) != 0

    # To ensure the progress of backup
    common.wait_for_backup_to_start(client,
                                    volume_name,
                                    snapshot_name=snapshot_name,
                                    chk_progress=minimum_progress)

    return snapshot_name
Пример #10
0
def ha_rebuild_replica_test(client, volname):   # NOQA
    """
    Remove one replica of a two-replica volume and verify the rebuild.

    Writes random data, removes the first replica, waits until a replica
    with a new name shows up, waits for the volume to become healthy, and
    then checks that the volume is attached and healthy, still contains the
    second original replica, and still holds the written data.

    :param client: The Longhorn client to use in the requests.
    :param volname: The name of the volume under test.
    """
    volume = client.by_id_volume(volname)
    assert get_volume_endpoint(volume) == DEV_PATH + volname

    assert len(volume["replicas"]) == 2
    removed_replica = volume["replicas"][0]
    assert removed_replica["name"] != ""

    kept_replica = volume["replicas"][1]
    assert kept_replica["name"] != ""

    data = write_volume_random_data(volume)

    volume.replicaRemove(name=removed_replica["name"])

    # wait until we saw a replica starts rebuilding
    original_names = (removed_replica["name"], kept_replica["name"])
    rebuild_started = False
    for _ in range(RETRY_COUNTS):
        current = client.by_id_volume(volname)
        rebuild_started = any(
            rep["name"] not in original_names for rep in current["replicas"]
        )
        if rebuild_started:
            break
        time.sleep(RETRY_INTERVAL)
    assert rebuild_started

    common.wait_for_volume_healthy(client, volname)

    volume = client.by_id_volume(volname)
    assert volume["state"] == common.VOLUME_STATE_ATTACHED
    assert volume["robustness"] == common.VOLUME_ROBUSTNESS_HEALTHY
    assert len(volume["replicas"]) >= 2

    # The replica we did not remove must still be part of the volume.
    assert any(
        rep["name"] == kept_replica["name"] for rep in volume["replicas"]
    )

    check_volume_data(volume, data)
Пример #11
0
def ha_rebuild_replica_test(client, volname):  # NOQA
    """
    Verify that a removed replica gets rebuilt and the data survives.

    The volume must start with exactly two named replicas. After random data
    is written, the first replica is removed. The test then polls until the
    volume lists a replica whose name matches neither original, waits for
    the volume to be healthy, and asserts that it is attached, healthy, has
    at least two replicas including the untouched original, and still
    contains the written data.

    :param client: The Longhorn client to use in the requests.
    :param volname: The name of the volume under test.
    """
    volume = client.by_id_volume(volname)
    assert get_volume_endpoint(volume) == DEV_PATH + volname

    assert len(volume["replicas"]) == 2
    first = volume["replicas"][0]
    assert first["name"] != ""

    second = volume["replicas"][1]
    assert second["name"] != ""

    data = write_volume_random_data(volume)

    volume = volume.replicaRemove(name=first["name"])

    # wait until we saw a replica starts rebuilding
    saw_new_replica = False
    for _ in range(RETRY_COUNTS):
        names_now = [rep["name"] for rep in
                     client.by_id_volume(volname)["replicas"]]
        for name in names_now:
            if name not in (first["name"], second["name"]):
                saw_new_replica = True
                break
        if saw_new_replica:
            break
        time.sleep(RETRY_INTERVAL)
    assert saw_new_replica

    volume = common.wait_for_volume_healthy(client, volname)

    volume = client.by_id_volume(volname)
    assert volume["state"] == common.VOLUME_STATE_ATTACHED
    assert volume["robustness"] == common.VOLUME_ROBUSTNESS_HEALTHY
    assert len(volume["replicas"]) >= 2

    # The second original replica was never removed, so it must remain.
    surviving = [rep for rep in volume["replicas"]
                 if rep["name"] == second["name"]]
    assert surviving

    check_volume_data(volume, data)
Пример #12
0
def test_statefulset_recurring_backup(
        client,
        core_api,
        storage_class,  # NOQA
        statefulset):  # NOQA
    """
    Test that recurring backups on StatefulSets work properly.

    Creates the StatefulSet, writes data to every pod, sets a per-minute
    backup job (retain=2) on every pod's volume, waits 5 minutes, and then
    expects every volume to list exactly 3 snapshot entries that are not
    marked as removed.
    """

    statefulset_name = 'statefulset-backup-test'
    update_test_manifests(statefulset, storage_class, statefulset_name)

    create_storage_class(storage_class)
    create_and_wait_statefulset(statefulset)

    # backup every minute
    job_backup = dict(name="backup", cron="* * * * *", task="backup",
                      retain=2)

    pod_data = get_statefulset_pod_info(core_api, statefulset)
    for entry in pod_data:
        entry['data'] = generate_random_data(VOLUME_RWTEST_SIZE)
        entry['backup_snapshot'] = ''

    for entry in pod_data:
        target = client.by_id_volume(entry['pv_name'])
        write_volume_data(core_api, entry['pod_name'], entry['data'])
        target.recurringUpdate(jobs=[job_backup])

    time.sleep(300)

    for entry in pod_data:
        snapshots = client.by_id_volume(entry['pv_name']).snapshotList()
        remaining = sum(1 for snap in snapshots if snap['removed'] is False)

        # two backups + volume-head
        assert remaining == 3
Пример #13
0
def get_zone_replica_count(client, volume_name, zone_name):  # NOQA
    """
    Count the replicas of a volume that sit on nodes in a given zone.

    :param client: Client providing `by_id_volume` and `by_id_node`.
    :param volume_name: The name of the volume to inspect.
    :param zone_name: The zone to match against each replica node's `zone`.
    :return: The number of replicas whose node zone equals `zone_name`.
    """
    replicas = client.by_id_volume(volume_name).replicas
    # Resolve each replica's host node and compare that node's zone.
    return sum(
        1 for rep in replicas
        if client.by_id_node(rep.hostId).zone == zone_name
    )
Пример #14
0
def get_zone_replica_count(client, volume_name, zone_name, chk_running=False): # NOQA
    """
    Count the replicas of a volume that sit on nodes in a given zone.

    :param client: Client providing `by_id_volume` and `by_id_node`.
    :param volume_name: The name of the volume to inspect.
    :param zone_name: The zone to match against each replica node's `zone`.
    :param chk_running: When true, replicas whose `running` flag is falsy
        are left out of the count.
    :return: The number of counted replicas whose node zone equals
        `zone_name`.
    """
    candidates = client.by_id_volume(volume_name).replicas
    if chk_running:
        candidates = [rep for rep in candidates if rep.running]
    # Resolve each remaining replica's host node and compare its zone.
    return sum(
        1 for rep in candidates
        if client.by_id_node(rep.hostId).zone == zone_name
    )
Пример #15
0
def test_hard_anti_affinity_scheduling(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity work as expected.

    With Hard Anti-Affinity, scheduling on nodes with existing replicas should
    be forbidden, resulting in "Degraded" state.

    1. Create a volume, attach it to the current node and expect 3 replicas.
    2. Generate and write `data` to the volume.
    3. Set `soft anti-affinity` to false.
    4. Disable scheduling on the current node.
    5. Remove the replica on the current node, then
        1. wait for the volume to become degraded,
        2. wait for the volume to report a scheduling failure,
        3. verify exactly two replicas are running in `RW` mode,
        4. verify exactly one replica has no `hostId`, meaning it is
        unscheduled,
        5. verify three replicas still exist in total.
    6. Check volume `data` and clean up the volume.
    """
    volume = create_and_check_volume(client, volume_name)
    self_host = get_self_host_id()
    volume.attach(hostId=self_host)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    data = write_volume_random_data(volume)
    soft_setting = client.by_id_setting(
        SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(soft_setting, value="false")
    self_node = client.by_id_node(self_host)
    client.update(self_node, allowScheduling=False)
    local_replica = get_host_replica(volume, self_host)

    volume.replicaRemove(name=local_replica.name)
    # Rather than waiting for a scheduling timeout, which would lengthen the
    # test considerably, confirm that scheduling is blocked by checking that
    # the volume turns Degraded and reports a scheduling error.
    wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)

    volume = client.by_id_volume(volume_name)
    # Three replicas are requested, but only two may actually be healthy.
    healthy = [rep for rep in volume.replicas
               if rep.running and rep.mode == "RW"]
    assert len(healthy) == 2
    # The remaining replica must be the unscheduled one.
    unscheduled = [rep for rep in volume.replicas if not rep.hostId]
    assert len(unscheduled) == 1
    # Three replicas in total should still exist.
    assert len(volume.replicas) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
Пример #16
0
def backupstore_test(client, core_api, csi_pv, pvc, pod_make, pod_name, vol_name, backing_image, test_data):  # NOQA
    """
    Back up a CSI volume, restore it into a second pod, and clean up.

    Writes `test_data` through pod `pod_name`, snapshots volume `vol_name`
    and backs the snapshot up, waits for the backup to complete, creates the
    pod `csi-backup-test-2` from the backup URL, and checks that it reads
    back `test_data`. Afterwards the backup, the second pod and the second
    volume are deleted.
    """
    write_pod_volume_data(core_api, pod_name, test_data)

    source_volume = client.by_id_volume(vol_name)
    snapshot = create_snapshot(client, vol_name)
    source_volume.snapshotBackup(name=snapshot.name)

    common.wait_for_backup_completion(client, vol_name, snapshot.name)
    backup_volume, backup = common.find_backup(client, vol_name,
                                               snapshot.name)

    restore_pod_name = 'csi-backup-test-2'
    restored_vol_name = create_and_wait_csi_pod(
        restore_pod_name, client, core_api, csi_pv, pvc, pod_make,
        backing_image, backup.url)
    restored_volume = client.by_id_volume(restored_vol_name)

    assert read_volume_data(core_api, restore_pod_name) == test_data

    # Clean up everything created for the restore check.
    delete_backup(client, backup_volume.name, backup.name)
    delete_and_wait_pod(core_api, restore_pod_name)
    client.delete(restored_volume)
Пример #17
0
def test_statefulset_recurring_backup(client, core_api, storage_class,  # NOQA
                                      statefulset):  # NOQA
    """
    Test that recurring backups on StatefulSets work properly.

    After the StatefulSet is created, data is written to every pod and a
    per-minute backup job (retain=2) is set on every pod's volume. Five
    minutes later each volume must list exactly 3 snapshot entries that are
    not marked as removed.
    """

    statefulset_name = 'statefulset-backup-test'
    update_statefulset_manifests(statefulset, storage_class, statefulset_name)

    create_storage_class(storage_class)
    create_and_wait_statefulset(statefulset)

    # backup every minute
    job_backup = {
        "name": "backup",
        "cron": "* * * * *",
        "task": "backup",
        "retain": 2,
    }
    pod_data = get_statefulset_pod_info(core_api, statefulset)
    for entry in pod_data:
        entry['data'] = generate_random_data(VOLUME_RWTEST_SIZE)
        entry['backup_snapshot'] = ''

    for entry in pod_data:
        target = client.by_id_volume(entry['pv_name'])
        write_volume_data(core_api, entry['pod_name'], entry['data'])
        target.recurringUpdate(jobs=[job_backup])

    time.sleep(300)

    for entry in pod_data:
        snapshots = client.by_id_volume(entry['pv_name']).snapshotList()
        kept = [snap for snap in snapshots if snap['removed'] is False]

        # two backups + volume-head
        assert len(kept) == 3
Пример #18
0
def test_replica_rebuild_per_volume_limit(client, core_api, storage_class,
                                          sts_name, statefulset):  # NOQA
    """
    Scale a single-replica StatefulSet volume up to 5 replicas and back to 1

    1. Set replica node soft anti-affinity to `true`.
    2. Set the StorageClass `numberOfReplicas` to "1" and prepare a
       StatefulSet holding DATA_SIZE_IN_MB_2 MB of data at `/data/test`.
    3. Update the volume replica count to 5 and wait through
       `common.wait_for_volume_replicas_mode` for mode `RW` with
       replica_count=5.
    4. Drop the first entry from the returned replica list and remove every
       remaining replica from the volume.
    5. Wait through `common.wait_for_volume_replicas_mode` for mode `RW`
       with replica_count=1.
    6. Verify the md5sum of the pod data still matches.
    """
    soft_setting = \
        client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(soft_setting, value="true")

    data_path = '/data/test'
    storage_class['parameters']['numberOfReplicas'] = "1"
    prepared = common.prepare_statefulset_with_data_in_mb(
        client, core_api, statefulset, sts_name, storage_class,
        data_path=data_path, data_size_in_mb=DATA_SIZE_IN_MB_2)
    vol_name, pod_name, md5sum = prepared

    # Scale the volume replica to 5
    scaled_up = 5
    client.by_id_volume(vol_name).updateReplicaCount(replicaCount=scaled_up)

    vol = common.wait_for_volume_replicas_mode(
        client, vol_name, 'RW', replica_count=scaled_up)

    # Delete 4 volume replicas: the first list entry is spared.
    del vol.replicas[0]
    for replica in vol.replicas:
        vol.replicaRemove(name=replica.name)

    scaled_down = 1
    common.wait_for_volume_replicas_mode(
        client, vol_name, 'RW', replica_count=scaled_down)

    current_md5sum = common.get_pod_data_md5sum(core_api, pod_name, data_path)
    assert md5sum == current_md5sum
Пример #19
0
def test_recurring_job_in_storageclass(client, core_api, storage_class, statefulset):  # NOQA
    """
    Test creating volumes from a StorageClass that contains recurring jobs

    1. Put the JSON-encoded result of `create_jobs1()` into the StorageClass
       parameter `recurringJobs` and create the StorageClass.
    2. Create a StatefulSet that uses the StorageClass.
    3. Wait 5 minutes, then run `check_jobs1_result` on every pod's volume.
    """
    statefulset_name = 'recurring-job-in-storageclass-test'
    update_statefulset_manifests(statefulset, storage_class, statefulset_name)
    jobs_json = json.dumps(create_jobs1())
    storage_class['parameters']['recurringJobs'] = jobs_json

    create_storage_class(storage_class)
    create_and_wait_statefulset(statefulset)

    pv_names = []
    for info in get_statefulset_pod_info(core_api, statefulset):
        pv_names.append(info['pv_name'])

    # 5 minutes
    time.sleep(300)
    for pv_name in pv_names:
        check_jobs1_result(client.by_id_volume(pv_name))
Пример #20
0
def test_tag_scheduling_on_update(client, node_default_tags, volume_name):  # NOQA
    """
    Test that Replicas get scheduled once a Node and its Disk are updated
    with the proper Tags.

    1. Create a 3-replica volume with disk and node selectors and verify
       the selectors on the detached volume.
    2. Wait for the volume to report a scheduling failure.
    3. Tag the first disk of the current node with the disk selector tags
       and the node itself with the node selector tags.
    4. Poll until the volume condition `scheduled` has status "True".
    5. Attach the volume, wait for it to be healthy, and check its 3
       replicas against the tags of all nodes.
    6. Clean up the volume.
    """
    disk_tags = ["ssd", "m2"]
    node_tags = ["main", "fallback"]
    tag_spec = {
        "disk": disk_tags,
        "expected": 1,
        "node": node_tags,
    }
    client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=3,
                         diskSelector=tag_spec["disk"],
                         nodeSelector=tag_spec["node"])
    volume = wait_for_volume_detached(client, volume_name)
    assert volume["diskSelector"] == tag_spec["disk"]
    assert volume["nodeSelector"] == tag_spec["node"]

    wait_scheduling_failure(client, volume_name)

    host_id = get_self_host_id()
    self_node = client.by_id_node(host_id)
    disks_to_update = get_update_disks(self_node["disks"])
    disks_to_update[0]["tags"] = tag_spec["disk"]
    self_node = self_node.diskUpdate(disks=disks_to_update)
    set_node_tags(client, self_node, tag_spec["node"])

    # Poll until the scheduled condition flips to "True".
    scheduled = False
    for _ in range(RETRY_COUNTS):
        current = client.by_id_volume(volume_name)
        scheduled = current["conditions"]["scheduled"]["status"] == "True"
        if scheduled:
            break
        sleep(RETRY_INTERVAL)
    assert scheduled

    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)

    # Map every node id to the tags of its first disk and its node tags.
    node_mapping = {}
    for lh_node in client.list_node():
        node_mapping[lh_node["id"]] = {
            "disk": get_update_disks(lh_node["disks"])[0]["tags"],
            "node": lh_node["tags"],
        }
    assert len(volume["replicas"]) == 3
    check_volume_replicas(volume, tag_spec, node_mapping)

    cleanup_volume(client, volume)
Пример #21
0
def test_recurring_job_in_storageclass(client, core_api, storage_class,
                                       statefulset):  # NOQA
    """
    Test volumes created from a StorageClass carrying recurring jobs

    The JSON-encoded result of `create_jobs1()` is stored in the
    StorageClass parameter `recurringJobs`, a StatefulSet using that
    StorageClass is created, and after 5 minutes `check_jobs1_result` is run
    on the volume of every StatefulSet pod.
    """
    statefulset_name = 'recurring-job-in-storageclass-test'
    update_statefulset_manifests(statefulset, storage_class, statefulset_name)
    encoded_jobs = json.dumps(create_jobs1())
    storage_class['parameters']['recurringJobs'] = encoded_jobs

    create_storage_class(storage_class)
    create_and_wait_statefulset(statefulset)

    pods = get_statefulset_pod_info(core_api, statefulset)
    pv_names = list(map(lambda info: info['pv_name'], pods))

    # 5 minutes
    time.sleep(300)
    for pv_name in pv_names:
        backed_volume = client.by_id_volume(pv_name)
        check_jobs1_result(backed_volume)
Пример #22
0
def backupstore_test(client, core_api, csi_pv, pvc, pod_make, pod_name, base_image, test_data, i):  # NOQA
    """
    Back up a CSI volume and verify the data restored from the backup.

    Writes `test_data` through pod `pod_name`, snapshots the volume named in
    `csi_pv['metadata']['name']`, backs the snapshot up and waits for the
    backup to complete. The pod `csi-backup-test-<i>` is then created from
    the backup URL and must read back `test_data`. Finally the backup is
    deleted.
    """
    vol_name = csi_pv['metadata']['name']
    write_pod_volume_data(core_api, pod_name, test_data)

    source_volume = client.by_id_volume(vol_name)
    snapshot = create_snapshot(client, vol_name)
    source_volume.snapshotBackup(name=snapshot.name)

    common.wait_for_backup_completion(client, vol_name, snapshot.name)
    backup_volume, backup = common.find_backup(client, vol_name,
                                               snapshot.name)

    restore_pod_name = 'csi-backup-test-' + str(i)
    create_and_wait_csi_pod(restore_pod_name, client, core_api, csi_pv, pvc,
                            pod_make, base_image, backup.url)

    assert read_volume_data(core_api, restore_pod_name) == test_data

    delete_backup(client, backup_volume.name, backup.name)
Пример #23
0
def test_recurring_job_in_storageclass(set_random_backupstore, client,
                                       core_api, storage_class,
                                       statefulset):  # NOQA
    """
    Test creating volumes from a StorageClass that contains recurring jobs

    1. Create a StorageClass whose `recurringJobs` parameter holds the
       JSON-encoded result of `create_jobs1()`.
    2. Wait until the beginning of an even minute, then create a
       StatefulSet with PVC template and StorageClass; creation must take
       less than 150 seconds.
    3. At the 150th second since the start of the creation, write random
       data to every pod.
    4. Wait another 150 seconds and run `check_jobs1_result` on every
       pod's volume.
    """
    statefulset_name = 'recurring-job-in-storageclass-test'
    update_statefulset_manifests(statefulset, storage_class, statefulset_name)
    encoded_jobs = json.dumps(create_jobs1())
    storage_class["parameters"]["recurringJobs"] = encoded_jobs

    create_storage_class(storage_class)

    # wait until the beginning of an even minute
    wait_until_begin_of_an_even_minute()

    start_time = datetime.utcnow()
    create_and_wait_statefulset(statefulset)
    elapsed_seconds = (datetime.utcnow() - start_time).seconds

    assert elapsed_seconds < 150

    # We want to write data exactly at the 150th second since the start_time
    time.sleep(150 - elapsed_seconds)

    pod_info = get_statefulset_pod_info(core_api, statefulset)
    pv_names = [info['pv_name'] for info in pod_info]
    pod_names = [info['pod_name'] for info in pod_info]

    # write random data to volume to trigger recurring snapshot and backup job
    volume_data_path = "/data/test"
    for name in pod_names:
        write_pod_volume_random_data(core_api, name, volume_data_path, 2)

    time.sleep(150)  # 2.5 minutes

    for pv_name in pv_names:
        check_jobs1_result(client.by_id_volume(pv_name))
Пример #24
0
def wait_new_replica_ready(client, volume_name, replica_names):  # NOQA
    """
    Poll a volume until it has a new replica that is running in RW mode.

    A replica counts as new when its name is not in `replica_names`. The
    volume is checked up to RETRY_COUNTS times, RETRY_INTERVAL apart; a
    failed assertion is triggered if no such replica shows up.
    :param client: The Longhorn client to use in the request.
    :param volume_name: The name of the volume.
    :param replica_names: The list of names of the volume's old replicas.
    """
    ready = False
    for _ in range(RETRY_COUNTS):
        current = client.by_id_volume(volume_name)
        ready = any(
            rep["name"] not in replica_names
            and rep["running"]
            and rep["mode"] == "RW"
            for rep in current["replicas"]
        )
        if ready:
            break
        sleep(RETRY_INTERVAL)
    assert ready
Пример #25
0
def create_volume(client, vol_name, size, node_id, r_num):  # NOQA
    """
    Create a volume, validate it while detached, then attach it to a node.

    :param client: The Longhorn client to use in the requests.
    :param vol_name: The name of the volume to create.
    :param size: The size passed through to `client.create_volume`.
    :param node_id: The host ID the volume is attached to.
    :param r_num: The number of replicas requested for the volume.
    :return: The volume returned by `common.wait_for_volume_healthy` after
        the attach request.
    """
    created = client.create_volume(name=vol_name, size=size,
                                   numberOfReplicas=r_num)
    assert created["numberOfReplicas"] == r_num
    assert created["frontend"] == "blockdev"

    detached = common.wait_for_volume_detached(client, vol_name)
    assert len(detached["replicas"]) == r_num
    assert detached["state"] == "detached"
    assert detached["created"] != ""

    # A fresh lookup by name must agree with the object we waited on.
    fetched = client.by_id_volume(vol_name)
    for field in ("name", "size", "numberOfReplicas", "state", "created"):
        assert fetched[field] == detached[field]

    detached.attach(hostId=node_id)
    return common.wait_for_volume_healthy(client, vol_name)
Пример #26
0
def create_volume(client, vol_name, size, node_id, r_num):  # NOQA
    """
    Create a volume, check its state once detached, and attach it to a host.

    :param client: The Longhorn client to use in the requests.
    :param vol_name: The name of the volume to create.
    :param size: The size passed through to `client.create_volume`.
    :param node_id: The host ID the volume is attached to.
    :param r_num: The number of replicas requested for the volume.
    :return: The volume returned by `common.wait_for_volume_healthy` after
        the attach request.
    """
    new_volume = client.create_volume(name=vol_name, size=size,
                                      numberOfReplicas=r_num)
    assert new_volume["numberOfReplicas"] == r_num
    assert new_volume["frontend"] == "blockdev"

    idle_volume = common.wait_for_volume_detached(client, vol_name)
    assert len(idle_volume["replicas"]) == r_num
    assert idle_volume["state"] == "detached"
    assert idle_volume["created"] != ""

    # Looking the volume up by name must return the same data.
    by_name = client.by_id_volume(vol_name)
    for key in ("name", "size", "numberOfReplicas", "state", "created"):
        assert by_name[key] == idle_volume[key]

    idle_volume.attach(hostId=node_id)
    return common.wait_for_volume_healthy(client, vol_name)
Пример #27
0
def test_delete_with_provisioned_pv(client, core_api, storage_class, pvc): # NOQA
    """
    Test that deleting a Volume backed by a dynamically provisioned
    Persistent Volume and Persistent Volume Claim also removes those
    Kubernetes resources.

    1. Provision a Persistent Volume through the Storage Class and the
    Persistent Volume Claim, and wait for it.
    2. Look up the Longhorn Volume behind the Persistent Volume.
    3. Delete the Volume through the Longhorn API.
    4. Wait for the Volume, the Persistent Volume and the Persistent Volume
    Claim to be deleted.
    """
    provisioned = provision_and_wait_pv(client, core_api, storage_class, pvc)
    pv_name = provisioned.metadata.name
    volume_name = provisioned.spec.csi.volume_handle  # NOQA

    client.delete(client.by_id_volume(volume_name))

    wait_for_volume_delete(client, volume_name)
    wait_delete_pv(core_api, pv_name)
    wait_delete_pvc(core_api, pvc['metadata']['name'])
def test_backup_kubernetes_status(set_random_backupstore, client, core_api,
                                  pod):  # NOQA
    """
    Test that the KubernetesStatus of a Volume is stored in its Backups and
    carried over to Volumes restored from them, when the Volume is used by a
    PersistentVolumeClaim and a Pod.

    1. Setup a random backupstore
    2. Set settings Longhorn Static StorageClass to `longhorn-static-test`
    3. Create a volume and PV/PVC. Verify the StorageClass of PVC
    4. Create a Pod using the PVC.
    5. Check volume's Kubernetes status to reflect PV/PVC/Pod correctly.
    6. Create a backup for the volume.
    7. Verify the labels of created backup reflect PV/PVC/Pod status.
    8. Restore the backup to a volume. Wait for restoration to complete.
    9. Check the restored volume's Kubernetes Status
        1. `lastPodRefAt` and `lastPVCRefAt` must equal the snapshot
        creation time of the backup
    10. Delete the backup and restored volume.
    11. Delete PV/PVC/Pod.
    12. Verify volume's Kubernetes Status updated to reflect history data.
    13. Attach the volume and create another backup. Verify the labels
    14. Restore that backup to a new volume. Wait for restoration.
    15. Verify the restored volume's Kubernetes status.
        1. `lastPodRefAt` and `lastPVCRefAt` must match the values stored
        in the labels of the second backup
    16. Clean up the backupstore and the volumes.
    """

    host_id = get_self_host_id()
    static_sc_name = "longhorn-static-test"
    setting = client.by_id_setting(SETTING_DEFAULT_LONGHORN_STATIC_SC)
    setting = client.update(setting, value=static_sc_name)
    assert setting.value == static_sc_name

    volume_name = "test-backup-kubernetes-status-pod"  # NOQA
    client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=2)
    volume = wait_for_volume_detached(client, volume_name)

    pod_name = "pod-" + volume_name
    pv_name = "pv-" + volume_name
    pvc_name = "pvc-" + volume_name

    def expected_ks(pod_ref_at, pvc_ref_at, pv_bound):
        # Build the expected Kubernetes status. Only the two Ref timestamps
        # and the presence of PV data differ between the phases of the test.
        return {
            'lastPodRefAt': pod_ref_at,
            'lastPVCRefAt': pvc_ref_at,
            'namespace': 'default',
            'pvcName': pvc_name,
            'pvName': pv_name if pv_bound else '',
            'pvStatus': 'Bound' if pv_bound else '',
            'workloadsStatus': [{
                'podName': pod_name,
                'podStatus': 'Running',
                'workloadName': '',
                'workloadType': ''
            }]
        }

    create_pv_for_volume(client, core_api, volume, pv_name)
    create_pvc_for_volume(client, core_api, volume, pvc_name)
    claims = core_api.list_namespaced_persistent_volume_claim(
        namespace='default')
    pvc_found = next(
        (claim for claim in claims.items if claim.metadata.name == pvc_name),
        False)
    assert pvc_found
    assert pvc_found.spec.storage_class_name == static_sc_name

    mount_name = pod['spec']['containers'][0]['volumeMounts'][0]['name']
    pod['metadata']['name'] = pod_name
    pod['spec']['volumes'] = [{
        'name': mount_name,
        'persistentVolumeClaim': {
            'claimName': pvc_name,
        },
    }]
    create_and_wait_pod(core_api, pod)

    ks = expected_ks('', '', True)
    wait_volume_kubernetes_status(client, volume_name, ks)
    volume = wait_for_volume_healthy(client, volume_name)

    # Create Backup manually instead of calling create_backup since Kubernetes
    # is not guaranteed to mount our Volume to the test host.
    snap = create_snapshot(client, volume_name)
    volume.snapshotBackup(name=snap.name)
    wait_for_backup_completion(client, volume_name, snap.name)
    _, b = find_backup(client, volume_name, snap.name)
    # The label on the backup itself must carry the status.
    status = loads(b.labels.get(KUBERNETES_STATUS_LABEL))
    assert status == ks
    # The label on the backup volume must carry it as well, once populated.
    for _ in range(RETRY_COUNTS):
        bv = client.by_id_backupVolume(volume_name)
        if bv is None or bv.labels is None:
            time.sleep(RETRY_INTERVAL)
            continue
        break
    assert bv is not None and bv.labels is not None
    status = loads(bv.labels.get(KUBERNETES_STATUS_LABEL))
    assert status == ks

    restore_name = generate_volume_name()
    client.create_volume(name=restore_name,
                         size=SIZE,
                         numberOfReplicas=2,
                         fromBackup=b.url)
    wait_for_volume_restoration_completed(client, restore_name)
    wait_for_volume_detached(client, restore_name)

    snapshot_created = b.snapshotCreated
    # Restoration should not apply PersistentVolume data.
    ks = expected_ks(snapshot_created, snapshot_created, False)
    wait_volume_kubernetes_status(client, restore_name, ks)
    restore = client.by_id_volume(restore_name)
    # We need to compare LastPodRefAt and LastPVCRefAt manually since
    # wait_volume_kubernetes_status only checks for empty or non-empty state.
    assert restore.kubernetesStatus.lastPodRefAt == ks["lastPodRefAt"]
    assert restore.kubernetesStatus.lastPVCRefAt == ks["lastPVCRefAt"]

    delete_backup(client, bv.name, b.name)
    client.delete(restore)
    wait_for_volume_delete(client, restore_name)
    delete_and_wait_pod(core_api, pod_name)
    delete_and_wait_pvc(core_api, pvc_name)
    delete_and_wait_pv(core_api, pv_name)

    # With the Pod, PVC, and PV deleted, the Volume should have both Ref
    # fields set. Check that a new Backup and Restore will use this instead of
    # manually populating the Ref fields.
    ks = expected_ks('NOT NULL', 'NOT NULL', False)
    wait_volume_kubernetes_status(client, volume_name, ks)
    volume = wait_for_volume_detached(client, volume_name)

    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)

    snap = create_snapshot(client, volume_name)
    volume.snapshotBackup(name=snap.name)
    volume = wait_for_backup_completion(client, volume_name, snap.name)
    bv, b = find_backup(client, volume_name, snap.name)
    new_b = bv.backupGet(name=b.name)
    status = loads(new_b.labels.get(KUBERNETES_STATUS_LABEL))
    # Check each field manually, we have no idea what the LastPodRefAt or the
    # LastPVCRefAt will be. We just know it shouldn't be SnapshotCreated.
    assert status['lastPodRefAt'] != snapshot_created
    assert status['lastPVCRefAt'] != snapshot_created
    assert status['namespace'] == "default"
    assert status['pvcName'] == pvc_name
    assert status['pvName'] == ""
    assert status['pvStatus'] == ""
    assert status['workloadsStatus'] == [{
        'podName': pod_name,
        'podStatus': 'Running',
        'workloadName': '',
        'workloadType': ''
    }]

    restore_name = generate_volume_name()
    client.create_volume(name=restore_name,
                         size=SIZE,
                         numberOfReplicas=2,
                         fromBackup=b.url)
    wait_for_volume_restoration_completed(client, restore_name)
    wait_for_volume_detached(client, restore_name)

    ks = expected_ks(status['lastPodRefAt'], status['lastPVCRefAt'], False)
    wait_volume_kubernetes_status(client, restore_name, ks)
    restore = client.by_id_volume(restore_name)
    assert restore.kubernetesStatus.lastPodRefAt == ks["lastPodRefAt"]
    assert restore.kubernetesStatus.lastPVCRefAt == ks["lastPVCRefAt"]

    # cleanup
    backupstore_cleanup(client)
    client.delete(restore)
    cleanup_volume(client, volume)
Пример #29
0
def engine_live_upgrade_rollback_test(client,
                                      volume_name,
                                      base_image=""):  # NOQA
    """
    Exercise rolling back an engine upgrade that cannot complete.

    1. Deploy an extra engine image obtained from
    `common.get_compatibility_test_image`, built from the API and data format
    versions of the default engine image.
    2. Create a volume, attach it to the current host and write random data.
    3. Request a live upgrade to the extra image. `engineImage` changes but
    `currentImage` stays on the original image, and waiting for the switch is
    expected to raise.
    4. Roll back to the original image and verify the images, the volume
    state and the data.
    5. Request the upgrade again, then detach the volume. Once detached, the
    volume, its engine and its replicas report the extra image.
    6. Upgrade back to the original image while detached, attach again, and
    verify the images and the data.
    7. Delete the volume and the extra engine image.

    :param client: The Longhorn client to use in the requests.
    :param volume_name: The name of the volume to create for the test.
    :param base_image: Passed as `baseImage` when creating the volume and
        checked against the created volume.
    """
    # The default image must be unreferenced before the test starts.
    default_img = common.get_default_engine_image(client)
    default_img_name = default_img["name"]
    default_img = wait_for_engine_image_ref_count(client, default_img_name, 0)
    cli_v = default_img["cliAPIVersion"]
    cli_minv = default_img["cliAPIMinVersion"]
    ctl_v = default_img["controllerAPIVersion"]
    ctl_minv = default_img["controllerAPIMinVersion"]
    data_v = default_img["dataFormatVersion"]
    data_minv = default_img["dataFormatMinVersion"]
    # Image used as the upgrade target that a live upgrade never reaches.
    wrong_engine_upgrade_image = common.get_compatibility_test_image(
        cli_v, cli_minv, ctl_v, ctl_minv, data_v, data_minv)
    new_img = client.create_engine_image(image=wrong_engine_upgrade_image)
    new_img_name = new_img["name"]
    new_img = wait_for_engine_image_state(client, new_img_name, "ready")
    assert new_img["refCount"] == 0
    assert new_img["noRefSince"] != ""

    default_img = common.get_default_engine_image(client)
    default_img_name = default_img["name"]

    volume = client.create_volume(name=volume_name,
                                  size=SIZE,
                                  numberOfReplicas=2,
                                  baseImage=base_image)
    volume = common.wait_for_volume_detached(client, volume_name)
    # The new volume is the single reference to the default image.
    default_img = wait_for_engine_image_ref_count(client, default_img_name, 1)
    assert volume["baseImage"] == base_image

    original_engine_image = volume["engineImage"]
    assert original_engine_image != wrong_engine_upgrade_image

    host_id = get_self_host_id()
    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    data = write_volume_random_data(volume)

    # Live upgrade: the requested image changes right away, the image
    # actually in use does not.
    volume.engineUpgrade(image=wrong_engine_upgrade_image)
    volume = client.by_id_volume(volume["name"])
    assert volume["engineImage"] == wrong_engine_upgrade_image
    assert volume["currentImage"] == original_engine_image

    with pytest.raises(Exception):
        # this will timeout
        wait_for_volume_current_image(client, volume_name,
                                      wrong_engine_upgrade_image)

    # rollback
    volume.engineUpgrade(image=original_engine_image)
    volume = wait_for_volume_current_image(client, volume_name,
                                           original_engine_image)
    assert volume["engineImage"] == original_engine_image
    assert volume["currentImage"] == original_engine_image
    engine = get_volume_engine(volume)
    assert engine["engineImage"] == original_engine_image
    assert engine["currentImage"] == original_engine_image

    volume = common.wait_for_volume_replica_count(client, volume_name,
                                                  REPLICA_COUNT)

    # The data written before the failed upgrade must survive the rollback.
    check_volume_data(volume, data)

    assert volume["state"] == common.VOLUME_STATE_ATTACHED
    assert volume["robustness"] == common.VOLUME_ROBUSTNESS_HEALTHY

    # try again, this time let's try detach
    volume.engineUpgrade(image=wrong_engine_upgrade_image)
    volume = client.by_id_volume(volume["name"])
    assert volume["engineImage"] == wrong_engine_upgrade_image
    assert volume["currentImage"] == original_engine_image

    with pytest.raises(Exception):
        # this will timeout
        wait_for_volume_current_image(client, volume_name,
                                      wrong_engine_upgrade_image)

    # After the detach the wait below succeeds, unlike the live attempt.
    volume = volume.detach()
    volume = wait_for_volume_current_image(client, volume_name,
                                           wrong_engine_upgrade_image)
    # all the images would be updated
    assert volume["engineImage"] == wrong_engine_upgrade_image
    engine = get_volume_engine(volume)
    assert engine["engineImage"] == wrong_engine_upgrade_image
    volume = common.wait_for_volume_replica_count(client, volume_name,
                                                  REPLICA_COUNT)
    for replica in volume["replicas"]:
        assert replica["engineImage"] == wrong_engine_upgrade_image

    # upgrade to the correct image when offline
    volume.engineUpgrade(image=original_engine_image)
    volume = client.by_id_volume(volume["name"])
    assert volume["engineImage"] == original_engine_image

    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)
    assert volume["engineImage"] == original_engine_image
    assert volume["currentImage"] == original_engine_image
    engine = get_volume_engine(volume)
    assert engine["engineImage"] == original_engine_image
    assert engine["currentImage"] == original_engine_image
    for replica in volume["replicas"]:
        assert replica["engineImage"] == original_engine_image
        assert replica["currentImage"] == original_engine_image

    check_volume_data(volume, data)

    # Cleanup: remove the volume first, then the extra engine image.
    client.delete(volume)
    wait_for_volume_delete(client, volume_name)

    client.delete(new_img)
Пример #30
0
def test_csi_expansion_with_replica_failure(client, core_api, storage_class, pvc, pod_manifest):  # NOQA
    """
    Test that a CSI volume expansion succeeds even though the expansion of
    one replica fails

    1. Create a new `storage_class` with `allowVolumeExpansion` set
    2. Create PVC and Pod with dynamic provisioned volume from the StorageClass
    3. Set up one replica so that its expansion will fail
       (see `fail_replica_expansion`)
    4. Generate `test_data` and write to the pod
    5. Delete the pod and wait for volume detachment
    6. Update pvc.spec.resources to expand the volume
    7. Check expansion result using Longhorn API. An expansion error is
       reported for the failed replica, but the volume reaches the
       expanded size.
    8. Create a new pod and wait for the volume to become healthy again
    9. Validate the volume content, then check if data writing looks fine
    """
    create_storage_class(storage_class)

    pod_name = 'csi-expansion-with-replica-failure-test'
    pvc_name = pod_name + "-pvc"
    pvc['metadata']['name'] = pvc_name
    pvc['spec']['storageClassName'] = storage_class['metadata']['name']
    create_pvc(pvc)

    mount_name = \
        pod_manifest['spec']['containers'][0]['volumeMounts'][0]['name']
    pod_manifest['metadata']['name'] = pod_name
    pod_manifest['spec']['volumes'] = [{
        'name': mount_name,
        'persistentVolumeClaim': {'claimName': pvc_name},
    }]
    create_and_wait_pod(core_api, pod_manifest)

    target_bytes = EXPANDED_VOLUME_SIZE*Gi
    expand_size = str(target_bytes)
    pv = wait_and_get_pv_for_pvc(core_api, pvc_name)
    assert pv.status.phase == "Bound"
    volume_name = pv.spec.csi.volume_handle
    failed_replica = client.by_id_volume(volume_name).replicas[0]
    fail_replica_expansion(client, core_api,
                           volume_name, expand_size, [failed_replica])

    test_data = generate_random_data(VOLUME_RWTEST_SIZE)
    write_pod_volume_data(core_api, pod_name, test_data)

    delete_and_wait_pod(core_api, pod_name)
    wait_for_volume_detached(client, volume_name)

    # There will be replica expansion error info
    # but the expansion should succeed.
    pvc['spec']['resources'] = {
        'requests': {
            'storage': size_to_string(target_bytes)
        }
    }
    expand_and_wait_for_pvc(core_api, pvc)
    wait_for_expansion_failure(client, volume_name)
    wait_for_volume_expansion(client, volume_name)
    expanded = client.by_id_volume(volume_name)
    assert expanded.state == "detached"
    assert expanded.size == expand_size
    # Only the replica that was set up to fail may be marked as failed.
    for replica in expanded.replicas:
        if replica.name != failed_replica.name:
            assert replica.failedAt == ""
        else:
            assert replica.failedAt != ""

    # Check that the volume becomes healthy again
    # and that it still works fine.
    create_and_wait_pod(core_api, pod_manifest)
    healthy = wait_for_volume_healthy(client, volume_name)
    for replica in healthy.replicas:
        if replica.name != failed_replica.name:
            assert replica.mode == "RW"
        else:
            assert replica.mode == ""
    assert read_volume_data(core_api, pod_name) == test_data

    # New writes must round-trip as well.
    test_data = generate_random_data(VOLUME_RWTEST_SIZE)
    write_pod_volume_data(core_api, pod_name, test_data)
    assert read_volume_data(core_api, pod_name) == test_data
def test_kubernetes_status(
        client,
        core_api,
        storage_class,  # NOQA
        statefulset,
        csi_pv,
        pvc,
        pod):  # NOQA
    """
    Test Volume feature: Kubernetes Status

    1. Create StorageClass with `reclaimPolicy = Retain`
    2. Create a statefulset `kubernetes-status-test` with the StorageClass
        1. The statefulset has scale of 2.
    3. Get the volume name from the SECOND pod of the StatefulSet and
    create an `extra_pod` with the same volume on the same node
    4. Check the volumes that used by the StatefulSet
        1. The volume used by the FIRST StatefulSet pod will have one workload
        2. The volume used by the SECOND StatefulSet pod will have two
        workloads
        3. Validate related status, e.g. pv/pod name/state, workload
        name/type
    5. Delete the StatefulSet only, then check the volumes again
        1. PV/PVC should still be bound
        2. The volume used by the FIRST pod should have history data
        3. The volume used by the SECOND and extra pod should have current data
        point to the extra pod
    6. Delete the extra pod
        1. Now all the volumes should only have history data (`lastPodRefAt`
        set)
    7. Delete the PVC
        1. The PV status should become `Released` and `lastPVCRefAt` should
        be set
    8. Delete PV
        1. `pvName` and `pvStatus` should be cleared.
    9. Reuse the two Longhorn volumes to create new pods
        1. Since the `reclaimPolicy == Retain`, volume won't be deleted by
        Longhorn
        2. Check the Kubernetes status now updated, with pod info but empty
        workload
        3. Default Longhorn Static StorageClass will remove the PV with PVC,
        but leave Longhorn volume
    """
    statefulset_name = 'kubernetes-status-test'
    update_statefulset_manifests(statefulset, storage_class, statefulset_name)

    storage_class['reclaimPolicy'] = 'Retain'
    create_storage_class(storage_class)
    create_and_wait_statefulset(statefulset)

    pod_info = get_statefulset_pod_info(core_api, statefulset)
    volume_info = [p['pv_name'] for p in pod_info]

    # Schedule the extra pod on the node of the second StatefulSet pod so
    # that both pods can use the same volume.
    extra_pod_name = 'extra-pod-using-' + volume_info[1]
    pod['metadata']['name'] = extra_pod_name
    p2 = core_api.read_namespaced_pod(name=pod_info[1]['pod_name'],
                                      namespace='default')
    pod['spec']['nodeName'] = p2.spec.node_name
    pod['spec']['volumes'] = [{
        'name':
        pod['spec']['containers'][0]['volumeMounts'][0]['name'],
        'persistentVolumeClaim': {
            'claimName': pod_info[1]['pvc_name'],
        },
    }]
    create_and_wait_pod(core_api, pod)

    for i, (p, volume_name) in enumerate(zip(pod_info, volume_info)):  # NOQA
        volume = client.by_id_volume(volume_name)
        k_status = volume.kubernetesStatus
        workloads = k_status.workloadsStatus
        assert k_status.pvName == p['pv_name']
        assert k_status.pvStatus == 'Bound'
        assert k_status.namespace == 'default'
        assert k_status.pvcName == p['pvc_name']
        assert not k_status.lastPVCRefAt
        assert not k_status.lastPodRefAt
        if i == 0:
            assert len(workloads) == 1
            assert workloads[0].podName == p['pod_name']
            assert workloads[0].workloadName == statefulset_name
            assert workloads[0].workloadType == 'StatefulSet'
            # Sleep and re-read the volume inside the loop. Otherwise every
            # iteration would inspect the same stale status.
            for _ in range(RETRY_COUNTS):
                if workloads[0].podStatus == 'Running':
                    break
                time.sleep(RETRY_INTERVAL)
                volume = client.by_id_volume(volume_name)
                k_status = volume.kubernetesStatus
                workloads = k_status.workloadsStatus
                assert len(workloads) == 1
            assert workloads[0].podStatus == 'Running'
        elif i == 1:
            assert len(workloads) == 2
            # The order of the two workloads is not fixed.
            if workloads[0].podName == p['pod_name']:
                assert workloads[1].podName == extra_pod_name
                assert workloads[0].workloadName == statefulset_name
                assert workloads[0].workloadType == 'StatefulSet'
                assert not workloads[1].workloadName
                assert not workloads[1].workloadType
            else:
                assert workloads[1].podName == p['pod_name']
                assert workloads[0].podName == extra_pod_name
                assert not workloads[0].workloadName
                assert not workloads[0].workloadType
                assert workloads[1].workloadName == statefulset_name
                assert workloads[1].workloadType == 'StatefulSet'
            for _ in range(RETRY_COUNTS):
                if workloads[0].podStatus == 'Running' and \
                        workloads[1].podStatus == 'Running':
                    break
                time.sleep(RETRY_INTERVAL)
                volume = client.by_id_volume(volume_name)
                k_status = volume.kubernetesStatus
                workloads = k_status.workloadsStatus
                assert len(workloads) == 2
            assert workloads[0].podStatus == 'Running'
            assert workloads[1].podStatus == 'Running'

    # One expected-status dict per volume, updated step by step below.
    ks_list = [{} for _ in volume_info]
    delete_and_wait_statefulset_only(core_api, statefulset)
    # the extra pod is still using the 2nd volume
    for i, (p, volume_name, expected) in enumerate(
            zip(pod_info, volume_info, ks_list)):
        expected['pvName'] = p['pv_name']
        expected['pvStatus'] = 'Bound'
        expected['namespace'] = 'default'
        expected['pvcName'] = p['pvc_name']
        expected['lastPVCRefAt'] = ''
        if i == 0:
            expected['lastPodRefAt'] = 'not empty'
            expected['workloadsStatus'] = [
                {
                    'podName': p['pod_name'],
                    'podStatus': 'Running',
                    'workloadName': statefulset_name,
                    'workloadType': 'StatefulSet',
                },
            ]
        elif i == 1:
            expected['lastPodRefAt'] = ''
            expected['workloadsStatus'] = [{
                'podName': extra_pod_name,
                'podStatus': 'Running',
                'workloadName': '',
                'workloadType': '',
            }]
        wait_volume_kubernetes_status(client, volume_name, expected)

    # deleted extra_pod, all volumes have no workload
    delete_and_wait_pod(core_api, pod['metadata']['name'])
    for volume_name, expected in zip(volume_info, ks_list):
        expected['lastPodRefAt'] = 'not empty'
        wait_volume_kubernetes_status(client, volume_name, expected)

    # deleted pvc only.
    for p, volume_name, expected in zip(pod_info, volume_info, ks_list):
        delete_and_wait_pvc(core_api, p['pvc_name'])
        expected['pvStatus'] = 'Released'
        expected['lastPVCRefAt'] = 'not empty'
        wait_volume_kubernetes_status(client, volume_name, expected)

    # deleted pv only.
    for p, volume_name, expected in zip(pod_info, volume_info, ks_list):
        delete_and_wait_pv(core_api, p['pv_name'])
        expected['pvName'] = ''
        expected['pvStatus'] = ''
        wait_volume_kubernetes_status(client, volume_name, expected)

    # reuse that volume
    for p, volume_name in zip(pod_info, volume_info):
        p['pod_name'] = p['pod_name'].replace('kubernetes-status-test',
                                              'kubernetes-status-test-reuse')
        p['pvc_name'] = p['pvc_name'].replace('kubernetes-status-test',
                                              'kubernetes-status-test-reuse')
        p['pv_name'] = p['pvc_name']

        csi_pv['metadata']['name'] = p['pv_name']
        csi_pv['spec']['csi']['volumeHandle'] = volume_name
        csi_pv['spec']['storageClassName'] = \
            DEFAULT_LONGHORN_STATIC_STORAGECLASS_NAME
        core_api.create_persistent_volume(csi_pv)

        pvc['metadata']['name'] = p['pvc_name']
        pvc['spec']['volumeName'] = p['pv_name']
        pvc['spec']['storageClassName'] = \
            DEFAULT_LONGHORN_STATIC_STORAGECLASS_NAME
        core_api.create_namespaced_persistent_volume_claim(body=pvc,
                                                           namespace='default')

        pod['metadata']['name'] = p['pod_name']
        pod['spec']['volumes'] = [{
            'name':
            pod['spec']['containers'][0]['volumeMounts'][0]['name'],
            'persistentVolumeClaim': {
                'claimName': p['pvc_name'],
            },
        }]
        create_and_wait_pod(core_api, pod)

        ks = {
            'pvName':
            p['pv_name'],
            'pvStatus':
            'Bound',
            'namespace':
            'default',
            'pvcName':
            p['pvc_name'],
            'lastPVCRefAt':
            '',
            'lastPodRefAt':
            '',
            'workloadsStatus': [
                {
                    'podName': p['pod_name'],
                    'podStatus': 'Running',
                    'workloadName': '',
                    'workloadType': '',
                },
            ],
        }
        wait_volume_kubernetes_status(client, volume_name, ks)

        delete_and_wait_pod(core_api, p['pod_name'])
        # Since persistentVolumeReclaimPolicy of csi_pv is `Delete`,
        # we don't need to delete bounded pv manually
        delete_and_wait_pvc(core_api, p['pvc_name'])
        wait_delete_pv(core_api, p['pv_name'])
Пример #32
0
def test_pvc_creation(client, core_api, pod):  # NOQA
    """
    Test creating a PV and a PVC for an existing volume through the volume's
    `pvCreate` / `pvcCreate` calls, and the resulting Kubernetes Status.

    1. Create a volume and wait for it to be detached.
    2. Try to create a PVC before any PV exists. The call must fail.
    3. Create the PV, wait for it to exist, and verify the Kubernetes Status
    shows the PV as `Available` with no PVC/workload information.
    4. Create the PVC, wait for it to exist, and verify the Kubernetes Status
    shows the PV as `Bound` with the PVC name and namespace filled in.
    5. Create a Pod using the PVC and verify the workload is reported as
    `Running` with empty workload name/type.
    6. Delete the Pod and the PVC, then wait for the PV to be deleted.
    """
    volume_name = "test-pvc-creation"
    client.create_volume(name=volume_name, size=SIZE,
                         numberOfReplicas=2)
    volume = wait_for_volume_detached(client, volume_name)

    pv_name = "pv-" + volume_name
    pvc_name = "pvc-" + volume_name
    pod_name = "pod-" + volume_name

    # try to create pvc without pv for the volume
    with pytest.raises(Exception) as e:
        volume.pvcCreate(namespace="default", pvcName=pvc_name)
    # The message check has to sit after the `with` block. Inside it, the
    # line after the raising call is never executed.
    assert "connot find existing PV for volume" in str(e.value)

    volume.pvCreate(pvName=pv_name)
    for _ in range(RETRY_COUNTS):
        if check_pv_existence(core_api, pv_name):
            break
        time.sleep(RETRY_INTERVAL)
    assert check_pv_existence(core_api, pv_name)

    # Wait for the volume to report the new PV.
    volume = client.by_id_volume(volume_name)
    k_status = volume["kubernetesStatus"]
    for _ in range(RETRY_COUNTS):
        if k_status['pvName'] and k_status['pvStatus'] == 'Available':
            break
        time.sleep(RETRY_INTERVAL)
        volume = client.by_id_volume(volume_name)
        k_status = volume["kubernetesStatus"]
    assert k_status['pvName'] == pv_name
    assert k_status['pvStatus'] == 'Available'
    assert not k_status['namespace']
    assert not k_status['pvcName']
    assert not k_status['workloadsStatus']
    assert not k_status['lastPVCRefAt']
    assert not k_status['lastPodRefAt']

    volume.pvcCreate(namespace="default", pvcName=pvc_name)
    for _ in range(RETRY_COUNTS):
        if check_pvc_existence(core_api, pvc_name):
            break
        time.sleep(RETRY_INTERVAL)
    assert check_pvc_existence(core_api, pvc_name)

    # Wait for the volume to report the new PVC.
    volume = client.by_id_volume(volume_name)
    k_status = volume["kubernetesStatus"]
    for _ in range(RETRY_COUNTS):
        if k_status['pvcName'] and k_status['namespace']:
            break
        time.sleep(RETRY_INTERVAL)
        volume = client.by_id_volume(volume_name)
        k_status = volume["kubernetesStatus"]
    assert k_status['pvName'] == pv_name
    assert k_status['pvStatus'] == 'Bound'
    assert k_status['namespace'] == "default"
    assert k_status['pvcName'] == pvc_name
    assert not k_status['workloadsStatus']
    assert not k_status['lastPVCRefAt']
    assert not k_status['lastPodRefAt']

    pod['metadata']['name'] = pod_name
    pod['spec']['volumes'] = [{
        'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'],
        'persistentVolumeClaim': {
            'claimName': pvc_name,
        },
    }]
    create_and_wait_pod(core_api, pod)

    volume = client.by_id_volume(volume_name)
    k_status = volume["kubernetesStatus"]
    workloads = k_status['workloadsStatus']
    assert k_status['pvName'] == pv_name
    assert k_status['pvStatus'] == 'Bound'
    assert len(workloads) == 1
    # Wait for the workload to be reported as running.
    for _ in range(RETRY_COUNTS):
        if workloads[0]['podStatus'] == 'Running':
            break
        time.sleep(RETRY_INTERVAL)
        volume = client.by_id_volume(volume_name)
        k_status = volume["kubernetesStatus"]
        workloads = k_status['workloadsStatus']
        assert len(workloads) == 1
    assert workloads[0]['podName'] == pod_name
    assert workloads[0]['podStatus'] == 'Running'
    assert not workloads[0]['workloadName']
    assert not workloads[0]['workloadType']
    assert k_status['namespace'] == 'default'
    assert k_status['pvcName'] == pvc_name
    assert not k_status['lastPVCRefAt']
    assert not k_status['lastPodRefAt']

    delete_and_wait_pod(core_api, pod_name)
    delete_and_wait_pvc(core_api, pvc_name)
    wait_delete_pv(core_api, pv_name)
Пример #33
0
def test_csi_offline_expansion(client, core_api, storage_class, pvc, pod_manifest):  # NOQA
    """
    Test CSI feature: offline expansion

    1. Create a new `storage_class` with `allowVolumeExpansion` set
    2. Create a PVC and a Pod using a volume dynamically provisioned from
       the StorageClass
    3. Generate `test_data` and write it through the pod
    4. Delete the pod
    5. Update pvc.spec.resources to expand the volume
    6. Verify through the Longhorn API that the expansion is done
    7. Create a new pod and validate the volume content
    """
    workload_name = 'csi-offline-expand-volume-test'
    claim_name = workload_name + "-pvc"
    expanded_bytes = EXPANDED_VOLUME_SIZE*Gi

    def launch_pod():
        # Point the shared manifest at the PVC and start the pod; used both
        # before and after the expansion.
        first_mount = \
            pod_manifest['spec']['containers'][0]['volumeMounts'][0]
        pod_manifest['metadata']['name'] = workload_name
        pod_manifest['spec']['volumes'] = [{
            'name': first_mount['name'],
            'persistentVolumeClaim': {'claimName': claim_name},
        }]
        create_and_wait_pod(core_api, pod_manifest)

    create_storage_class(storage_class)

    pvc['metadata']['name'] = claim_name
    pvc['spec']['storageClassName'] = storage_class['metadata']['name']
    create_pvc(pvc)

    # Write the reference data, then drop the pod so the volume detaches.
    launch_pod()
    test_data = generate_random_data(VOLUME_RWTEST_SIZE)
    write_pod_volume_data(core_api, workload_name, test_data)
    delete_and_wait_pod(core_api, workload_name)

    bound_pv = wait_and_get_pv_for_pvc(core_api, claim_name)
    assert bound_pv.status.phase == "Bound"
    volume_name = bound_pv.spec.csi.volume_handle
    wait_for_volume_detached(client, volume_name)

    # Request the larger size through the PVC while nothing uses the volume.
    pvc['spec']['resources'] = {
        'requests': {'storage': size_to_string(expanded_bytes)},
    }
    expand_and_wait_for_pvc(core_api, pvc)
    wait_for_volume_expansion(client, volume_name)
    expanded = client.by_id_volume(volume_name)
    assert expanded.state == "detached"
    assert expanded.size == str(expanded_bytes)

    # Re-attach through a fresh pod and make sure the data survived.
    launch_pod()
    assert read_volume_data(core_api, workload_name) == test_data

    attached = client.by_id_volume(volume_name)
    engine = get_volume_engine(attached)
    assert attached.size == str(expanded_bytes)
    assert attached.size == engine.size
Пример #34
0
def test_allow_volume_creation_with_degraded_availability_csi(
        client, core_api, apps_api, make_deployment_with_pvc):  # NOQA
    """
    Test Allow Volume Creation with Degraded Availability (CSI)

    Requirement:
    1. Set `allow-volume-creation-with-degraded-availability` to true.
    2. Set `node-level-soft-anti-affinity` to false.

    Steps:
    1. Disable scheduling for node 3.
    2. Create a Deployment Pod with a volume and 3 replicas.
        1. After the volume is attached, scheduling error should be seen.
    3. Write data to the Pod.
    4. Scale down the deployment to 0 to detach the volume.
        1. Scheduled condition should become true.
    5. Scale up the deployment back to 1 and verify the data.
        1. Scheduled condition should become false.
    6. Enable the scheduling for node 3.
        1. Volume should start rebuilding on the node 3 soon.
        2. Once the rebuilding starts, the scheduled condition should become
           true.
    7. Once rebuild finished, scale down and back the deployment to verify
       the data.
    """
    data_path = "/data/test"

    def scale_deployment(replica_count):
        # Patch the deployment in place to the requested replica count.
        deployment['spec']['replicas'] = replica_count
        apps_api.patch_namespaced_deployment(body=deployment,
                                             namespace='default',
                                             name=deployment_name)

    def wait_attached(volume_name):
        common.wait_for_volume_status(client, volume_name,
                                      common.VOLUME_FIELD_STATE,
                                      common.VOLUME_STATE_ATTACHED)

    def assert_data_intact():
        # Compare the checksum seen by whichever pod is currently running
        # with the one recorded right after the data was written.
        current_pod = common.wait_and_get_any_deployment_pod(
            core_api, deployment_name)
        assert created_md5sum == get_pod_data_md5sum(
            core_api, current_pod.metadata.name, data_path)

    for setting_id, wanted in (
            (common.SETTING_DEGRADED_AVAILABILITY, "true"),
            (SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY, "false")):
        client.update(client.by_id_setting(setting_id), value=wanted)

    node3 = client.list_node()[2]
    client.update(node3, allowScheduling=False)

    vol = common.create_and_check_volume(client, generate_volume_name(),
                                         size=str(500 * Mi))

    pv_name = vol.name + "-pv"
    common.create_pv_for_volume(client, core_api, vol, pv_name)

    pvc_name = vol.name + "-pvc"
    common.create_pvc_for_volume(client, core_api, vol, pvc_name)

    deployment_name = vol.name + "-dep"
    deployment = make_deployment_with_pvc(deployment_name, pvc_name)
    deployment["spec"]["replicas"] = 3
    apps_api.create_namespaced_deployment(body=deployment, namespace='default')
    wait_attached(vol.name)
    common.wait_scheduling_failure(client, vol.name)

    # Write the reference data and remember its checksum.
    writer_pod = common.wait_and_get_any_deployment_pod(core_api,
                                                        deployment_name)
    common.write_pod_volume_random_data(core_api, writer_pod.metadata.name,
                                        data_path, common.DATA_SIZE_IN_MB_2)
    created_md5sum = get_pod_data_md5sum(core_api, writer_pod.metadata.name,
                                         data_path)

    # Detached: the scheduled condition is expected to be true.
    scale_deployment(0)
    vol = common.wait_for_volume_detached(client, vol.name)
    assert vol.conditions[VOLUME_CONDITION_SCHEDULED]['status'] == "True"

    # Attached again with node 3 still disabled: scheduled goes false.
    scale_deployment(1)
    wait_attached(vol.name)
    common.wait_for_volume_condition_scheduled(client, vol.name, "status",
                                               common.CONDITION_STATUS_FALSE)
    assert_data_intact()

    # Re-enable node 3 and let the volume rebuild onto it.
    client.update(node3, allowScheduling=True)
    common.wait_for_rebuild_start(client, vol.name)
    vol = client.by_id_volume(vol.name)
    assert vol.conditions[VOLUME_CONDITION_SCHEDULED]['status'] == "True"
    common.wait_for_rebuild_complete(client, vol.name)

    # Bounce the deployment once more and re-check the data.
    scale_deployment(0)
    common.wait_for_volume_detached(client, vol.name)

    scale_deployment(1)
    wait_attached(vol.name)
    assert_data_intact()
Пример #35
0
def test_kubernetes_status(client, core_api, storage_class,  # NOQA
                           statefulset, csi_pv, pvc, pod):  # NOQA
    """
    Test that a volume's `kubernetesStatus` follows its Kubernetes objects.

    1. Create a StorageClass with reclaimPolicy `Retain` and a StatefulSet.
    2. Create an extra standalone pod that uses the PVC of the 2nd
       StatefulSet pod and is pinned to the same node.
    3. Verify `kubernetesStatus` of every volume: PV/PVC info is set, the
       1st volume has one workload, the 2nd has two (StatefulSet pod and
       extra pod), all pods reach `Running`, and `lastPVCRefAt` /
       `lastPodRefAt` are empty.
    4. Delete the StatefulSet only. The 1st volume keeps its last workload
       and gets `lastPodRefAt`; the 2nd volume is still used by the extra
       pod, so `lastPodRefAt` stays empty.
    5. Delete the extra pod. Every volume now has `lastPodRefAt` set.
    6. Delete the PVCs. `pvStatus` becomes `Released` and `lastPVCRefAt`
       is set.
    7. Delete the PVs. `pvName` and `pvStatus` become empty.
    8. Reuse each volume through a new PV/PVC/pod and verify the status is
       reset to the new objects, then clean up.
    """
    statefulset_name = 'kubernetes-status-test'
    update_statefulset_manifests(statefulset, storage_class, statefulset_name)

    storage_class['reclaimPolicy'] = 'Retain'
    create_storage_class(storage_class)
    create_and_wait_statefulset(statefulset)

    pod_info = get_statefulset_pod_info(core_api, statefulset)
    volume_info = [p['pv_name'] for p in pod_info]

    extra_pod_name = 'extra-pod-using-' + volume_info[1]
    pod['metadata']['name'] = extra_pod_name
    p2 = core_api.read_namespaced_pod(name=pod_info[1]['pod_name'],
                                      namespace='default')
    # schedule the extra pod onto the node of the pod it shares the PVC with
    pod['spec']['nodeName'] = p2.spec.node_name
    pod['spec']['volumes'] = [{
        'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'],
        'persistentVolumeClaim': {
            'claimName': pod_info[1]['pvc_name'],
        },
    }]
    create_and_wait_pod(core_api, pod)

    for i, (p, volume_name) in enumerate(zip(pod_info, volume_info)):
        volume = client.by_id_volume(volume_name)
        k_status = volume["kubernetesStatus"]
        workloads = k_status['workloadsStatus']
        assert k_status['pvName'] == p['pv_name']
        assert k_status['pvStatus'] == 'Bound'
        assert k_status['namespace'] == 'default'
        assert k_status['pvcName'] == p['pvc_name']
        assert not k_status['lastPVCRefAt']
        assert not k_status['lastPodRefAt']
        if i == 0:
            assert len(workloads) == 1
            assert workloads[0]['podName'] == p['pod_name']
            assert workloads[0]['workloadName'] == statefulset_name
            assert workloads[0]['workloadType'] == 'StatefulSet'
            # Poll until the pod is reported as Running. The sleep and the
            # refresh must be inside the loop, otherwise the loop only
            # re-checks the same stale status.
            for _ in range(RETRY_COUNTS):
                if workloads[0]['podStatus'] == 'Running':
                    break
                time.sleep(RETRY_INTERVAL)
                volume = client.by_id_volume(volume_name)
                k_status = volume["kubernetesStatus"]
                workloads = k_status['workloadsStatus']
                assert len(workloads) == 1
            assert workloads[0]['podStatus'] == 'Running'
        if i == 1:
            assert len(workloads) == 2
            # the order of the two workloads is not fixed
            if workloads[0]['podName'] == p['pod_name']:
                assert workloads[1]['podName'] == extra_pod_name
                assert workloads[0]['workloadName'] == statefulset_name
                assert workloads[0]['workloadType'] == 'StatefulSet'
                assert not workloads[1]['workloadName']
                assert not workloads[1]['workloadType']
            else:
                assert workloads[1]['podName'] == p['pod_name']
                assert workloads[0]['podName'] == extra_pod_name
                assert not workloads[0]['workloadName']
                assert not workloads[0]['workloadType']
                assert workloads[1]['workloadName'] == statefulset_name
                assert workloads[1]['workloadType'] == 'StatefulSet'
            for _ in range(RETRY_COUNTS):
                if workloads[0]['podStatus'] == 'Running' and \
                        workloads[1]['podStatus'] == 'Running':
                    break
                time.sleep(RETRY_INTERVAL)
                volume = client.by_id_volume(volume_name)
                k_status = volume["kubernetesStatus"]
                workloads = k_status['workloadsStatus']
                assert len(workloads) == 2
            assert workloads[0]['podStatus'] == 'Running'
            assert workloads[1]['podStatus'] == 'Running'

    # the extra pod is still using the 2nd volume
    delete_and_wait_statefulset_only(core_api, statefulset)
    for i, (p, volume_name) in enumerate(zip(pod_info, volume_info)):
        volume = client.by_id_volume(volume_name)
        k_status = volume["kubernetesStatus"]
        workloads = k_status['workloadsStatus']
        assert k_status['pvName'] == p['pv_name']
        assert k_status['pvStatus'] == 'Bound'
        assert k_status['namespace'] == 'default'
        assert k_status['pvcName'] == p['pvc_name']
        assert not k_status['lastPVCRefAt']
        assert len(workloads) == 1
        if i == 0:
            assert workloads[0]['podName'] == p['pod_name']
            assert workloads[0]['workloadName'] == statefulset_name
            assert workloads[0]['workloadType'] == 'StatefulSet'
            assert k_status['lastPodRefAt']
        if i == 1:
            assert workloads[0]['podName'] == extra_pod_name
            assert not workloads[0]['workloadName']
            assert not workloads[0]['workloadType']
            assert not k_status['lastPodRefAt']

    # deleted extra_pod, all volumes have no workload
    delete_and_wait_pod(core_api, pod['metadata']['name'])
    for i, (p, volume_name) in enumerate(zip(pod_info, volume_info)):
        volume = client.by_id_volume(volume_name)
        k_status = volume["kubernetesStatus"]
        workloads = k_status['workloadsStatus']
        assert k_status['pvName'] == p['pv_name']
        assert k_status['pvStatus'] == 'Bound'
        assert k_status['namespace'] == 'default'
        assert k_status['pvcName'] == p['pvc_name']
        assert not k_status['lastPVCRefAt']
        assert k_status['lastPodRefAt']
        assert len(workloads) == 1
        if i == 0:
            assert workloads[0]['podName'] == p['pod_name']
            assert workloads[0]['workloadName'] == statefulset_name
            assert workloads[0]['workloadType'] == 'StatefulSet'
        if i == 1:
            assert workloads[0]['podName'] == extra_pod_name
            assert not workloads[0]['workloadName']
            assert not workloads[0]['workloadType']

    # deleted pvc only.
    for i, (p, volume_name) in enumerate(zip(pod_info, volume_info)):
        delete_and_wait_pvc(core_api, p['pvc_name'])
        volume = client.by_id_volume(volume_name)
        k_status = volume["kubernetesStatus"]
        workloads = k_status['workloadsStatus']
        for _ in range(RETRY_COUNTS):
            if k_status['pvStatus'] == 'Released':
                break
            time.sleep(RETRY_INTERVAL)
            volume = client.by_id_volume(volume_name)
            k_status = volume["kubernetesStatus"]
            workloads = k_status['workloadsStatus']
        assert k_status['pvName'] == p['pv_name']
        assert k_status['pvStatus'] == 'Released'
        assert k_status['namespace'] == 'default'
        assert k_status['pvcName'] == p['pvc_name']
        assert k_status['lastPVCRefAt']
        assert k_status['lastPodRefAt']
        assert len(workloads) == 1
        if i == 0:
            assert workloads[0]['podName'] == p['pod_name']
            assert workloads[0]['workloadName'] == statefulset_name
            assert workloads[0]['workloadType'] == 'StatefulSet'
        if i == 1:
            assert workloads[0]['podName'] == extra_pod_name
            assert not workloads[0]['workloadName']
            assert not workloads[0]['workloadType']

    # deleted pv only.
    for i, (p, volume_name) in enumerate(zip(pod_info, volume_info)):
        delete_and_wait_pv(core_api, p['pv_name'])
        volume = client.by_id_volume(volume_name)
        k_status = volume["kubernetesStatus"]
        workloads = k_status['workloadsStatus']
        assert k_status['pvName'] == ''
        assert k_status['pvStatus'] == ''
        assert k_status['namespace'] == 'default'
        assert k_status['pvcName'] == p['pvc_name']
        assert k_status['lastPVCRefAt']
        assert k_status['lastPodRefAt']
        assert len(workloads) == 1
        if i == 0:
            assert workloads[0]['podName'] == p['pod_name']
            assert workloads[0]['workloadName'] == statefulset_name
            assert workloads[0]['workloadType'] == 'StatefulSet'
        if i == 1:
            assert workloads[0]['podName'] == extra_pod_name
            assert not workloads[0]['workloadName']
            assert not workloads[0]['workloadType']

    # reuse that volume
    for p, volume_name in zip(pod_info, volume_info):
        p['pod_name'] = p['pod_name'].replace('kubernetes-status-test',
                                              'kubernetes-status-test-reuse')
        p['pvc_name'] = p['pvc_name'].replace('kubernetes-status-test',
                                              'kubernetes-status-test-reuse')
        p['pv_name'] = p['pvc_name']

        csi_pv['metadata']['name'] = p['pv_name']
        csi_pv['spec']['csi']['volumeHandle'] = volume_name
        core_api.create_persistent_volume(csi_pv)

        pvc['metadata']['name'] = p['pvc_name']
        pvc['spec']['volumeName'] = p['pv_name']
        core_api.create_namespaced_persistent_volume_claim(
            body=pvc, namespace='default')

        pod['metadata']['name'] = p['pod_name']
        pod['spec']['volumes'] = [{
            'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'],
            'persistentVolumeClaim': {
                'claimName': p['pvc_name'],
            },
        }]
        create_and_wait_pod(core_api, pod)

        volume = client.by_id_volume(volume_name)
        k_status = volume["kubernetesStatus"]
        workloads = k_status['workloadsStatus']
        assert len(workloads) == 1
        assert k_status['pvName'] == p['pv_name']
        for _ in range(RETRY_COUNTS):
            if k_status['pvStatus'] == 'Bound':
                break
            time.sleep(RETRY_INTERVAL)
            volume = client.by_id_volume(volume_name)
            k_status = volume["kubernetesStatus"]
            workloads = k_status['workloadsStatus']
            assert len(workloads) == 1
        assert k_status['pvStatus'] == 'Bound'
        for _ in range(RETRY_COUNTS):
            if workloads[0]['podStatus'] == 'Running':
                break
            time.sleep(RETRY_INTERVAL)
            volume = client.by_id_volume(volume_name)
            k_status = volume["kubernetesStatus"]
            workloads = k_status['workloadsStatus']
            assert len(workloads) == 1
        assert workloads[0]['podStatus'] == 'Running'
        assert workloads[0]['podName'] == p['pod_name']
        assert not workloads[0]['workloadName']
        assert not workloads[0]['workloadType']
        assert k_status['namespace'] == 'default'
        assert k_status['pvcName'] == p['pvc_name']
        assert not k_status['lastPVCRefAt']
        assert not k_status['lastPodRefAt']

        delete_and_wait_pod(core_api, p['pod_name'])
        # Since persistentVolumeReclaimPolicy of csi_pv is `Delete`,
        # we don't need to delete bounded pv manually
        delete_and_wait_pvc(core_api, p['pvc_name'])
        wait_delete_pv(core_api, p['pv_name'])
Пример #36
0
def cleanup_volume(client, vol_name):  # NOQA
    """
    Detach and delete the volume named `vol_name`, then hand over to
    `common.wait_for_volume_delete` for that name.
    """
    doomed = client.by_id_volume(vol_name)
    doomed.detach()
    client.delete(doomed)
    common.wait_for_volume_delete(client, vol_name)
Пример #37
0
def test_node_delete_umount_disks(client):  # NOQA
    """
    Test removing a disk from a node after the disk has been unmounted.

    1. Create a host disk for volume `vol-disk-1` and add it to the current
       node, with scheduling disabled on every other disk of that node.
    2. Create a volume and verify the replica on the current node is placed
       on the new disk.
    3. Unmount the disk and verify it becomes unschedulable and not ready
       with `storageMaximum` 0.
    4. Verify that removing the unmounted disk from the node is rejected
       with a "disable the disk" error.
    5. Re-enable scheduling on the other disks.
    6. Mount the disk back and verify it becomes ready again.
    7. Delete the volume, unmount the disk again, remove the disk from the
       node and delete the mount directory.
    """
    # create test disks for node
    disk_volume_name = 'vol-disk-1'
    lht_hostId = get_self_host_id()
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    disk_path1 = create_host_disk(client, disk_volume_name,
                                  str(Gi), lht_hostId)
    disk1 = {"path": disk_path1, "allowScheduling": True,
             "storageReserved": SMALL_DISK_SIZE}

    update_disk = get_update_disks(disks)
    for disk in update_disk:
        disk["allowScheduling"] = False
    # add new disk for node
    update_disk.append(disk1)
    # save disks to node
    node = node.diskUpdate(disks=update_disk)
    node = common.wait_for_disk_update(client, lht_hostId,
                                       len(update_disk))
    assert len(node["disks"]) == len(update_disk)
    node = client.by_id_node(lht_hostId)
    assert len(node["disks"]) == len(update_disk)

    disks = node["disks"]
    # wait for node controller to update disk status
    # (`items()` instead of `iteritems()`: the latter is gone in Python 3)
    for fsid, disk in disks.items():
        if disk["path"] == disk_path1:
            wait_for_disk_status(client, lht_hostId, fsid,
                                 "allowScheduling", True)
            wait_for_disk_status(client, lht_hostId, fsid,
                                 "storageReserved", SMALL_DISK_SIZE)
            free, total = common.get_host_disk_size(disk_path1)
            wait_for_disk_status(client, lht_hostId, fsid,
                                 "storageAvailable", free)
            wait_for_disk_status(client, lht_hostId, fsid,
                                 "storageMaximum", total)

    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    for disk in disks.values():
        if disk["path"] == disk_path1:
            assert disk["allowScheduling"]
            assert disk["storageReserved"] == SMALL_DISK_SIZE
            assert disk["storageScheduled"] == 0
            free, total = common.get_host_disk_size(disk_path1)
            assert disk["storageMaximum"] == total
            assert disk["storageAvailable"] == free
            conditions = disk["conditions"]
            assert conditions[DISK_CONDITION_READY]["status"] == \
                CONDITION_STATUS_TRUE
            assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \
                CONDITION_STATUS_TRUE
        else:
            assert not disk["allowScheduling"]

    # create a volume
    nodes = client.list_node()
    vol_name = common.generate_volume_name()
    volume = create_volume(client, vol_name, str(SMALL_DISK_SIZE),
                           lht_hostId, len(nodes))
    replicas = volume["replicas"]
    for replica in replicas:
        replica_host_id = replica["hostId"]
        assert replica_host_id != ""
        assert replica["running"]
        if replica_host_id == lht_hostId:
            assert replica["dataPath"].startswith(disk_path1)

    # umount the disk
    mount_path = os.path.join(DIRECTORY_PATH, disk_volume_name)
    common.umount_disk(mount_path)

    # wait for update node status
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    for fsid, disk in disks.items():
        if disk["path"] == disk_path1:
            wait_for_disk_status(client, lht_hostId,
                                 fsid, "allowScheduling", False)
            wait_for_disk_status(client, lht_hostId,
                                 fsid, "storageMaximum", 0)
            wait_for_disk_conditions(client, lht_hostId, fsid,
                                     DISK_CONDITION_READY,
                                     CONDITION_STATUS_FALSE)

    # check result
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    # every disk except the unmounted one; used below to ask the node to
    # drop the unmounted disk
    update_disks = []
    for disk in disks.values():
        if disk["path"] == disk_path1:
            assert not disk["allowScheduling"]
            assert disk["storageMaximum"] == 0
            assert disk["storageAvailable"] == 0
            assert disk["storageReserved"] == SMALL_DISK_SIZE
            assert disk["storageScheduled"] == SMALL_DISK_SIZE
            conditions = disk["conditions"]
            assert conditions[DISK_CONDITION_READY]["status"] == \
                CONDITION_STATUS_FALSE
            assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \
                CONDITION_STATUS_FALSE
        else:
            conditions = disk["conditions"]
            assert conditions[DISK_CONDITION_READY]["status"] == \
                CONDITION_STATUS_TRUE
            assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \
                CONDITION_STATUS_TRUE
            update_disks.append(disk)

    # delete umount disk exception
    with pytest.raises(Exception) as e:
        node.diskUpdate(disks=update_disks)
    assert "disable the disk" in str(e.value)

    # update other disks
    disks = node["disks"]
    for disk in disks.values():
        if disk["path"] != disk_path1:
            disk["allowScheduling"] = True
    test_update = get_update_disks(disks)
    node = node.diskUpdate(disks=test_update)
    disks = node["disks"]
    for fsid, disk in disks.items():
        if disk["path"] != disk_path1:
            wait_for_disk_status(client, lht_hostId,
                                 fsid, "allowScheduling", True)
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    for disk in disks.values():
        if disk["path"] != disk_path1:
            assert disk["allowScheduling"]

    # mount the disk back
    disk_volume = client.by_id_volume(disk_volume_name)
    dev = get_volume_endpoint(disk_volume)
    common.mount_disk(dev, mount_path)

    # wait for update node status
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    for fsid, disk in disks.items():
        if disk["path"] == disk_path1:
            wait_for_disk_status(client, lht_hostId,
                                 fsid, "allowScheduling", False)
            wait_for_disk_conditions(client, lht_hostId, fsid,
                                     DISK_CONDITION_READY,
                                     CONDITION_STATUS_TRUE)

    # check result
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    for disk in disks.values():
        if disk["path"] == disk_path1:
            free, total = common.get_host_disk_size(disk_path1)
            assert not disk["allowScheduling"]
            assert disk["storageMaximum"] == total
            assert disk["storageAvailable"] == free
            assert disk["storageReserved"] == SMALL_DISK_SIZE
            assert disk["storageScheduled"] == SMALL_DISK_SIZE
            conditions = disk["conditions"]
            assert conditions[DISK_CONDITION_READY]["status"] == \
                CONDITION_STATUS_TRUE
            assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \
                CONDITION_STATUS_TRUE
        else:
            conditions = disk["conditions"]
            assert conditions[DISK_CONDITION_READY]["status"] == \
                CONDITION_STATUS_TRUE
            assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \
                CONDITION_STATUS_TRUE

    # delete volume and umount disk
    cleanup_volume(client, vol_name)
    common.umount_disk(mount_path)

    # wait for update node status
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    for fsid, disk in disks.items():
        if disk["path"] == disk_path1:
            wait_for_disk_status(client, lht_hostId,
                                 fsid, "allowScheduling", False)
            wait_for_disk_status(client, lht_hostId,
                                 fsid, "storageScheduled", 0)
            wait_for_disk_status(client, lht_hostId,
                                 fsid, "storageMaximum", 0)

    # test delete the umount disk
    node = client.by_id_node(lht_hostId)
    node.diskUpdate(disks=update_disks)
    node = common.wait_for_disk_update(client, lht_hostId,
                                       len(update_disks))
    assert len(node["disks"]) == len(update_disks)
    cmd = ['rm', '-r', mount_path]
    subprocess.check_call(cmd)
Пример #38
0
def test_zone_tags(client, core_api, volume_name, k8s_node_zone_tags):  # NOQA
    """
    Test anti affinity zone feature

    1. Add Kubernetes zone labels to the nodes
        1. Only two zones now: zone1 and zone2
    2. Create a volume with two replicas
    3. Verify zone1 and zone2 each have one replica.
    4. Remove a random replica and wait for the volume to finish rebuilding
    5. Verify zone1 and zone2 each have one replica.
    6. Repeat steps 4-5 a few times.
    7. Update the volume to 3 replicas, make sure they're scheduled on
       3 nodes
    8. Remove a random replica and wait for the volume to finish rebuilding
    9. Make sure replicas are on different nodes
    10. Repeat steps 8-9 a few times
    """

    wait_longhorn_node_zone_updated(client)

    volume = create_and_check_volume(client, volume_name, num_of_replicas=2)

    host_id = get_self_host_id()

    volume.attach(hostId=host_id)

    volume = wait_for_volume_healthy(client, volume_name)

    volume = client.by_id_volume(volume_name)

    zone1_replica_count = get_zone_replica_count(client, volume_name, ZONE1)
    zone2_replica_count = get_zone_replica_count(client, volume_name, ZONE2)

    # the volume has 2 replicas, so equal counts are how the test checks for
    # one replica per zone (step 3 of the docstring)
    assert zone1_replica_count == zone2_replica_count

    # repeat the remove/rebuild cycle a random 3 or 4 times
    for i in range(randrange(3, 5)):
        volume = client.by_id_volume(volume_name)

        replica_count = len(volume.replicas)
        assert replica_count == 2

        # pick a random replica to remove
        replica_id = randrange(0, replica_count)

        replica_name = volume.replicas[replica_id].name

        volume.replicaRemove(name=replica_name)

        wait_for_volume_degraded(client, volume_name)

        wait_for_volume_healthy(client, volume_name)

        wait_for_volume_replica_count(client, volume_name, replica_count)

        volume = client.by_id_volume(volume_name)

        # NOTE(review): on Python 3 `map` returns a one-shot iterator, and
        # these names are collected after the volume is healthy again, so
        # they presumably already include the rebuilt replica. Confirm that
        # wait_new_replica_ready behaves as intended with this argument.
        replica_names = map(lambda replica: replica.name, volume["replicas"])

        wait_new_replica_ready(client, volume_name, replica_names)

        zone1_replica_count = \
            get_zone_replica_count(client, volume_name, ZONE1)
        zone2_replica_count = \
            get_zone_replica_count(client, volume_name, ZONE2)

        assert zone1_replica_count == zone2_replica_count

    # `volume` is the object fetched in the last loop iteration above
    volume.updateReplicaCount(replicaCount=3)

    wait_for_volume_degraded(client, volume_name)

    wait_for_volume_replica_count(client, volume_name, 3)

    wait_for_volume_healthy(client, volume_name)

    volume = client.by_id_volume(volume_name)

    lh_node_names = list(map(lambda node: node.name, client.list_node()))

    # Cross off the host of every replica. `list.remove` raises ValueError
    # if a host was already crossed off (two replicas on one node), and the
    # list ends up empty only if every node hosts a replica.
    for replica in volume.replicas:
        lh_node_names.remove(replica.hostId)

    assert lh_node_names == []

    # same remove/rebuild cycle, now checking node spread with 3 replicas
    for i in range(randrange(3, 5)):
        volume = client.by_id_volume(volume_name)

        replica_count = len(volume.replicas)
        assert replica_count == 3

        replica_id = randrange(0, replica_count)

        replica_name = volume.replicas[replica_id].name

        volume.replicaRemove(name=replica_name)

        wait_for_volume_degraded(client, volume_name)

        wait_for_volume_healthy(client, volume_name)

        wait_for_volume_replica_count(client, volume_name, replica_count)

        volume = client.by_id_volume(volume_name)

        lh_node_names = list(map(lambda node: node.name, client.list_node()))

        # same one-replica-per-node check as after the replica count update
        for replica in volume.replicas:
            lh_node_names.remove(replica.hostId)

        assert lh_node_names == []
Пример #39
0
def test_setting_toleration():
    """
    Verify validation and rollout of the `taint-toleration` setting.

    1.  Try to set `taint-toleration` to
        "key1=value1:NoSchedule; key2:InvalidEffect" and expect the request
        to be rejected with an "invalid effect" error.
    2.  Create a volume and attach it to the current host.
    3.  Try to set `taint-toleration` to
        "key1=value1:NoSchedule; key2:NoExecute" and expect the request to
        be rejected because a volume is still attached.
    4.  Write random `data1` to the volume, read it back, then detach.
    5.  Apply the valid toleration and wait until the Longhorn system
        components have picked it up (UI, manager and driver deployer are
        expected to stay untouched).
    6.  Wait for the Longhorn node to be ready again, re-attach the volume,
        validate `data1`, write and validate `data2`, then detach.
    7.  Clear the setting and wait until the components run without any
        toleration.
    8.  Re-attach the volume, validate `data2`, write and validate `data3`.
    9.  Clean up the volume.
    """
    vol_name = "test-toleration-vol"
    rejected_value = "key1=value1:NoSchedule; key2:InvalidEffect"
    accepted_value = "key1=value1:NoSchedule; key2:NoExecute"
    # Parsed form of `accepted_value`, handed to the wait helper below.
    accepted_tolerations = [
        {
            "key": "key1",
            "value": "value1",
            "operator": "Equal",
            "effect": "NoSchedule"
        },
        {
            "key": "key2",
            "value": None,
            "operator": "Exists",
            "effect": "NoExecute"
        },
    ]

    client = get_longhorn_api_client()  # NOQA
    apps_api = get_apps_api_client()  # NOQA
    core_api = get_core_api_client()  # NOQA
    node_count = len(client.list_node())

    def apply_toleration(api, current, value, tolerations):
        # Push the new value, confirm the API echoes it back, then block
        # until the toleration update has been observed.
        updated = api.update(current, value=value)
        assert updated.value == value
        wait_for_toleration_update(core_api, apps_api, node_count,
                                   tolerations)
        return updated

    def reattach_and_check(expected):
        # A fresh client and node id are obtained after every toleration
        # change before the volume is touched again.
        api, host = wait_for_longhorn_node_ready()
        vol = api.by_id_volume(vol_name)
        vol.attach(hostId=host)
        vol = wait_for_volume_healthy(api, vol_name)
        check_volume_data(vol, expected)
        return api, vol

    def write_and_check(vol):
        # Write a random payload and immediately read it back.
        payload = write_volume_random_data(vol)
        check_volume_data(vol, payload)
        return payload

    def detach_and_wait(api, vol):
        vol.detach(hostId="")
        wait_for_volume_detached(api, vol_name)

    toleration = client.by_id_setting(SETTING_TAINT_TOLERATION)

    # An unknown taint effect must be refused.
    with pytest.raises(Exception) as invalid_err:
        client.update(toleration, value=rejected_value)
    assert 'invalid effect' in str(invalid_err.value)

    volume = create_and_check_volume(client, vol_name)
    volume.attach(hostId=get_self_host_id())
    volume = wait_for_volume_healthy(client, vol_name)

    # A well-formed value is still refused while a volume is attached.
    with pytest.raises(Exception) as attached_err:
        client.update(toleration, value=accepted_value)
    assert 'cannot modify toleration setting before all volumes are detached' \
           in str(attached_err.value)

    data1 = write_and_check(volume)
    detach_and_wait(client, volume)

    # With everything detached the same value is now accepted.
    apply_toleration(client, toleration, accepted_value,
                     accepted_tolerations)

    client, volume = reattach_and_check(data1)
    data2 = write_and_check(volume)
    detach_and_wait(client, volume)

    # cleanup: re-read the setting through the current client and clear it
    toleration = client.by_id_setting(SETTING_TAINT_TOLERATION)
    apply_toleration(client, toleration, "", [])

    client, volume = reattach_and_check(data2)
    write_and_check(volume)

    cleanup_volume(client, volume)
Пример #40
0
def test_setting_backing_image_auto_cleanup(client, core_api,
                                            volume_name):  # NOQA
    """
    Test that the Backing Image Cleanup Wait Interval setting works correctly.

    The default value of setting `BackingImageCleanupWaitInterval` is 60.

    1. Create a backing image.
    2. Create multiple volumes using the backing image.
    3. Attach all volumes, Then:
        1. Wait for all volumes can become running.
        2. Verify the correct in all volumes.
        3. Verify the backing image disk status map.
        4. Verify the only backing image file in each disk is reused by
           multiple replicas. The backing image file path is
           `<Data path>/<The backing image name>/backing`
    4. Unschedule test node to guarantee when replica removed from test node,
       no new replica can be rebuilt on the test node.
    5. Remove all replicas in one disk.
       Wait for 50 seconds.
       Then verify nothing changes in the backing image disk state map
       (before the cleanup wait interval is passed).
    6. Modify `BackingImageCleanupWaitInterval` to a small value. Then verify:
        1. The download state of the disk containing no replica becomes
           terminating first, and the entry will be removed from the map later.
        2. The related backing image file is removed.
        3. The download state of other disks keep unchanged.
           All volumes still work fine.
    7. Delete all volumes. Verify that there will only remain 1 entry in the
       backing image disk map
    8. Delete the backing image.
    """

    # Step 1
    create_backing_image_with_matching_url(client, BACKING_IMAGE_NAME,
                                           BACKING_IMAGE_QCOW2_URL)

    # Step 2
    volume_names = [volume_name + "-1", volume_name + "-2", volume_name + "-3"]

    for volume_name in volume_names:
        volume = create_and_check_volume(client, volume_name, 3,
                                         str(BACKING_IMAGE_EXT4_SIZE),
                                         BACKING_IMAGE_NAME)

    # Step 3
    lht_host_id = get_self_host_id()
    for volume_name in volume_names:
        volume = client.by_id_volume(volume_name)
        volume.attach(hostId=lht_host_id)
        wait_for_volume_healthy(client, volume_name)
        assert volume.backingImage == BACKING_IMAGE_NAME

    backing_image = client.by_id_backing_image(BACKING_IMAGE_NAME)
    assert len(backing_image.diskFileStatusMap) == 3
    for disk_id, status in iter(backing_image.diskFileStatusMap.items()):
        assert status.state == "ready"

    backing_images_in_disk = os.listdir("/var/lib/longhorn/backing-images")
    assert len(backing_images_in_disk) == 1
    assert os.path.exists("/var/lib/longhorn/backing-images/{}/backing".format(
        backing_images_in_disk[0]))
    assert os.path.exists(
        "/var/lib/longhorn/backing-images/{}/backing.cfg".format(
            backing_images_in_disk[0]))

    # Step 4
    current_host = client.by_id_node(id=lht_host_id)
    client.update(current_host, allowScheduling=False)
    wait_for_node_update(client, lht_host_id, "allowScheduling", False)

    # Step 5
    for volume_name in volume_names:
        volume = client.by_id_volume(volume_name)
        for replica in volume.replicas:
            if replica.hostId == lht_host_id:
                replica_name = replica.name
        volume.replicaRemove(name=replica_name)
    # This wait interval should be smaller than the setting value.
    # Otherwise, the backing image files may be cleaned up.
    time.sleep(int(BACKING_IMAGE_CLEANUP_WAIT_INTERVAL))
    check_backing_image_disk_map_status(client, BACKING_IMAGE_NAME, 3, "ready")

    # Step 6
    update_setting(client, "backing-image-cleanup-wait-interval", "1")
    check_backing_image_disk_map_status(client, BACKING_IMAGE_NAME, 2, "ready")

    backing_images_in_disk = os.listdir("/var/lib/longhorn/backing-images")
    assert len(backing_images_in_disk) == 0

    # Step 7
    for volume_name in volume_names:
        volume = client.by_id_volume(volume_name)
        client.delete(volume)
        wait_for_volume_delete(client, volume_name)

    check_backing_image_disk_map_status(client, BACKING_IMAGE_NAME, 1, "ready")